From 12abf0a5f7126036fc8354ad8a7ae512d73db0fd Mon Sep 17 00:00:00 2001 From: Christoph Wagner Date: Fri, 19 Sep 2025 23:26:18 +0200 Subject: [PATCH] native scheme yaml parser --- private_includes/base/all.ily | 1 - private_includes/base/scm/json_parser.scm | 482 ---------------------- private_includes/base/scm/yaml_parser.scm | 171 +++++++- 3 files changed, 150 insertions(+), 504 deletions(-) delete mode 100644 private_includes/base/scm/json_parser.scm diff --git a/private_includes/base/all.ily b/private_includes/base/all.ily index 03a811d..3376e3a 100644 --- a/private_includes/base/all.ily +++ b/private_includes/base/all.ily @@ -19,7 +19,6 @@ (string-append (dirname (current-filename)) file-name-separator-string)) "scm" file-name-separator-string filename ))))) - (scm-load "json_parser.scm") (scm-load "resolve_inherits.scm") (scm-load "yaml_parser.scm"))) #(define AUTHOR_DATA (if (defined? 'AUTHOR_DATA) AUTHOR_DATA (parse-yml-file "../../lilypond-song-includes/data/authors.yml"))) diff --git a/private_includes/base/scm/json_parser.scm b/private_includes/base/scm/json_parser.scm deleted file mode 100644 index 8f71415..0000000 --- a/private_includes/base/scm/json_parser.scm +++ /dev/null @@ -1,482 +0,0 @@ -;;; (json parser) --- Guile JSON implementation. - -;; Copyright (C) 2013-2020 Aleix Conchillo Flaque -;; -;; This file is part of guile-json. -;; -;; guile-json is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 3 of the License, or -;; (at your option) any later version. -;; -;; guile-json is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;; General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with guile-json. If not, see https://www.gnu.org/licenses/. - -;;; Commentary: - -;; JSON module for Guile - -;;; Code: - -(define-module (json parser) - #:use-module (ice-9 rdelim) - #:use-module (ice-9 streams) - #:use-module (rnrs io ports) - #:export (json->scm - json-string->scm - json-seq->scm - json-seq-string->scm)) - -;; -;; Miscellaneuos helpers -;; - -(define (json-exception port) - (throw 'json-invalid port)) - -(define (digit? c) - (case c - ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) #t) - (else #f))) - -(define (whitespace? c) - (case c - ((#\sp #\ht #\lf #\cr) #t) - (else #f))) - -(define (control-char? ch) - (<= (char->integer ch) #x1F)) - -(define (skip-whitespaces port) - (let ((ch (peek-char port))) - (cond - ((whitespace? ch) - (read-char port) - (skip-whitespaces port)) - (else *unspecified*)))) - -(define (expect-string port expected return) - (let loop ((n 0)) - (cond - ;; All characters match. - ((= n (string-length expected)) return) - ;; Go to next characters. - ((eqv? (read-char port) (string-ref expected n)) - (loop (+ n 1))) - ;; Anything else is an error. - (else (json-exception port))))) - -(define (expect-delimiter port delimiter) - (let ((ch (read-char port))) - (cond - ((not (eqv? ch delimiter)) (json-exception port)) - ;; Unexpected EOF. - ((eof-object? ch) (json-exception port))))) - -(define (skip-record-separators port) - (when (eqv? #\rs (peek-char port)) - (read-char port) - (skip-record-separators port))) - -;; -;; Number parsing helpers -;; - -(define (expect-digit port) - (let ((ch (peek-char port))) - (cond - ((not (digit? ch)) (json-exception port)) - ;; Unexpected EOF. - ((eof-object? ch) (json-exception port))))) - -;; Read + or -, and return 1 or -1 respectively. If something different is -;; found, return 1. -(define (read-sign port) - (let ((ch (peek-char port))) - (cond - ((eqv? ch #\+) - (read-char port) - 1) - ((eqv? ch #\-) - (read-char port) - -1) - (else 1)))) - -(define (read-digit-value port) - (let ((ch (read-char port))) - (cond - ((eqv? ch #\0) 0) - ((eqv? ch #\1) 1) - ((eqv? ch #\2) 2) - ((eqv? ch #\3) 3) - ((eqv? ch #\4) 4) - ((eqv? ch #\5) 5) - ((eqv? ch #\6) 6) - ((eqv? ch #\7) 7) - ((eqv? ch #\8) 8) - ((eqv? ch #\9) 9) - (else (json-exception port))))) - -;; Read digits [0..9]. -(define (read-digits port) - (expect-digit port) - (let loop ((ch (peek-char port)) (number 0)) - (cond - ((digit? ch) - (let ((value (read-digit-value port))) - (loop (peek-char port) (+ (* number 10) value)))) - (else number)))) - -(define (read-digits-fraction port) - (expect-digit port) - (let loop ((ch (peek-char port)) (number 0) (length 0)) - (cond - ((digit? ch) - (let ((value (read-digit-value port))) - (loop (peek-char port) (+ (* number 10) value) (+ length 1)))) - (else - (/ number (expt 10 length)))))) - -(define (read-exponent port) - (let ((ch (peek-char port))) - (cond - ((or (eqv? ch #\e) (eqv? ch #\E)) - (read-char port) - (let ((sign (read-sign port)) - (digits (read-digits port))) - (if (<= digits 1000) ;; Some maximum exponent. - (expt 10 (* sign digits)) - (json-exception port)))) - (else 1)))) - -(define (read-fraction port) - (let ((ch (peek-char port))) - (cond - ((eqv? ch #\.) - (read-char port) - (read-digits-fraction port)) - (else 0)))) - -(define (read-positive-number port) - (let* ((number - (let ((ch (peek-char port))) - (cond - ;; Numbers that start with 0 must be a fraction. - ((eqv? ch #\0) - (read-char port) - 0) - ;; Otherwise read more digits. - (else (read-digits port))))) - (fraction (read-fraction port)) - (exponent (read-exponent port)) - (result (* (+ number fraction) exponent))) - (if (and (zero? fraction) (>= exponent 1)) - result - (exact->inexact result)))) - -(define (json-read-number port) - (let ((ch (peek-char port))) - (cond - ;; Negative numbers. - ((eqv? ch #\-) - (read-char port) - (expect-digit port) - (* -1 (read-positive-number port))) - ;; Positive numbers. - ((digit? ch) - (read-positive-number port)) - ;; Anything else is an error. - (else (json-exception port))))) - -;; -;; Object parsing helpers -;; - -(define (read-pair port null ordered) - ;; Read key. - (let ((key (json-read-string port))) - (skip-whitespaces port) - (let ((ch (peek-char port))) - (cond - ;; Skip colon and read value. - ((eqv? ch #\:) - (read-char port) - (cons key (json-read port null ordered))) - ;; Anything other than colon is an error. - (else (json-exception port)))))) - -(define (json-read-object port null ordered) - (expect-delimiter port #\{) - (let loop ((pairs '()) (added #t)) - (skip-whitespaces port) - (let ((ch (peek-char port))) - (cond - ;; End of object. - ((eqv? ch #\}) - (read-char port) - (cond - (added (if ordered (reverse! pairs) pairs)) - (else (json-exception port)))) - ;; Read one pair and continue. - ((eqv? ch #\") - (let ((pair (read-pair port null ordered))) - (loop (cons pair pairs) #t))) - ;; Skip comma and read more pairs. - ((eqv? ch #\,) - (read-char port) - (cond - (added (loop pairs #f)) - (else (json-exception port)))) - ;; Invalid object. - (else (json-exception port)))))) - -;; -;; Array parsing helpers -;; - -(define (json-read-array port null ordered) - (expect-delimiter port #\[) - (skip-whitespaces port) - (cond - ;; Special case when array is empty. - ((eqv? (peek-char port) #\]) - (read-char port) - #()) - (else - ;; Read first element in array. - (let loop ((values (list (json-read port null ordered)))) - (skip-whitespaces port) - (let ((ch (peek-char port))) - (cond - ;; Unexpected EOF. - ((eof-object? ch) (json-exception port)) - ;; Handle comma (if there's a comma there should be another element). - ((eqv? ch #\,) - (read-char port) - (loop (cons (json-read port null ordered) values))) - ;; End of array. - ((eqv? ch #\]) - (read-char port) - (list->vector (reverse! values))) - ;; Anything else other than comma and end of array is wrong. - (else (json-exception port)))))))) - -;; -;; String parsing helpers -;; - -(define (read-hex-digit->integer port) - (let ((ch (read-char port))) - (cond - ((eqv? ch #\0) 0) - ((eqv? ch #\1) 1) - ((eqv? ch #\2) 2) - ((eqv? ch #\3) 3) - ((eqv? ch #\4) 4) - ((eqv? ch #\5) 5) - ((eqv? ch #\6) 6) - ((eqv? ch #\7) 7) - ((eqv? ch #\8) 8) - ((eqv? ch #\9) 9) - ((or (eqv? ch #\A) (eqv? ch #\a)) 10) - ((or (eqv? ch #\B) (eqv? ch #\b)) 11) - ((or (eqv? ch #\C) (eqv? ch #\c)) 12) - ((or (eqv? ch #\D) (eqv? ch #\d)) 13) - ((or (eqv? ch #\E) (eqv? ch #\e)) 14) - ((or (eqv? ch #\F) (eqv? ch #\f)) 15) - (else (json-exception port))))) - -(define (read-unicode-value port) - (+ (* 4096 (read-hex-digit->integer port)) - (* 256 (read-hex-digit->integer port)) - (* 16 (read-hex-digit->integer port)) - (read-hex-digit->integer port))) - -;; Unicode codepoint with surrogates is: -;; 10000 + (high - D800) + (low - DC00) -;; which is equivalent to: -;; (high << 10) + low - 35FDC00 -;; see -;; https://github.com/aconchillo/guile-json/issues/58#issuecomment-662744070 -(define (json-surrogate-pair->unicode high low) - (+ (* high #x400) low #x-35FDC00)) - -(define (read-unicode-char port) - (let ((codepoint (read-unicode-value port))) - (cond - ;; Surrogate pairs. `codepoint` already contains the higher surrogate - ;; (between D800 and DC00) . At this point we are expecting another - ;; \uXXXX that holds the lower surrogate (between DC00 and DFFF). - ((and (>= codepoint #xD800) (< codepoint #xDC00)) - (expect-string port "\\u" #f) - (let ((low-surrogate (read-unicode-value port))) - (if (and (>= low-surrogate #xDC00) (< low-surrogate #xE000)) - (integer->char (json-surrogate-pair->unicode codepoint low-surrogate)) - (json-exception port)))) - ;; Reserved for surrogates (we just need to check starting from the low - ;; surrogates). - ((and (>= codepoint #xDC00) (< codepoint #xE000)) - (json-exception port)) - (else (integer->char codepoint))))) - -(define (read-control-char port) - (let ((ch (read-char port))) - (cond - ((eqv? ch #\") #\") - ((eqv? ch #\\) #\\) - ((eqv? ch #\/) #\/) - ((eqv? ch #\b) #\bs) - ((eqv? ch #\f) #\ff) - ((eqv? ch #\n) #\lf) - ((eqv? ch #\r) #\cr) - ((eqv? ch #\t) #\ht) - ((eqv? ch #\u) (read-unicode-char port)) - (else (json-exception port))))) - -(define (json-read-string port) - (expect-delimiter port #\") - (let loop ((chars '()) (ch (read-char port))) - (cond - ;; Unexpected EOF. - ((eof-object? ch) (json-exception port)) - ;; Unescaped control characters are not allowed. - ((control-char? ch) (json-exception port)) - ;; End of string. - ((eqv? ch #\") (reverse-list->string chars)) - ;; Escaped characters. - ((eqv? ch #\\) - (loop (cons (read-control-char port) chars) (read-char port))) - ;; All other characters. - (else - (loop (cons ch chars) (read-char port)))))) - -;; -;; Booleans and null parsing helpers -;; - -(define (json-read-true port) - (expect-string port "true" #t)) - -(define (json-read-false port) - (expect-string port "false" #f)) - -(define (json-read-null port null) - (expect-string port "null" null)) - -;; -;; Main parser functions -;; - -(define (json-read port null ordered) - (skip-whitespaces port) - (let ((ch (peek-char port))) - (cond - ;; Unexpected EOF. - ((eof-object? ch) (json-exception port)) - ;; Read JSON values. - ((eqv? ch #\t) (json-read-true port)) - ((eqv? ch #\f) (json-read-false port)) - ((eqv? ch #\n) (json-read-null port null)) - ((eqv? ch #\{) (json-read-object port null ordered)) - ((eqv? ch #\[) (json-read-array port null ordered)) - ((eqv? ch #\") (json-read-string port)) - ;; Anything else should be a number. - (else (json-read-number port))))) - -;; -;; Public procedures -;; - -(define* (json->scm #:optional (port (current-input-port)) - #:key (null 'null) (ordered #f) (concatenated #f)) - "Parse a JSON document into native. Takes one optional argument, -@var{port}, which defaults to the current input port from where the JSON -document is read. It also takes a few of keyword arguments: @{null}: value for -JSON's null, it defaults to the 'null symbol, @{ordered} to indicate whether -JSON objects order should be preserved or not (the default) and @{concatenated} -which can be used to tell the parser that more JSON documents might come after a -properly parsed document." - (let loop ((value (json-read port null ordered))) - ;; Skip any trailing whitespaces. - (skip-whitespaces port) - (cond - ;; If we reach the end the parsing succeeded. - ((eof-object? (peek-char port)) value) - ;; If there's anything else other than the end, check if user wants to keep - ;; parsing concatenated valid JSON documents, otherwise parser fails. - (else - (cond (concatenated value) - (else (json-exception port))))))) - -(define* (json-string->scm str #:key (null 'null) (ordered #f)) - "Parse a JSON document into native. Takes a string argument, -@var{str}, that contains the JSON document. It also takes a couple of keyword -argument: @{null}: value for JSON's null, it defaults to the 'null symbol and -@{ordered} to indicate whether JSON objects order should be preserved or -not (the default)." - (call-with-input-string str (lambda (p) (json->scm p #:null null #:ordered ordered)))) - -(define* (json-seq->scm #:optional (port (current-input-port)) - #:key (null 'null) (ordered #f) - (handle-truncate 'skip) (truncated-object 'truncated)) - "Lazy parse a JSON text sequence from the port @var{port}. -This procedure returns a stream of parsed documents. The optional argument -@var{port} defines the port to read from and defaults to the current input -port. It also takes a few keyword arguments: @{null}: value for JSON's null -(defaults to the 'null symbol), @{ordered} to indicate whether JSON objects -order should be preserved or not (the default), @{handle-truncate}: defines how -to handle data loss, @{truncated-object}: used to replace unparsable -objects. Allowed values for @{handle-truncate} argument are 'throw (throw an -exception), 'stop (stop parsing and end the stream), 'skip (default, skip -corrupted fragment and return the next entry), 'replace (skip corrupted fragment -and return @{truncated-object} instead)." - (letrec ((handle-truncation - (case handle-truncate - ((throw) json-exception) - ((stop) (const (eof-object))) - ((skip) - (lambda (port) - (read-delimited "\x1e" port 'peek) - (read-entry port))) - ((replace) - (lambda (port) - (read-delimited "\x1e" port 'peek) - truncated-object)))) - (read-entry - (lambda (port) - (let ((ch (read-char port))) - (cond - ((eof-object? ch) ch) - ((not (eqv? ch #\rs)) - (handle-truncation port)) - (else - (skip-record-separators port) - (catch 'json-invalid - (lambda () - (let ((next (json-read port null ordered))) - (if (eqv? #\lf (peek-char port)) - (begin - (read-char port) - next) - (handle-truncation port)))) - (lambda (_ port) - (handle-truncation port))))))))) - (port->stream port read-entry))) - -(define* (json-seq-string->scm str #:key (null 'null) (ordered #f) - (handle-truncate 'skip) (truncated-object 'truncated)) - "Lazy parse a JSON text sequence from the string @var{str}. -This procedure returns a stream of parsed documents and also takes the same -keyword arguments as @code{json-seq->scm}." - (call-with-input-string str - (lambda (p) - (json-seq->scm p #:null null #:ordered ordered - #:handle-truncate handle-truncate - #:truncated-object truncated-object)))) - -;;; (json parser) ends here diff --git a/private_includes/base/scm/yaml_parser.scm b/private_includes/base/scm/yaml_parser.scm index 82db85b..420d447 100644 --- a/private_includes/base/scm/yaml_parser.scm +++ b/private_includes/base/scm/yaml_parser.scm @@ -1,25 +1,154 @@ -(if (not windows?) (use-modules (ice-9 popen))) -(use-modules (ice-9 textual-ports) (json parser)) +(use-modules (ice-9 rdelim) (ice-9 regex) (ice-9 pretty-print) (srfi srfi-1)) -; We use Python to convert the data yamls like the authors.yml to json, that we can parse in scheme. -; Windows does not like Pipes, so we use a tmpfile instead. -; Be sure you have PyYAML installed. On Windows that could be done for example like "py -m pip install PyYAML" +;; Hauptparsingfunktion (define (yml-file->scm filename) - (if windows? - (let* ((port (make-tmpfile #f)) - (tmpfilepath (port-filename port)) - (ignore (close-port port)) - (python_code (string-append "import sys, yaml, json; f = open(r'" tmpfilepath "', 'w'); f.write(json.dumps(yaml.safe_load(open(r'" filename "')))); f.close()")) - (status (system (string-append (search-executable '("python3" "python" "py")) " -X utf8 -c \"" python_code "\""))) - (readport (open-file tmpfilepath "r" #:encoding "UTF-8")) - (json (get-string-all readport))) - (close-port readport) - (delete-file tmpfilepath) - (json-string->scm (if (status:exit-val status) json "{}"))) - (let* ((python_code (string-append "import sys, yaml, json; print(json.dumps(yaml.safe_load(open(r'" filename "'))))")) - (pipe (open-pipe (string-append "PYTHONHOME='' " (search-executable '("python3" "python" "py")) " -X utf8 -c \"" python_code "\"") OPEN_READ)) - (json (get-string-all pipe))) - (close-pipe pipe) - (json-string->scm json)))) + + ;; Utility: Zeile einlesen + (define (read-lines filename) + (call-with-input-file filename + (lambda (port) + (let loop ((lines '())) + (let ((line (read-line port))) + (if (eof-object? line) + (reverse lines) + (let ((clean (string-trim line))) + (if (or (string=? clean "---") (string-null? clean)) + (loop lines) ;; Ignoriere "---" oder leere Zeile + (loop (cons line lines)))))))))) + + ;; Einrückung bestimmen (Anzahl Leerzeichen am Anfang) + (define (line-indent line) + (let ((match (string-match "^ *" line))) + (if match + (match:end match) ; Anzahl der Leerzeichen = Position nach Leerzeichen + 0))) ; Falls kein Match → 0 + + ;; Kommentar entfernen + (define (strip-comment line) + (let ((m (string-match "#.*" line))) + (if m + (string-trim-right (string-take line (match:start m))) + line))) + + ;; Hilfsfunktion: Whitespace entfernen + (define (clean-line line) + (string-trim (strip-comment line))) + + ;; Ist Zeile leer (nach Entfernen von Kommentar & Whitespace)? + (define (blank-or-comment? line) + (string-null? (clean-line line))) + + ;; Skalare Werte interpretieren + (define (parse-scalar str) + (define (strip-quotes s) + (cond + ((and (string-prefix? "\"" s) (string-suffix? "\"" s)) + (string-drop-right (string-drop s 1) 1)) + ((and (string-prefix? "'" s) (string-suffix? "'" s)) + (string-drop-right (string-drop s 1) 1)) + (else s))) + (let ((s (strip-quotes (string-trim str)))) + (cond + ((string=? s "{}") '()) ;; leere Map + ((string=? s "[]") '()) ;; leere Liste + ((string-match "^[0-9]+$" s) (string->number s)) + ((string=? s "true") #t) + ((string=? s "false") #f) + ((string=? s "null") '()) + (else s)))) + + + ;; Hilfsfunktion: Zeilen mit gleicher oder höherer Einrückung sammeln + (define (take-indented lines min-indent) + (let loop ((ls lines) (acc '())) + (if (null? ls) + (reverse acc) + (let ((line (car ls))) + (if (or (blank-or-comment? line) + (>= (line-indent line) min-indent)) + (loop (cdr ls) (cons line acc)) + (reverse acc)))))) + + ;; Hilfsfunktion: N Zeilen überspringen + (define (drop lst n) + (let loop ((l lst) (i n)) + (if (or (zero? i) (null? l)) + l + (loop (cdr l) (- i 1))))) + + ;; Listenparsing: Liest Zeilen mit `-` als Listeneinträge + (define (parse-list lines current-indent) + (let loop ((ls lines) (result '())) + (if (null? ls) + (reverse result) + (let* ((line (clean-line (car ls)))) + (if (string-match "^-" line) + (let* ((indent (line-indent (car ls))) + (item-str (string-trim (string-drop line 1))) + (next-lines (cdr ls))) + (if (or (null? next-lines) + (> (line-indent (car next-lines)) indent)) + ;; Verschachtelter Inhalt + (let* ((sub (take-indented next-lines (+ indent 2))) + (parsed (if (null? sub) + (parse-scalar item-str) + (parse-lines sub (+ indent 2)))) + (remaining (drop next-lines (length sub)))) + (loop remaining (cons parsed result))) + ;; Einfacher Skalar + (loop next-lines (cons (parse-scalar item-str) result)))) + ;; Nicht mehr Teil der Liste + (reverse result)))))) + + ;; Hauptparser für Key-Value oder Listen + (define (parse-lines lines current-indent) + (let loop ((ls lines) (result '())) + (if (null? ls) + (reverse result) + (let* ((raw-line (car ls)) + (line (clean-line raw-line))) + (cond + ;; Kommentar oder leere Zeile + ((blank-or-comment? raw-line) + (loop (cdr ls) result)) + + ;; Liste + ((string-match "^- " line) + (let ((list-lines (take-indented ls current-indent))) + (let ((parsed-list (parse-list list-lines current-indent))) + (loop (drop ls (length list-lines)) + (cons parsed-list result))))) + + ;; Key: Value + ((string-match "^[^:]+:" line) + (let* ((kv (string-split line #\:)) + (key (string-trim (car kv))) + (value-str (string-trim (string-join (cdr kv) ":"))) + (next-lines (cdr ls))) + (if (string-null? value-str) + ;; Wert auf nachfolgender Einrückungsebene + (let* ((sub (take-indented next-lines (+ current-indent 2))) + (parsed (parse-lines sub (+ current-indent 2))) + (remaining (drop next-lines (length sub)))) + (loop remaining + (cons (cons key parsed) result))) + ;; Einfacher Key:Value + (loop next-lines + (cons (cons key (parse-scalar value-str)) result))))) + + ;; Fehlerhafte Zeile + (else + ;; Vermeide Fehlermeldung für Leerzeilen oder leere Objekte + (if (or (string-null? (string-trim line)) + (member line '("{}" "[]"))) + (loop (cdr ls) result) + (begin + (format (current-error-port) + "Syntaxfehler: Ungültige Zeile: ~a\n" raw-line) + (loop (cdr ls) result)))) + ))))) + + (let ((lines (read-lines filename))) + (parse-lines lines 0))) (define (parse-yml-file filename) (resolve-inherits (yml-file->scm filename)))