483 lines
15 KiB
Scheme
483 lines
15 KiB
Scheme
;;; (json parser) --- Guile JSON implementation.
|
|
|
|
;; Copyright (C) 2013-2020 Aleix Conchillo Flaque <aconchillo@gmail.com>
|
|
;;
|
|
;; This file is part of guile-json.
|
|
;;
|
|
;; guile-json is free software: you can redistribute it and/or modify
|
|
;; it under the terms of the GNU General Public License as published by
|
|
;; the Free Software Foundation; either version 3 of the License, or
|
|
;; (at your option) any later version.
|
|
;;
|
|
;; guile-json is distributed in the hope that it will be useful, but
|
|
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;; General Public License for more details.
|
|
;;
|
|
;; You should have received a copy of the GNU General Public License
|
|
;; along with guile-json. If not, see https://www.gnu.org/licenses/.
|
|
|
|
;;; Commentary:
|
|
|
|
;; JSON module for Guile
|
|
|
|
;;; Code:
|
|
|
|
(define-module (json parser)
|
|
#:use-module (ice-9 rdelim)
|
|
#:use-module (ice-9 streams)
|
|
#:use-module (rnrs io ports)
|
|
#:export (json->scm
|
|
json-string->scm
|
|
json-seq->scm
|
|
json-seq-string->scm))
|
|
|
|
;;
|
|
;; Miscellaneuos helpers
|
|
;;
|
|
|
|
(define (json-exception port)
|
|
(throw 'json-invalid port))
|
|
|
|
(define (digit? c)
|
|
(case c
|
|
((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) #t)
|
|
(else #f)))
|
|
|
|
(define (whitespace? c)
|
|
(case c
|
|
((#\sp #\ht #\lf #\cr) #t)
|
|
(else #f)))
|
|
|
|
(define (control-char? ch)
|
|
(<= (char->integer ch) #x1F))
|
|
|
|
(define (skip-whitespaces port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
((whitespace? ch)
|
|
(read-char port)
|
|
(skip-whitespaces port))
|
|
(else *unspecified*))))
|
|
|
|
(define (expect-string port expected return)
|
|
(let loop ((n 0))
|
|
(cond
|
|
;; All characters match.
|
|
((= n (string-length expected)) return)
|
|
;; Go to next characters.
|
|
((eqv? (read-char port) (string-ref expected n))
|
|
(loop (+ n 1)))
|
|
;; Anything else is an error.
|
|
(else (json-exception port)))))
|
|
|
|
(define (expect-delimiter port delimiter)
|
|
(let ((ch (read-char port)))
|
|
(cond
|
|
((not (eqv? ch delimiter)) (json-exception port))
|
|
;; Unexpected EOF.
|
|
((eof-object? ch) (json-exception port)))))
|
|
|
|
(define (skip-record-separators port)
|
|
(when (eqv? #\rs (peek-char port))
|
|
(read-char port)
|
|
(skip-record-separators port)))
|
|
|
|
;;
|
|
;; Number parsing helpers
|
|
;;
|
|
|
|
(define (expect-digit port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
((not (digit? ch)) (json-exception port))
|
|
;; Unexpected EOF.
|
|
((eof-object? ch) (json-exception port)))))
|
|
|
|
;; Read + or -, and return 1 or -1 respectively. If something different is
|
|
;; found, return 1.
|
|
(define (read-sign port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
((eqv? ch #\+)
|
|
(read-char port)
|
|
1)
|
|
((eqv? ch #\-)
|
|
(read-char port)
|
|
-1)
|
|
(else 1))))
|
|
|
|
(define (read-digit-value port)
|
|
(let ((ch (read-char port)))
|
|
(cond
|
|
((eqv? ch #\0) 0)
|
|
((eqv? ch #\1) 1)
|
|
((eqv? ch #\2) 2)
|
|
((eqv? ch #\3) 3)
|
|
((eqv? ch #\4) 4)
|
|
((eqv? ch #\5) 5)
|
|
((eqv? ch #\6) 6)
|
|
((eqv? ch #\7) 7)
|
|
((eqv? ch #\8) 8)
|
|
((eqv? ch #\9) 9)
|
|
(else (json-exception port)))))
|
|
|
|
;; Read digits [0..9].
|
|
(define (read-digits port)
|
|
(expect-digit port)
|
|
(let loop ((ch (peek-char port)) (number 0))
|
|
(cond
|
|
((digit? ch)
|
|
(let ((value (read-digit-value port)))
|
|
(loop (peek-char port) (+ (* number 10) value))))
|
|
(else number))))
|
|
|
|
(define (read-digits-fraction port)
|
|
(expect-digit port)
|
|
(let loop ((ch (peek-char port)) (number 0) (length 0))
|
|
(cond
|
|
((digit? ch)
|
|
(let ((value (read-digit-value port)))
|
|
(loop (peek-char port) (+ (* number 10) value) (+ length 1))))
|
|
(else
|
|
(/ number (expt 10 length))))))
|
|
|
|
(define (read-exponent port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
((or (eqv? ch #\e) (eqv? ch #\E))
|
|
(read-char port)
|
|
(let ((sign (read-sign port))
|
|
(digits (read-digits port)))
|
|
(if (<= digits 1000) ;; Some maximum exponent.
|
|
(expt 10 (* sign digits))
|
|
(json-exception port))))
|
|
(else 1))))
|
|
|
|
(define (read-fraction port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
((eqv? ch #\.)
|
|
(read-char port)
|
|
(read-digits-fraction port))
|
|
(else 0))))
|
|
|
|
(define (read-positive-number port)
|
|
(let* ((number
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
;; Numbers that start with 0 must be a fraction.
|
|
((eqv? ch #\0)
|
|
(read-char port)
|
|
0)
|
|
;; Otherwise read more digits.
|
|
(else (read-digits port)))))
|
|
(fraction (read-fraction port))
|
|
(exponent (read-exponent port))
|
|
(result (* (+ number fraction) exponent)))
|
|
(if (and (zero? fraction) (>= exponent 1))
|
|
result
|
|
(exact->inexact result))))
|
|
|
|
(define (json-read-number port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
;; Negative numbers.
|
|
((eqv? ch #\-)
|
|
(read-char port)
|
|
(expect-digit port)
|
|
(* -1 (read-positive-number port)))
|
|
;; Positive numbers.
|
|
((digit? ch)
|
|
(read-positive-number port))
|
|
;; Anything else is an error.
|
|
(else (json-exception port)))))
|
|
|
|
;;
|
|
;; Object parsing helpers
|
|
;;
|
|
|
|
(define (read-pair port null ordered)
|
|
;; Read key.
|
|
(let ((key (json-read-string port)))
|
|
(skip-whitespaces port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
;; Skip colon and read value.
|
|
((eqv? ch #\:)
|
|
(read-char port)
|
|
(cons key (json-read port null ordered)))
|
|
;; Anything other than colon is an error.
|
|
(else (json-exception port))))))
|
|
|
|
(define (json-read-object port null ordered)
|
|
(expect-delimiter port #\{)
|
|
(let loop ((pairs '()) (added #t))
|
|
(skip-whitespaces port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
;; End of object.
|
|
((eqv? ch #\})
|
|
(read-char port)
|
|
(cond
|
|
(added (if ordered (reverse! pairs) pairs))
|
|
(else (json-exception port))))
|
|
;; Read one pair and continue.
|
|
((eqv? ch #\")
|
|
(let ((pair (read-pair port null ordered)))
|
|
(loop (cons pair pairs) #t)))
|
|
;; Skip comma and read more pairs.
|
|
((eqv? ch #\,)
|
|
(read-char port)
|
|
(cond
|
|
(added (loop pairs #f))
|
|
(else (json-exception port))))
|
|
;; Invalid object.
|
|
(else (json-exception port))))))
|
|
|
|
;;
|
|
;; Array parsing helpers
|
|
;;
|
|
|
|
(define (json-read-array port null ordered)
|
|
(expect-delimiter port #\[)
|
|
(skip-whitespaces port)
|
|
(cond
|
|
;; Special case when array is empty.
|
|
((eqv? (peek-char port) #\])
|
|
(read-char port)
|
|
#())
|
|
(else
|
|
;; Read first element in array.
|
|
(let loop ((values (list (json-read port null ordered))))
|
|
(skip-whitespaces port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
;; Unexpected EOF.
|
|
((eof-object? ch) (json-exception port))
|
|
;; Handle comma (if there's a comma there should be another element).
|
|
((eqv? ch #\,)
|
|
(read-char port)
|
|
(loop (cons (json-read port null ordered) values)))
|
|
;; End of array.
|
|
((eqv? ch #\])
|
|
(read-char port)
|
|
(list->vector (reverse! values)))
|
|
;; Anything else other than comma and end of array is wrong.
|
|
(else (json-exception port))))))))
|
|
|
|
;;
|
|
;; String parsing helpers
|
|
;;
|
|
|
|
(define (read-hex-digit->integer port)
|
|
(let ((ch (read-char port)))
|
|
(cond
|
|
((eqv? ch #\0) 0)
|
|
((eqv? ch #\1) 1)
|
|
((eqv? ch #\2) 2)
|
|
((eqv? ch #\3) 3)
|
|
((eqv? ch #\4) 4)
|
|
((eqv? ch #\5) 5)
|
|
((eqv? ch #\6) 6)
|
|
((eqv? ch #\7) 7)
|
|
((eqv? ch #\8) 8)
|
|
((eqv? ch #\9) 9)
|
|
((or (eqv? ch #\A) (eqv? ch #\a)) 10)
|
|
((or (eqv? ch #\B) (eqv? ch #\b)) 11)
|
|
((or (eqv? ch #\C) (eqv? ch #\c)) 12)
|
|
((or (eqv? ch #\D) (eqv? ch #\d)) 13)
|
|
((or (eqv? ch #\E) (eqv? ch #\e)) 14)
|
|
((or (eqv? ch #\F) (eqv? ch #\f)) 15)
|
|
(else (json-exception port)))))
|
|
|
|
(define (read-unicode-value port)
|
|
(+ (* 4096 (read-hex-digit->integer port))
|
|
(* 256 (read-hex-digit->integer port))
|
|
(* 16 (read-hex-digit->integer port))
|
|
(read-hex-digit->integer port)))
|
|
|
|
;; Unicode codepoint with surrogates is:
|
|
;; 10000 + (high - D800) + (low - DC00)
|
|
;; which is equivalent to:
|
|
;; (high << 10) + low - 35FDC00
|
|
;; see
|
|
;; https://github.com/aconchillo/guile-json/issues/58#issuecomment-662744070
|
|
(define (json-surrogate-pair->unicode high low)
|
|
(+ (* high #x400) low #x-35FDC00))
|
|
|
|
(define (read-unicode-char port)
|
|
(let ((codepoint (read-unicode-value port)))
|
|
(cond
|
|
;; Surrogate pairs. `codepoint` already contains the higher surrogate
|
|
;; (between D800 and DC00) . At this point we are expecting another
|
|
;; \uXXXX that holds the lower surrogate (between DC00 and DFFF).
|
|
((and (>= codepoint #xD800) (< codepoint #xDC00))
|
|
(expect-string port "\\u" #f)
|
|
(let ((low-surrogate (read-unicode-value port)))
|
|
(if (and (>= low-surrogate #xDC00) (< low-surrogate #xE000))
|
|
(integer->char (json-surrogate-pair->unicode codepoint low-surrogate))
|
|
(json-exception port))))
|
|
;; Reserved for surrogates (we just need to check starting from the low
|
|
;; surrogates).
|
|
((and (>= codepoint #xDC00) (< codepoint #xE000))
|
|
(json-exception port))
|
|
(else (integer->char codepoint)))))
|
|
|
|
(define (read-control-char port)
|
|
(let ((ch (read-char port)))
|
|
(cond
|
|
((eqv? ch #\") #\")
|
|
((eqv? ch #\\) #\\)
|
|
((eqv? ch #\/) #\/)
|
|
((eqv? ch #\b) #\bs)
|
|
((eqv? ch #\f) #\ff)
|
|
((eqv? ch #\n) #\lf)
|
|
((eqv? ch #\r) #\cr)
|
|
((eqv? ch #\t) #\ht)
|
|
((eqv? ch #\u) (read-unicode-char port))
|
|
(else (json-exception port)))))
|
|
|
|
(define (json-read-string port)
|
|
(expect-delimiter port #\")
|
|
(let loop ((chars '()) (ch (read-char port)))
|
|
(cond
|
|
;; Unexpected EOF.
|
|
((eof-object? ch) (json-exception port))
|
|
;; Unescaped control characters are not allowed.
|
|
((control-char? ch) (json-exception port))
|
|
;; End of string.
|
|
((eqv? ch #\") (reverse-list->string chars))
|
|
;; Escaped characters.
|
|
((eqv? ch #\\)
|
|
(loop (cons (read-control-char port) chars) (read-char port)))
|
|
;; All other characters.
|
|
(else
|
|
(loop (cons ch chars) (read-char port))))))
|
|
|
|
;;
|
|
;; Booleans and null parsing helpers
|
|
;;
|
|
|
|
(define (json-read-true port)
|
|
(expect-string port "true" #t))
|
|
|
|
(define (json-read-false port)
|
|
(expect-string port "false" #f))
|
|
|
|
(define (json-read-null port null)
|
|
(expect-string port "null" null))
|
|
|
|
;;
|
|
;; Main parser functions
|
|
;;
|
|
|
|
(define (json-read port null ordered)
|
|
(skip-whitespaces port)
|
|
(let ((ch (peek-char port)))
|
|
(cond
|
|
;; Unexpected EOF.
|
|
((eof-object? ch) (json-exception port))
|
|
;; Read JSON values.
|
|
((eqv? ch #\t) (json-read-true port))
|
|
((eqv? ch #\f) (json-read-false port))
|
|
((eqv? ch #\n) (json-read-null port null))
|
|
((eqv? ch #\{) (json-read-object port null ordered))
|
|
((eqv? ch #\[) (json-read-array port null ordered))
|
|
((eqv? ch #\") (json-read-string port))
|
|
;; Anything else should be a number.
|
|
(else (json-read-number port)))))
|
|
|
|
;;
|
|
;; Public procedures
|
|
;;
|
|
|
|
(define* (json->scm #:optional (port (current-input-port))
|
|
#:key (null 'null) (ordered #f) (concatenated #f))
|
|
"Parse a JSON document into native. Takes one optional argument,
|
|
@var{port}, which defaults to the current input port from where the JSON
|
|
document is read. It also takes a few of keyword arguments: @{null}: value for
|
|
JSON's null, it defaults to the 'null symbol, @{ordered} to indicate whether
|
|
JSON objects order should be preserved or not (the default) and @{concatenated}
|
|
which can be used to tell the parser that more JSON documents might come after a
|
|
properly parsed document."
|
|
(let loop ((value (json-read port null ordered)))
|
|
;; Skip any trailing whitespaces.
|
|
(skip-whitespaces port)
|
|
(cond
|
|
;; If we reach the end the parsing succeeded.
|
|
((eof-object? (peek-char port)) value)
|
|
;; If there's anything else other than the end, check if user wants to keep
|
|
;; parsing concatenated valid JSON documents, otherwise parser fails.
|
|
(else
|
|
(cond (concatenated value)
|
|
(else (json-exception port)))))))
|
|
|
|
(define* (json-string->scm str #:key (null 'null) (ordered #f))
|
|
"Parse a JSON document into native. Takes a string argument,
|
|
@var{str}, that contains the JSON document. It also takes a couple of keyword
|
|
argument: @{null}: value for JSON's null, it defaults to the 'null symbol and
|
|
@{ordered} to indicate whether JSON objects order should be preserved or
|
|
not (the default)."
|
|
(call-with-input-string str (lambda (p) (json->scm p #:null null #:ordered ordered))))
|
|
|
|
(define* (json-seq->scm #:optional (port (current-input-port))
|
|
#:key (null 'null) (ordered #f)
|
|
(handle-truncate 'skip) (truncated-object 'truncated))
|
|
"Lazy parse a JSON text sequence from the port @var{port}.
|
|
This procedure returns a stream of parsed documents. The optional argument
|
|
@var{port} defines the port to read from and defaults to the current input
|
|
port. It also takes a few keyword arguments: @{null}: value for JSON's null
|
|
(defaults to the 'null symbol), @{ordered} to indicate whether JSON objects
|
|
order should be preserved or not (the default), @{handle-truncate}: defines how
|
|
to handle data loss, @{truncated-object}: used to replace unparsable
|
|
objects. Allowed values for @{handle-truncate} argument are 'throw (throw an
|
|
exception), 'stop (stop parsing and end the stream), 'skip (default, skip
|
|
corrupted fragment and return the next entry), 'replace (skip corrupted fragment
|
|
and return @{truncated-object} instead)."
|
|
(letrec ((handle-truncation
|
|
(case handle-truncate
|
|
((throw) json-exception)
|
|
((stop) (const (eof-object)))
|
|
((skip)
|
|
(lambda (port)
|
|
(read-delimited "\x1e" port 'peek)
|
|
(read-entry port)))
|
|
((replace)
|
|
(lambda (port)
|
|
(read-delimited "\x1e" port 'peek)
|
|
truncated-object))))
|
|
(read-entry
|
|
(lambda (port)
|
|
(let ((ch (read-char port)))
|
|
(cond
|
|
((eof-object? ch) ch)
|
|
((not (eqv? ch #\rs))
|
|
(handle-truncation port))
|
|
(else
|
|
(skip-record-separators port)
|
|
(catch 'json-invalid
|
|
(lambda ()
|
|
(let ((next (json-read port null ordered)))
|
|
(if (eqv? #\lf (peek-char port))
|
|
(begin
|
|
(read-char port)
|
|
next)
|
|
(handle-truncation port))))
|
|
(lambda (_ port)
|
|
(handle-truncation port)))))))))
|
|
(port->stream port read-entry)))
|
|
|
|
(define* (json-seq-string->scm str #:key (null 'null) (ordered #f)
|
|
(handle-truncate 'skip) (truncated-object 'truncated))
|
|
"Lazy parse a JSON text sequence from the string @var{str}.
|
|
This procedure returns a stream of parsed documents and also takes the same
|
|
keyword arguments as @code{json-seq->scm}."
|
|
(call-with-input-string str
|
|
(lambda (p)
|
|
(json-seq->scm p #:null null #:ordered ordered
|
|
#:handle-truncate handle-truncate
|
|
#:truncated-object truncated-object))))
|
|
|
|
;;; (json parser) ends here
|