From dd81cb75f1a98f5e36d27ff15f53778ba7bd089b Mon Sep 17 00:00:00 2001 From: dlichteblau Date: Sun, 27 Nov 2005 12:24:38 +0000 Subject: [PATCH] eof in character references --- XMLCONF | 102 +++++++-------------------------------------- xml/xml-parse.lisp | 53 +++++++++++++---------- 2 files changed, 45 insertions(+), 110 deletions(-) diff --git a/XMLCONF b/XMLCONF index 14e20a3..8732643 100644 --- a/XMLCONF +++ b/XMLCONF @@ -19,10 +19,7 @@ xmltest/not-wf/sa/018.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/019.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/020.xml [not validating:] not-wf [validating:] not-wf xmltest/not-wf/sa/021.xml [not validating:] not-wf [validating:] not-wf -xmltest/not-wf/sa/022.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Character references end with semicolons, always!] +xmltest/not-wf/sa/022.xml [not validating:] not-wf [validating:] not-wf xmltest/not-wf/sa/023.xml [not validating:] not-wf [validating:] not-wf xmltest/not-wf/sa/024.xml [not validating:] not-wf [validating:] invalid xmltest/not-wf/sa/025.xml [not validating:] not-wf [validating:] invalid @@ -916,11 +913,7 @@ oasis/p05fail4.xml [not validating:] not-wf [validating:] not-wf oasis/p05fail5.xml [not validating:] not-wf [validating:] not-wf oasis/p09fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p09fail2.xml [not validating:] not-wf [validating:] not-wf -oasis/p09fail3.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - incomplete character reference - ] +oasis/p09fail3.xml [not validating:] not-wf [validating:] not-wf oasis/p09fail4.xml [not validating:] not-wf [validating:] not-wf oasis/p09fail5.xml [not validating:] not-wf [validating:] not-wf oasis/p10fail1.xml [not validating:] not-wf [validating:] not-wf @@ -1085,18 +1078,10 @@ oasis/p63fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p63fail2.xml [not validating:] not-wf [validating:] not-wf oasis/p64fail1.xml [not validating:] not-wf [validating:] not-wf oasis/p64fail2.xml [not validating:] not-wf [validating:] not-wf -oasis/p66fail1.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - terminating ';' is required - ] +oasis/p66fail1.xml [not validating:] not-wf [validating:] invalid oasis/p66fail2.xml [not validating:] not-wf [validating:] invalid oasis/p66fail3.xml [not validating:] not-wf [validating:] invalid -oasis/p66fail4.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - only hex digits in hex references - ] +oasis/p66fail4.xml [not validating:] not-wf [validating:] invalid oasis/p66fail5.xml [not validating:] not-wf [validating:] invalid oasis/p66fail6.xml [not validating:] not-wf [validating:] invalid oasis/p68fail1.xml [not validating:] not-wf [validating:] not-wf @@ -1694,74 +1679,17 @@ ibm/not-wf/P64/ibm64n02.xml [not validating:] not-wf [validating:] not-wf ibm/not-wf/P64/ibm64n03.xml [not validating:] not-wf [validating:] not-wf ibm/not-wf/P65/ibm65n01.xml [not validating:] not-wf [validating:] not-wf ibm/not-wf/P65/ibm65n02.xml [not validating:] not-wf [validating:] not-wf -ibm/not-wf/P66/ibm66n01.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#002f" is - used as the referred character in the CharRef in the EntityDecl in the DTD. - ] -ibm/not-wf/P66/ibm66n02.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with the semicolon character missing. The semicolon - character is missing at the end of the CharRef in the attribute value in - the STag of element "root". - ] +ibm/not-wf/P66/ibm66n01.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n02.xml [not validating:] not-wf [validating:] not-wf ibm/not-wf/P66/ibm66n03.xml [not validating:] not-wf [validating:] not-wf -ibm/not-wf/P66/ibm66n04.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#5~0" is - used as the referred character in the attribute value in the EmptyElemTag - of the element "root". - ] -ibm/not-wf/P66/ibm66n05.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#x002g" is - used as the referred character in the CharRef in the EntityDecl in the DTD. - ] -ibm/not-wf/P66/ibm66n06.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#x006G" is - used as the referred character in the attribute value in the EmptyElemTag - of the element "root". - ] -ibm/not-wf/P66/ibm66n07.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#0=2f" is - used as the referred character in the CharRef in the EntityDecl in the DTD. - ] -ibm/not-wf/P66/ibm66n08.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#56.0" is - used as the referred character in the attribute value in the EmptyElemTag - of the element "root". - ] -ibm/not-wf/P66/ibm66n09.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#x00/2f" - is used as the referred character in the CharRef in the EntityDecl in the - DTD. - ] -ibm/not-wf/P66/ibm66n10.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#51)" is - used as the referred character in the attribute value in the EmptyElemTag - of the element "root". - ] -ibm/not-wf/P66/ibm66n11.xml [not validating:] FAILED: - The assertion (RUNES:RUNE= CXML::C 59) failed. -[ - Tests CharRef with an illegal character referred to. The "#00 2f" - is used as the referred character in the CharRef in the EntityDecl in the - DTD. - ] +ibm/not-wf/P66/ibm66n04.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n05.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n06.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n07.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n08.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n09.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n10.xml [not validating:] not-wf [validating:] not-wf +ibm/not-wf/P66/ibm66n11.xml [not validating:] not-wf [validating:] not-wf ibm/not-wf/P66/ibm66n12.xml [not validating:] not-wf [validating:] not-wf ibm/not-wf/P66/ibm66n13.xml [not validating:] not-wf [validating:] not-wf ibm/not-wf/P66/ibm66n14.xml [not validating:] not-wf [validating:] not-wf @@ -2357,4 +2285,4 @@ ibm/valid/P86/ibm86v01.xml [not validating:] input [validating:] input ibm/valid/P87/ibm87v01.xml [not validating:] input [validating:] input ibm/valid/P88/ibm88v01.xml [not validating:] input [validating:] input ibm/valid/P89/ibm89v01.xml [not validating:] input [validating:] input -124/1786 tests failed; 376 tests were skipped \ No newline at end of file +110/1786 tests failed; 376 tests were skipped \ No newline at end of file diff --git a/xml/xml-parse.lisp b/xml/xml-parse.lisp index 0f58f01..0ba29ce 100644 --- a/xml/xml-parse.lisp +++ b/xml/xml-parse.lisp @@ -647,12 +647,12 @@ (defun validity-error (x &rest args) (error 'validity-error - :format-control "Validity constraint violated: ~?" + :format-control "Document not valid: ~?" :format-arguments (list x args))) (defun wf-error (x &rest args) (error 'well-formedness-violation - :format-control "Well-formedness violated: ~?" + :format-control "Document not well-formed: ~?" :format-arguments (list x args))) (defun eox (stream &optional x &rest args) @@ -1208,10 +1208,10 @@ (:DOC (cond ((rune= c #/&) - (multiple-value-bind (kind data) (read-entity-ref input) - (cond ((eq kind :NAMED) - (values :ENTITY-REF data) ) - ((eq kind :NUMERIC) + (multiple-value-bind (kind data) (read-entity-like input) + (cond ((eq kind :ENTITY-REFERENCE) + (values :ENTITY-REF data)) + ((eq kind :CHARACTER-REFERENCE) (values :CDATA (with-rune-collector (collect) (%put-unicode-char data collect))))))) @@ -1309,16 +1309,16 @@ (t nil))) -(defun read-entity-ref (input) +(defun read-entity-like (input) "Read an entity reference off the xstream `input'. Returns two values: - either :NAMED in case of a named entity - or :NUMERIC in case of numeric entities. + either :ENTITY-REFERENCE in case of a named entity + or :CHARACTER-REFERENCE in case of character references. The initial #\\& is considered to be consumed already." (let ((c (peek-rune input))) (cond ((eq c :eof) (eox input "EOF after '&'")) ((rune= c #/#) - (values :NUMERIC (read-numeric-entity input))) + (values :CHARACTER-REFERENCE (read-character-reference input))) (t (unless (name-start-rune-p (peek-rune input)) (wf-error "Expecting name after &.")) @@ -1326,7 +1326,7 @@ (setf c (read-rune input)) (unless (rune= c #/\;) (perror input "Expected \";\".")) - (values :NAMED name)))))) + (values :ENTITY-REFERENCE name)))))) (defun read-tag-2 (zinput input kind) (let ((name (read-name-token input)) @@ -1420,7 +1420,7 @@ ((rune= c #/&) (setf c (peek-rune input)) (cond ((rune= c #/#) - (let ((c (read-numeric-entity input))) + (let ((c (read-character-reference input))) (%put-unicode-char c collect))) (t (unless (name-start-rune-p (peek-rune input)) @@ -1476,17 +1476,25 @@ (assert (member delim '(#/\" #/\'))) delim)))))) -(defun read-numeric-entity (input) +(defun check-rune (input actual expected) + (declare (ignore input)) + (unless (eql actual expected) + (wf-error "expected #/~A but found #/~A" + (rune-char expected) + (rune-char actual)))) + +(defun read-character-reference (input) ;; xxx eof handling ;; The #/& is already read (let ((res (let ((c (read-rune input))) - (assert (rune= c #/#)) + (check-rune input c #/#) (setq c (read-rune input)) - (cond ((rune= c #/x) + (cond ((eql c #/x) ;; hexadecimal (setq c (read-rune input)) - (assert (digit-rune-p c 16)) + (unless (digit-rune-p c 16) + (wf-error "garbage in character reference")) (prog1 (parse-integer (with-output-to-string (sink) @@ -1494,8 +1502,7 @@ (while (digit-rune-p (setq c (read-rune input)) 16) (write-char (rune-char c) sink))) :radix 16) - (assert (rune= c #/\;))) - ) + (check-rune input c #/\;))) ((rune<= #/0 c #/9) ;; decimal (prog1 @@ -1505,7 +1512,7 @@ (while (rune<= #/0 (setq c (read-rune input)) #/9) (write-char (rune-char c) sink))) :radix 10) - (assert (rune= c #/\;))) ) + (check-rune input c #/\;))) (t (wf-error "Bad char in numeric character entity.") ))))) (unless (code-data-char-p res) @@ -3185,7 +3192,7 @@ ((rune= c #/&) (setf c (peek-rune input)) (cond ((rune= c #/#) - (let ((c (read-numeric-entity input))) + (let ((c (read-character-reference input))) (%put-unicode-char c collect))) (t (unless (name-start-rune-p (peek-rune input)) @@ -3248,11 +3255,11 @@ ((rune= c #/<) (wf-error "'<' not allowed in attribute values")) ((rune= #/& c) - (multiple-value-bind (kind sem) (read-entity-ref input) + (multiple-value-bind (kind sem) (read-entity-like input) (ecase kind - (:NUMERIC + (:CHARACTER-REFERENCE (%put-unicode-char sem collect)) - (:NAMED + (:ENTITY-REFERENCE (let* ((exp (internal-entity-expansion sem)) (n (length exp))) (declare (type (simple-array rune (*)) exp))