eof in character references

This commit is contained in:
dlichteblau
2005-11-27 12:24:38 +00:00
parent 86c843138c
commit dd81cb75f1
2 changed files with 45 additions and 110 deletions

102
XMLCONF
View File

@ -19,10 +19,7 @@ xmltest/not-wf/sa/018.xml [not validating:] not-wf [validating:] invalid
xmltest/not-wf/sa/019.xml [not validating:] not-wf [validating:] invalid
xmltest/not-wf/sa/020.xml [not validating:] not-wf [validating:] not-wf
xmltest/not-wf/sa/021.xml [not validating:] not-wf [validating:] not-wf
xmltest/not-wf/sa/022.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Character references end with semicolons, always!]
xmltest/not-wf/sa/022.xml [not validating:] not-wf [validating:] not-wf
xmltest/not-wf/sa/023.xml [not validating:] not-wf [validating:] not-wf
xmltest/not-wf/sa/024.xml [not validating:] not-wf [validating:] invalid
xmltest/not-wf/sa/025.xml [not validating:] not-wf [validating:] invalid
@ -916,11 +913,7 @@ oasis/p05fail4.xml [not validating:] not-wf [validating:] not-wf
oasis/p05fail5.xml [not validating:] not-wf [validating:] not-wf
oasis/p09fail1.xml [not validating:] not-wf [validating:] not-wf
oasis/p09fail2.xml [not validating:] not-wf [validating:] not-wf
oasis/p09fail3.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
incomplete character reference
]
oasis/p09fail3.xml [not validating:] not-wf [validating:] not-wf
oasis/p09fail4.xml [not validating:] not-wf [validating:] not-wf
oasis/p09fail5.xml [not validating:] not-wf [validating:] not-wf
oasis/p10fail1.xml [not validating:] not-wf [validating:] not-wf
@ -1085,18 +1078,10 @@ oasis/p63fail1.xml [not validating:] not-wf [validating:] not-wf
oasis/p63fail2.xml [not validating:] not-wf [validating:] not-wf
oasis/p64fail1.xml [not validating:] not-wf [validating:] not-wf
oasis/p64fail2.xml [not validating:] not-wf [validating:] not-wf
oasis/p66fail1.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
terminating ';' is required
]
oasis/p66fail1.xml [not validating:] not-wf [validating:] invalid
oasis/p66fail2.xml [not validating:] not-wf [validating:] invalid
oasis/p66fail3.xml [not validating:] not-wf [validating:] invalid
oasis/p66fail4.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
only hex digits in hex references
]
oasis/p66fail4.xml [not validating:] not-wf [validating:] invalid
oasis/p66fail5.xml [not validating:] not-wf [validating:] invalid
oasis/p66fail6.xml [not validating:] not-wf [validating:] invalid
oasis/p68fail1.xml [not validating:] not-wf [validating:] not-wf
@ -1694,74 +1679,17 @@ ibm/not-wf/P64/ibm64n02.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P64/ibm64n03.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P65/ibm65n01.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P65/ibm65n02.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n01.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#002f" is
used as the referred character in the CharRef in the EntityDecl in the DTD.
]
ibm/not-wf/P66/ibm66n02.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with the semicolon character missing. The semicolon
character is missing at the end of the CharRef in the attribute value in
the STag of element "root".
]
ibm/not-wf/P66/ibm66n01.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n02.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n03.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n04.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#5~0" is
used as the referred character in the attribute value in the EmptyElemTag
of the element "root".
]
ibm/not-wf/P66/ibm66n05.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#x002g" is
used as the referred character in the CharRef in the EntityDecl in the DTD.
]
ibm/not-wf/P66/ibm66n06.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#x006G" is
used as the referred character in the attribute value in the EmptyElemTag
of the element "root".
]
ibm/not-wf/P66/ibm66n07.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#0=2f" is
used as the referred character in the CharRef in the EntityDecl in the DTD.
]
ibm/not-wf/P66/ibm66n08.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#56.0" is
used as the referred character in the attribute value in the EmptyElemTag
of the element "root".
]
ibm/not-wf/P66/ibm66n09.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#x00/2f"
is used as the referred character in the CharRef in the EntityDecl in the
DTD.
]
ibm/not-wf/P66/ibm66n10.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#51)" is
used as the referred character in the attribute value in the EmptyElemTag
of the element "root".
]
ibm/not-wf/P66/ibm66n11.xml [not validating:] FAILED:
The assertion (RUNES:RUNE= CXML::C 59) failed.
[
Tests CharRef with an illegal character referred to. The "#00 2f"
is used as the referred character in the CharRef in the EntityDecl in the
DTD.
]
ibm/not-wf/P66/ibm66n04.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n05.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n06.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n07.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n08.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n09.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n10.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n11.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n12.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n13.xml [not validating:] not-wf [validating:] not-wf
ibm/not-wf/P66/ibm66n14.xml [not validating:] not-wf [validating:] not-wf
@ -2357,4 +2285,4 @@ ibm/valid/P86/ibm86v01.xml [not validating:] input [validating:] input
ibm/valid/P87/ibm87v01.xml [not validating:] input [validating:] input
ibm/valid/P88/ibm88v01.xml [not validating:] input [validating:] input
ibm/valid/P89/ibm89v01.xml [not validating:] input [validating:] input
124/1786 tests failed; 376 tests were skipped
110/1786 tests failed; 376 tests were skipped

View File

@ -647,12 +647,12 @@
(defun validity-error (x &rest args)
  "Signal a VALIDITY-ERROR condition.
X is a format control string and ARGS are its format arguments; both are
spliced into the outer message through the ~? directive."
  ;; Only one :format-control may be supplied: with duplicate keyword
  ;; arguments the leftmost one wins, so a stale message listed first
  ;; would silently shadow the intended one.
  (error 'validity-error
         :format-control "Document not valid: ~?"
         :format-arguments (list x args)))
(defun wf-error (x &rest args)
  "Signal a WELL-FORMEDNESS-VIOLATION condition.
X is a format control string and ARGS are its format arguments; both are
spliced into the outer message through the ~? directive."
  ;; Only one :format-control may be supplied: with duplicate keyword
  ;; arguments the leftmost one wins, so a stale message listed first
  ;; would silently shadow the intended one.
  (error 'well-formedness-violation
         :format-control "Document not well-formed: ~?"
         :format-arguments (list x args)))
(defun eox (stream &optional x &rest args)
@ -1208,10 +1208,10 @@
(:DOC
(cond
((rune= c #/&)
(multiple-value-bind (kind data) (read-entity-ref input)
(cond ((eq kind :NAMED)
(values :ENTITY-REF data) )
((eq kind :NUMERIC)
(multiple-value-bind (kind data) (read-entity-like input)
(cond ((eq kind :ENTITY-REFERENCE)
(values :ENTITY-REF data))
((eq kind :CHARACTER-REFERENCE)
(values :CDATA
(with-rune-collector (collect)
(%put-unicode-char data collect)))))))
@ -1309,16 +1309,16 @@
(t
nil)))
(defun read-entity-ref (input)
(defun read-entity-like (input)
"Read an entity reference off the xstream `input'. Returns two values:
either :NAMED <interned-rod> in case of a named entity
or :NUMERIC <integer> in case of numeric entities.
either :ENTITY-REFERENCE <interned-rod> in case of a named entity
or :CHARACTER-REFERENCE <integer> in case of character references.
The initial #\\& is considered to be consumed already."
(let ((c (peek-rune input)))
(cond ((eq c :eof)
(eox input "EOF after '&'"))
((rune= c #/#)
(values :NUMERIC (read-numeric-entity input)))
(values :CHARACTER-REFERENCE (read-character-reference input)))
(t
(unless (name-start-rune-p (peek-rune input))
(wf-error "Expecting name after &."))
@ -1326,7 +1326,7 @@
(setf c (read-rune input))
(unless (rune= c #/\;)
(perror input "Expected \";\"."))
(values :NAMED name))))))
(values :ENTITY-REFERENCE name))))))
(defun read-tag-2 (zinput input kind)
(let ((name (read-name-token input))
@ -1420,7 +1420,7 @@
((rune= c #/&)
(setf c (peek-rune input))
(cond ((rune= c #/#)
(let ((c (read-numeric-entity input)))
(let ((c (read-character-reference input)))
(%put-unicode-char c collect)))
(t
(unless (name-start-rune-p (peek-rune input))
@ -1476,17 +1476,25 @@
(assert (member delim '(#/\" #/\')))
delim))))))
(defun read-numeric-entity (input)
(defun check-rune (input actual expected)
  "Signal a well-formedness error unless ACTUAL is the rune EXPECTED.
INPUT is accepted for symmetry with the other reader helpers but is not
used.  ACTUAL is compared with EQL, so a non-rune value such as :EOF is
simply treated as a mismatch rather than a type error."
  (declare (ignore input))
  (unless (eql actual expected)
    ;; NOTE(review): end of input is reported as :EOF by the rune readers
    ;; (see the :eof test after peek-rune); presumably read-rune does the
    ;; same.  :EOF is not a rune, so do not hand it to RUNE-CHAR -- report
    ;; the premature end of file explicitly instead.
    (if (eq actual :eof)
        (wf-error "expected #/~A but found end of file"
                  (rune-char expected))
        (wf-error "expected #/~A but found #/~A"
                  (rune-char expected)
                  (rune-char actual)))))
(defun read-character-reference (input)
;; xxx eof handling
;; The #/& is already read
(let ((res
(let ((c (read-rune input)))
(assert (rune= c #/#))
(check-rune input c #/#)
(setq c (read-rune input))
(cond ((rune= c #/x)
(cond ((eql c #/x)
;; hexadecimal
(setq c (read-rune input))
(assert (digit-rune-p c 16))
(unless (digit-rune-p c 16)
(wf-error "garbage in character reference"))
(prog1
(parse-integer
(with-output-to-string (sink)
@ -1494,8 +1502,7 @@
(while (digit-rune-p (setq c (read-rune input)) 16)
(write-char (rune-char c) sink)))
:radix 16)
(assert (rune= c #/\;)))
)
(check-rune input c #/\;)))
((rune<= #/0 c #/9)
;; decimal
(prog1
@ -1505,7 +1512,7 @@
(while (rune<= #/0 (setq c (read-rune input)) #/9)
(write-char (rune-char c) sink)))
:radix 10)
(assert (rune= c #/\;))) )
(check-rune input c #/\;)))
(t
(wf-error "Bad char in numeric character entity.") )))))
(unless (code-data-char-p res)
@ -3185,7 +3192,7 @@
((rune= c #/&)
(setf c (peek-rune input))
(cond ((rune= c #/#)
(let ((c (read-numeric-entity input)))
(let ((c (read-character-reference input)))
(%put-unicode-char c collect)))
(t
(unless (name-start-rune-p (peek-rune input))
@ -3248,11 +3255,11 @@
((rune= c #/<)
(wf-error "'<' not allowed in attribute values"))
((rune= #/& c)
(multiple-value-bind (kind sem) (read-entity-ref input)
(multiple-value-bind (kind sem) (read-entity-like input)
(ecase kind
(:NUMERIC
(:CHARACTER-REFERENCE
(%put-unicode-char sem collect))
(:NAMED
(:ENTITY-REFERENCE
(let* ((exp (internal-entity-expansion sem))
(n (length exp)))
(declare (type (simple-array rune (*)) exp))