Hmm. Auf U+FFFF und dergleichen wurde durch data-rune-p geprueft, das aber
eben nicht ueberall benutzt wurde.  Ich habe die Pruefung jetzt mal direkt
im Decoding eingebaut.

-xmltest/not-wf/sa/171.xml [not validating:] FAILED:
-  well-formedness violation not detected
-[
-    Character FFFF is not legal anywhere in an XML document. ]
This commit is contained in:
dlichteblau
2005-11-27 18:20:08 +00:00
parent b95f1ef093
commit 487338d20e
3 changed files with 25 additions and 24 deletions

View File

@ -168,10 +168,7 @@ xmltest/not-wf/sa/167.xml [not validating:] not-wf [validating:] invalid
xmltest/not-wf/sa/168.xml [not validating:] not-wf [validating:] invalid
xmltest/not-wf/sa/169.xml [not validating:] not-wf [validating:] invalid
xmltest/not-wf/sa/170.xml [not validating:] not-wf [validating:] invalid
xmltest/not-wf/sa/171.xml [not validating:] FAILED:
well-formedness violation not detected
[
Character FFFF is not legal anywhere in an XML document. ]
xmltest/not-wf/sa/171.xml [not validating:] not-wf [validating:] not-wf
xmltest/not-wf/sa/172.xml [not validating:] not-wf [validating:] not-wf
xmltest/not-wf/sa/173.xml [not validating:] not-wf [validating:] not-wf
xmltest/not-wf/sa/174.xml [not validating:] not-wf [validating:] invalid
@ -1856,4 +1853,4 @@ ibm/valid/P86/ibm86v01.xml [not validating:] input [validating:] input
ibm/valid/P87/ibm87v01.xml [not validating:] input [validating:] input
ibm/valid/P88/ibm88v01.xml [not validating:] input [validating:] input
ibm/valid/P89/ibm89v01.xml [not validating:] input [validating:] input
14/1786 tests failed; 376 tests were skipped
13/1786 tests failed; 376 tests were skipped

View File

@ -123,7 +123,10 @@
;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
;; Haelfte fehlt!
(setf (aref out wptr) (logior (ash hi 8) lo))
(let ((x (logior (ash hi 8) lo)))
(when (or (eql x #xFFFE) (eql x #/U+FFFF))
(xerror "not a valid code point: #x~X" x))
(setf (aref out wptr) x))
(setf wptr (%+ 1 wptr))))
(values wptr rptr)))
@ -143,7 +146,10 @@
;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
;; Haelfte fehlt!
(setf (aref out wptr) (logior (ash hi 8) lo))
(let ((x (logior (ash hi 8) lo)))
(when (or (eql x #xFFFE) (eql x #/U+FFFF))
(xerror "not a valid code point: #x~X" x))
(setf (aref out wptr) x))
(setf wptr (%+ 1 wptr))))
(values wptr rptr)))
@ -161,7 +167,9 @@
(when (or (<= #xD800 x #xDBFF)
(<= #xDC00 x #xDFFF))
(xerror "surrogate encoded in UTF-8: #x~X." x))
(cond ((%> x #x10FFFF)
(cond ((or (%> x #x10FFFF)
(eql x #xFFFE)
(eql x #/U+FFFF))
(xerror "not a valid code point: #x~X" x))
((%> x #xFFFF)
(setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10))

View File

@ -1413,15 +1413,18 @@
(definline data-rune-p (rune)
;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
;;
;; FIXME: das halte ich fuer verkehrt. Surrogates als Unicode-Zeichen
;; sind verboten. Das liegt hier aber nicht vor, denn wir arbeiten
;; ja tatsaechlich mit UTF-16. Verboten ist es nur, wenn wir ein
;; solches Zeichen beim Dekodieren finden, das wird aber eben
;; in encodings.lisp bereits geprueft. --david
(let ((c (rune-code rune)))
(or (= c #x9) (= c #xA) (= c #xD)
(<= #x20 c #xD7FF)
(<= #xE000 c #xFFFD)
;;
(<= #xD800 c #xDBFF)
(<= #xDC00 c #xDFFF)
;;
)))
(<= #xDC00 c #xDFFF))))
(defun read-att-value (zinput input mode &optional canon-space-p (delim nil))
(with-rune-collector-2 (collect)
@ -2686,7 +2689,7 @@
((:ENTITY-REF)
(let ((name sem))
(consume-token input)
(append ;; nil #+(OR)
(append
(recurse-on-entity input name :general
(lambda (input)
(prog1
@ -3190,17 +3193,10 @@
(defun read-cdata (input)
(read-data-until* ((lambda (rune)
(declare (type rune rune))
(when (or (and (%rune< rune #/U+0020)
(not (or (%rune= rune #/U+0009)
(%rune= rune #/U+000a)
(%rune= rune #/U+000d))))
;; Surrogates nicht ausschliessen, denn wir
;; haben ja UTF-16 Runen.
#+(or)
(and (%rune<= #/U+D800 rune)
(%rune< rune #/U+E000))
(%rune= rune #/U+FFFE)
(%rune= rune #/U+FFFF))
(when (and (%rune< rune #/U+0020)
(not (or (%rune= rune #/U+0009)
(%rune= rune #/U+000a)
(%rune= rune #/U+000d))))
(wf-error "code point invalid: ~A" rune))
(or (%rune= rune #/<) (%rune= rune #/&)))
input