Hmm. Auf U+ffff und dergleichen wurde durch data-rune-p geprueft,
das aber eben nicht ueberall benutzt wurde. Ich habe die Pruefung jetzt mal direkt im Decoding eingebaut.

-xmltest/not-wf/sa/171.xml [not validating:] FAILED:
-  well-formedness violation not detected
-[
-  Character FFFF is not legal anywhere in an XML document. ]
This commit is contained in:
7
XMLCONF
7
XMLCONF
@ -168,10 +168,7 @@ xmltest/not-wf/sa/167.xml [not validating:] not-wf [validating:] invalid
|
||||
xmltest/not-wf/sa/168.xml [not validating:] not-wf [validating:] invalid
|
||||
xmltest/not-wf/sa/169.xml [not validating:] not-wf [validating:] invalid
|
||||
xmltest/not-wf/sa/170.xml [not validating:] not-wf [validating:] invalid
|
||||
xmltest/not-wf/sa/171.xml [not validating:] FAILED:
|
||||
well-formedness violation not detected
|
||||
[
|
||||
Character FFFF is not legal anywhere in an XML document. ]
|
||||
xmltest/not-wf/sa/171.xml [not validating:] not-wf [validating:] not-wf
|
||||
xmltest/not-wf/sa/172.xml [not validating:] not-wf [validating:] not-wf
|
||||
xmltest/not-wf/sa/173.xml [not validating:] not-wf [validating:] not-wf
|
||||
xmltest/not-wf/sa/174.xml [not validating:] not-wf [validating:] invalid
|
||||
@ -1856,4 +1853,4 @@ ibm/valid/P86/ibm86v01.xml [not validating:] input [validating:] input
|
||||
ibm/valid/P87/ibm87v01.xml [not validating:] input [validating:] input
|
||||
ibm/valid/P88/ibm88v01.xml [not validating:] input [validating:] input
|
||||
ibm/valid/P89/ibm89v01.xml [not validating:] input [validating:] input
|
||||
14/1786 tests failed; 376 tests were skipped
|
||||
13/1786 tests failed; 376 tests were skipped
|
||||
@ -123,7 +123,10 @@
|
||||
;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
|
||||
;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
|
||||
;; Haelfte fehlt!
|
||||
(setf (aref out wptr) (logior (ash hi 8) lo))
|
||||
(let ((x (logior (ash hi 8) lo)))
|
||||
(when (or (eql x #xFFFE) (eql x #/U+FFFF))
|
||||
(xerror "not a valid code point: #x~X" x))
|
||||
(setf (aref out wptr) x))
|
||||
(setf wptr (%+ 1 wptr))))
|
||||
(values wptr rptr)))
|
||||
|
||||
@ -143,7 +146,10 @@
|
||||
;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
|
||||
;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
|
||||
;; Haelfte fehlt!
|
||||
(setf (aref out wptr) (logior (ash hi 8) lo))
|
||||
(let ((x (logior (ash hi 8) lo)))
|
||||
(when (or (eql x #xFFFE) (eql x #/U+FFFF))
|
||||
(xerror "not a valid code point: #x~X" x))
|
||||
(setf (aref out wptr) x))
|
||||
(setf wptr (%+ 1 wptr))))
|
||||
(values wptr rptr)))
|
||||
|
||||
@ -161,7 +167,9 @@
|
||||
(when (or (<= #xD800 x #xDBFF)
|
||||
(<= #xDC00 x #xDFFF))
|
||||
(xerror "surrogate encoded in UTF-8: #x~X." x))
|
||||
(cond ((%> x #x10FFFF)
|
||||
(cond ((or (%> x #x10FFFF)
|
||||
(eql x #xFFFE)
|
||||
(eql x #/U+FFFF))
|
||||
(xerror "not a valid code point: #x~X" x))
|
||||
((%> x #xFFFF)
|
||||
(setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10))
|
||||
|
||||
@ -1413,15 +1413,18 @@
|
||||
|
||||
(definline data-rune-p (rune)
|
||||
;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
||||
;;
|
||||
;; FIXME: das halte ich fuer verkehrt. Surrogates als Unicode-Zeichen
|
||||
;; sind verboten. Das liegt hier aber nicht vor, denn wir arbeiten
|
||||
;; ja tatsaechlich mit UTF-16. Verboten ist es nur, wenn wir ein
|
||||
;; solches Zeichen beim Dekodieren finden, das wird aber eben
|
||||
;; in encodings.lisp bereits geprueft. --david
|
||||
(let ((c (rune-code rune)))
|
||||
(or (= c #x9) (= c #xA) (= c #xD)
|
||||
(<= #x20 c #xD7FF)
|
||||
(<= #xE000 c #xFFFD)
|
||||
;;
|
||||
(<= #xD800 c #xDBFF)
|
||||
(<= #xDC00 c #xDFFF)
|
||||
;;
|
||||
)))
|
||||
(<= #xDC00 c #xDFFF))))
|
||||
|
||||
(defun read-att-value (zinput input mode &optional canon-space-p (delim nil))
|
||||
(with-rune-collector-2 (collect)
|
||||
@ -2686,7 +2689,7 @@
|
||||
((:ENTITY-REF)
|
||||
(let ((name sem))
|
||||
(consume-token input)
|
||||
(append ;; nil #+(OR)
|
||||
(append
|
||||
(recurse-on-entity input name :general
|
||||
(lambda (input)
|
||||
(prog1
|
||||
@ -3190,17 +3193,10 @@
|
||||
(defun read-cdata (input)
|
||||
(read-data-until* ((lambda (rune)
|
||||
(declare (type rune rune))
|
||||
(when (or (and (%rune< rune #/U+0020)
|
||||
(not (or (%rune= rune #/U+0009)
|
||||
(%rune= rune #/U+000a)
|
||||
(%rune= rune #/U+000d))))
|
||||
;; Surrogates nicht ausschliessen, denn wir
|
||||
;; haben ja UTF-16 Runen.
|
||||
#+(or)
|
||||
(and (%rune<= #/U+D800 rune)
|
||||
(%rune< rune #/U+E000))
|
||||
(%rune= rune #/U+FFFE)
|
||||
(%rune= rune #/U+FFFF))
|
||||
(when (and (%rune< rune #/U+0020)
|
||||
(not (or (%rune= rune #/U+0009)
|
||||
(%rune= rune #/U+000a)
|
||||
(%rune= rune #/U+000d))))
|
||||
(wf-error "code point invalid: ~A" rune))
|
||||
(or (%rune= rune #/<) (%rune= rune #/&)))
|
||||
input
|
||||
|
||||
Reference in New Issue
Block a user