From 36ba984844f640915f557c076e48b9c966f1737c Mon Sep 17 00:00:00 2001 From: dlichteblau Date: Tue, 27 Dec 2005 20:01:10 +0000 Subject: [PATCH] sb-unicode backport namespace-korrekturen noch documentation --- README.html | 1 + doc/dom.html | 9 ++++++++ doc/installation.html | 39 +++++++++----------------------- doc/quickstart.html | 2 +- doc/using.html | 32 ++++---------------------- dom/dom-sax.lisp | 14 +++++++----- runes/characters.lisp | 4 +++- runes/utf8.lisp | 8 ------- test/domtest.lisp | 4 ++-- xml/xml-parse.lisp | 47 +++++++++++++++++---------------------- xml/xmlns-normalizer.lisp | 3 ++- 11 files changed, 62 insertions(+), 101 deletions(-) diff --git a/README.html b/README.html index 9b74164..ab7a1d7 100644 --- a/README.html +++ b/README.html @@ -66,6 +66,7 @@ +

+ unparse-document-to-octets returns an (unsigned-byte + 8) array, whereas unparse-document writes + characters.  unparse-document is useful together + with with-output-to-string.  However, note that the + resulting document in both cases is UTF-8 encoded, so the + characters written by unparse-document are really UTF-8 + bytes encoded as characters. +

DOM/Lisp mapping

diff --git a/doc/installation.html b/doc/installation.html index ef05bdc..cc62863 100644 --- a/doc/installation.html +++ b/doc/installation.html @@ -14,35 +14,31 @@

Download

Implementation-specific notes

CXML should be portable to all Common Lisp implementations - supporting gray streams. Currently supported are ACL, CLISP, - CMUCL, LispWorks, OpenMCL, and SBCL. + supported by trivial-gray-streams.

- -

Compilation

@@ -117,9 +100,9 @@ $ cd 2001/DOM-Test-Suite && ant dom1-dtd dom2-dtd

- fixme My parser does not understand the current testsuite - anymore.  To fix this problem, revert the affected files - manually after check-out: + fixme domtest.lisp does not understand the current + testsuite driver anymore.  To fix this problem, revert the + affected files manually after check-out:

$ cd 2001/XML-Test-Suite/xmlconf/
diff --git a/doc/quickstart.html b/doc/quickstart.html
index 513076b..69f038f 100644
--- a/doc/quickstart.html
+++ b/doc/quickstart.html
@@ -16,7 +16,7 @@
           
 	
diff --git a/doc/using.html b/doc/using.html
index 67c033e..2f53068 100644
--- a/doc/using.html
+++ b/doc/using.html
@@ -138,9 +138,10 @@
     

-

Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink
+
Function CXML:MAKE-OCTET-STREAM-SINK (stream &rest keys) => sink
Function CXML:MAKE-OCTET-VECTOR-SINK (&rest keys) => sink
- Return a handle suitable for event-based XML serialization. +
Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink
+ Return a SAX serialization handle.

Keyword arguments:

- Prints this to stream, which must be an - (unsigned-byte 8) stream: + Prints this to stream:

<foo xyz="abc">
   <bar blub="bla"></bar>
   Hi there.
 </foo>
-

- (Note that these functions accept both strings and rods, so we - can write "foo" instead of #"foo" above.) -

Macro XHTML-GENERATOR:WITH-XHTML (sink &rest forms)
diff --git a/dom/dom-sax.lisp b/dom/dom-sax.lisp index 544d328..248586c 100644 --- a/dom/dom-sax.lisp +++ b/dom/dom-sax.lisp @@ -13,7 +13,8 @@ &key (include-xmlns-attributes sax:*include-xmlns-attributes*) include-doctype include-default-values - (recode (typep document 'utf8-dom::node))) + (recode (and #+rune-is-integer (typep document 'utf8-dom::node)))) + (declare (ignorable recode)) #+rune-is-integer (when recode (setf handler (make-recoder handler #'utf8-string-to-rod))) @@ -54,16 +55,16 @@ (dom:do-node-list (child (dom:child-nodes node)) (ecase (dom:node-type child) (:element - ;; fixme: namespaces (let ((attlist (compute-attributes child include-xmlns-attributes include-default-values)) - (lname (dom:tag-name child)) + (uri (dom:namespace-uri child)) + (lname (dom:local-name child)) (qname (dom:tag-name child))) - (sax:start-element handler nil lname qname attlist) + (sax:start-element handler uri lname qname attlist) (walk child) - (sax:end-element handler nil lname qname))) + (sax:end-element handler uri lname qname))) (:cdata-section (sax:start-cdata handler) (sax:characters handler (dom:data child)) @@ -83,10 +84,11 @@ (let ((results '())) (dom:do-node-list (a (dom:attributes element)) (when (and (or defaultp (dom:specified a)) - (or xmlnsp (not (cxml::xmlns-attr-p (dom:name a))))) + (or xmlnsp (not (cxml::xmlns-attr-p (rod (dom:name a)))))) (push (sax:make-attribute :qname (dom:name a) :value (dom:value a) + :namespace-uri (dom:namespace-uri a) :specified-p (dom:specified a)) results))) (reverse results))) diff --git a/runes/characters.lisp b/runes/characters.lisp index 11195b3..5fa1aa5 100644 --- a/runes/characters.lisp +++ b/runes/characters.lisp @@ -95,7 +95,9 @@ (stringp x)) (defun rod= (x y) - (string= x y)) + (if (zerop (length x)) + (zerop (length y)) + (and (plusp (length y)) (string= x y)))) (defun rod-equal (x y) (string-equal x y)) diff --git a/runes/utf8.lisp b/runes/utf8.lisp index 938b67e..48e4183 100644 --- a/runes/utf8.lisp +++ b/runes/utf8.lisp @@ -10,14 +10,6 @@ (deftype rod () '(vector rune)) (deftype simple-rod () '(simple-array rune)) -#+(or) -(definline rune (rod index) - (char rod index)) - -#+(or) -(defun (setf rune) (newval rod index) - (setf (char rod index) newval)) - (defun rod= (r s) (string= r s)) diff --git a/test/domtest.lisp b/test/domtest.lisp index ba99ff8..6d40b62 100644 --- a/test/domtest.lisp +++ b/test/domtest.lisp @@ -151,7 +151,7 @@ (#/n (vector-push-extend #/newline v (length v))) ((#/\\ #/\") (vector-push-extend #/\\ v (length v))))) (vector-push-extend c v (length v)))) - (coerce v 'runes::simple-rod))) + (make-array (length v) :element-type 'runes:rune :initial-contents v))) (t (%intern str)))) @@ -680,7 +680,7 @@ (dom:get-attribute member "href")))) (unless (or (runes:rod= (dom:tag-name member) #"metadata") (member href *bad-tests* :test 'equal)) - (format t "~&~D/~D ~A~%" i n href) + (format t "~&~D/~D ~A~%" i #+nil n 808 href) (let ((lisp (slurp-test (merge-pathnames href test-directory)))) (when verbose diff --git a/xml/xml-parse.lisp b/xml/xml-parse.lisp index 3d5da79..d489067 100644 --- a/xml/xml-parse.lisp +++ b/xml/xml-parse.lisp @@ -116,11 +116,7 @@ ;; ;; o better extensibility wrt character representation, one may want to ;; have -;; - UTF-8 in standard CL strings -;; - UCS-2 in RODs -;; - UTF-16 in RODs ;; - UCS-4 in vectoren -;; [habe ich eigentlich nicht vor--david] ;; ;; o xstreams auslagern, documententieren und dann auch in SGML und ;; CSS parser verwenden. (halt alles was zeichen liest). @@ -1210,10 +1206,10 @@ (values :nmtoken (read-name-token input))) ((rune= #/# c) (let ((q (read-name-token input))) - (cond ((equalp q '#.(string-rod "REQUIRED")) :|#REQUIRED|) - ((equalp q '#.(string-rod "IMPLIED")) :|#IMPLIED|) - ((equalp q '#.(string-rod "FIXED")) :|#FIXED|) - ((equalp q '#.(string-rod "PCDATA")) :|#PCDATA|) + (cond ((rod= q '#.(string-rod "REQUIRED")) :|#REQUIRED|) + ((rod= q '#.(string-rod "IMPLIED")) :|#IMPLIED|) + ((rod= q '#.(string-rod "FIXED")) :|#FIXED|) + ((rod= q '#.(string-rod "PCDATA")) :|#PCDATA|) (t (wf-error zinput "Unknown token: ~S." q))))) ((or (rune= c #/U+0020) @@ -1821,15 +1817,15 @@ ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */ (multiple-value-bind (cat sem) (read-token input) (cond ((eq cat :nmtoken) - (cond ((equalp sem '#.(string-rod "CDATA")) :CDATA) - ((equalp sem '#.(string-rod "ID")) :ID) - ((equalp sem '#.(string-rod "IDREF")) :IDREFS) - ((equalp sem '#.(string-rod "IDREFS")) :IDREFS) - ((equalp sem '#.(string-rod "ENTITY")) :ENTITY) - ((equalp sem '#.(string-rod "ENTITIES")) :ENTITIES) - ((equalp sem '#.(string-rod "NMTOKEN")) :NMTOKEN) - ((equalp sem '#.(string-rod "NMTOKENS")) :NMTOKENS) - ((equalp sem '#.(string-rod "NOTATION")) + (cond ((rod= sem '#.(string-rod "CDATA")) :CDATA) + ((rod= sem '#.(string-rod "ID")) :ID) + ((rod= sem '#.(string-rod "IDREF")) :IDREFS) + ((rod= sem '#.(string-rod "IDREFS")) :IDREFS) + ((rod= sem '#.(string-rod "ENTITY")) :ENTITY) + ((rod= sem '#.(string-rod "ENTITIES")) :ENTITIES) + ((rod= sem '#.(string-rod "NMTOKEN")) :NMTOKEN) + ((rod= sem '#.(string-rod "NMTOKENS")) :NMTOKENS) + ((rod= sem '#.(string-rod "NOTATION")) (let (names) (p/S input) (expect input :\() @@ -1923,15 +1919,15 @@ (cond ((member cat '(:\" :\')) (make-internal-entdef (p/entity-value input))) ((and (eq cat :nmtoken) - (or (equalp sem '#.(string-rod "SYSTEM")) - (equalp sem '#.(string-rod "PUBLIC")))) + (or (rod= sem '#.(string-rod "SYSTEM")) + (rod= sem '#.(string-rod "PUBLIC")))) (let (extid ndata) (setf extid (p/external-id input nil)) (when (eq kind :general) ;NDATA allowed at all? (cond ((eq (peek-token input) :S) (p/S? input) (when (and (eq (peek-token input) :nmtoken) - (equalp (nth-value 1 (peek-token input)) + (rod= (nth-value 1 (peek-token input)) '#.(string-rod "NDATA"))) (consume-token input) (p/S input) @@ -1961,10 +1957,10 @@ (defun p/external-id (input &optional (public-only-ok-p nil)) ;; xxx public-only-ok-p (multiple-value-bind (cat sem) (read-token input) - (cond ((and (eq cat :nmtoken) (equalp sem '#.(string-rod "SYSTEM"))) + (cond ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "SYSTEM"))) (p/S input) (make-extid nil (p/system-literal input))) - ((and (eq cat :nmtoken) (equalp sem '#.(string-rod "PUBLIC"))) + ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "PUBLIC"))) (let (pub sys) (p/S input) (setf pub (p/pubid-literal input)) @@ -3390,9 +3386,7 @@ (dolist (ns-decl ns-decls) ;; check some namespace validity constraints (let ((prefix (car ns-decl)) - (uri (if (rod= #"" (cdr ns-decl)) - nil - (cdr ns-decl)))) + (uri (cdr ns-decl))) (cond ((and (rod= prefix #"xml") (not (rod= uri #"http://www.w3.org/XML/1998/namespace"))) @@ -3425,7 +3419,8 @@ may be bound to an empty namespace URI, thus ~ undeclaring it.")) (t - (push (cons prefix uri) *namespace-bindings*) + (push (cons prefix (if (rod= #"" uri) nil uri)) + *namespace-bindings*) (sax:start-prefix-mapping (handler *ctx*) (car ns-decl) (cdr ns-decl)))))) diff --git a/xml/xmlns-normalizer.lisp b/xml/xmlns-normalizer.lisp index 97ff4ec..5ae2d79 100644 --- a/xml/xmlns-normalizer.lisp +++ b/xml/xmlns-normalizer.lisp @@ -90,7 +90,8 @@ (let ((binding (normalizer-find-prefix handler prefix))) (cond ((null binding) - (push-namespace prefix uri)) + (unless (and (null prefix) (zerop (length uri))) + (push-namespace prefix uri))) ((rod= (sax:attribute-value binding) uri)) ((member binding (car (xmlns-stack handler))) (setf (sax:attribute-value binding) uri))