From 36ba984844f640915f557c076e48b9c966f1737c Mon Sep 17 00:00:00 2001
From: dlichteblau
Date: Tue, 27 Dec 2005 20:01:10 +0000
Subject: [PATCH] sb-unicode backport namespace-korrekturen noch documentation
---
README.html | 1 +
doc/dom.html | 9 ++++++++
doc/installation.html | 39 +++++++++-----------------------
doc/quickstart.html | 2 +-
doc/using.html | 32 ++++----------------------
dom/dom-sax.lisp | 14 +++++++-----
runes/characters.lisp | 4 +++-
runes/utf8.lisp | 8 -------
test/domtest.lisp | 4 ++--
xml/xml-parse.lisp | 47 +++++++++++++++++----------------------
xml/xmlns-normalizer.lisp | 3 ++-
11 files changed, 62 insertions(+), 101 deletions(-)
diff --git a/README.html b/README.html
index 9b74164..ab7a1d7 100644
--- a/README.html
+++ b/README.html
@@ -66,6 +66,7 @@
- Implemented DOM 2 Core.
- Error handling overhaul.
+ - UTF-8 string support in DOM on Lisps without Unicode characters.
- Support internal subset serialization.
- Gilbert Baumann has clarified the license as Lisp-LGPL.
- Use trivial-gray-streams.
diff --git a/doc/dom.html b/doc/dom.html
index 2174f0e..665cad8 100644
--- a/doc/dom.html
+++ b/doc/dom.html
@@ -117,6 +117,15 @@
href="using.html#misc">namespace normalizer is used.
+
+ unparse-document-to-octets returns an (unsigned-byte
+ 8) array, whereas unparse-document writes
+ characters. unparse-document is useful together
+ with with-output-to-string. However, note that the
+ resulting document in both cases is UTF-8 encoded, so the
+ characters written by unparse-document are really UTF-8
+ bytes encoded as characters.
+
DOM/Lisp mapping
diff --git a/doc/installation.html b/doc/installation.html
index ef05bdc..cc62863 100644
--- a/doc/installation.html
+++ b/doc/installation.html
@@ -14,35 +14,31 @@
Download
-
- tarballs
+
- -
+
-
+
Anoncvs (
browse):
$ export CVSROOT=:pserver:anonymous@common-lisp.net:/project/cxml/cvsroot
$ cvs login
Logging in to :pserver:anonymous@common-lisp.net:2401/project/cxml/cvsroot
CVS password: anonymous
$ cvs co cxml
-
+
+
Implementation-specific notes
CXML should be portable to all Common Lisp implementations
- supporting gray streams. Currently supported are ACL, CLISP,
- CMUCL, LispWorks, OpenMCL, and SBCL.
+ supported by trivial-gray-streams.
- -
- Note that CMUCL and OpenMCL do not support Unicode
- natively. (You might want to use the recoding SAX handler to work with
- native strings anyway.)
-
-
SBCL and CLISP will trip over cxml's non-ASCII source files
- unless compiled using a suitable locale configuration
+ unless run using a suitable locale configuration
(LC_CTYPE=en_US.ISO-8859-1 should help).
-
@@ -51,19 +47,6 @@ $ cvs co cxml
-
-
Compilation
@@ -117,9 +100,9 @@ $ cd 2001/DOM-Test-Suite && ant dom1-dtd dom2-dtd
- fixme My parser does not understand the current testsuite
- anymore. To fix this problem, revert the affected files
- manually after check-out:
+ fixme domtest.lisp does not understand the current
+ testsuite driver anymore. To fix this problem, revert the
+ affected files manually after check-out:
$ cd 2001/XML-Test-Suite/xmlconf/
diff --git a/doc/quickstart.html b/doc/quickstart.html
index 513076b..69f038f 100644
--- a/doc/quickstart.html
+++ b/doc/quickstart.html
@@ -16,7 +16,7 @@
diff --git a/doc/using.html b/doc/using.html
index 67c033e..2f53068 100644
--- a/doc/using.html
+++ b/doc/using.html
@@ -138,9 +138,10 @@
-
Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink
+ Function CXML:MAKE-OCTET-STREAM-SINK (stream &rest keys) => sink
Function CXML:MAKE-OCTET-VECTOR-SINK (&rest keys) => sink
- Return a handle suitable for event-based XML serialization.
+ Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink
+ Return a SAX serialization handle.
Keyword arguments:
@@ -175,26 +176,6 @@
changes the document model and should only be used if whitespace
does not matter to the application.
-
- If namespace support is enabled (the default), these functions use
- a namespace normalizer (cxml:make-namespace-normalizer).
-
-
- unparse-document-to-octets returns an (unsigned-byte
- 8) array, whereas unparse-document writes
- characters. unparse-document is useful together
- with with-output-to-string. However, note that the
- resulting document in both cases is UTF-8 encoded, so the
- characters written by unparse-document are really UTF-8
- bytes encoded as characters.
-
-
- These function provide the low-level mechanism used by the DOM
- serialization functions. To serialize a document without building
- its DOM tree first, create a sink handle and call SAX functions on that
- handle. sax:end-document returns the serialized form of
- the document described by the SAX events.
-
Macro CXML:WITH-XML-OUTPUT (sink &body body) => sink-specific result
@@ -214,17 +195,12 @@
(attribute "blub" "bla"))
(text "Hi there.")))
- Prints this to stream, which must be an
- (unsigned-byte 8) stream:
+ Prints this to stream:
<foo xyz="abc">
<bar blub="bla"></bar>
Hi there.
</foo>
-
- (Note that these functions accept both strings and rods, so we
- can write "foo" instead of #"foo" above.)
-
Macro XHTML-GENERATOR:WITH-XHTML (sink &rest forms)
diff --git a/dom/dom-sax.lisp b/dom/dom-sax.lisp
index 544d328..248586c 100644
--- a/dom/dom-sax.lisp
+++ b/dom/dom-sax.lisp
@@ -13,7 +13,8 @@
&key (include-xmlns-attributes sax:*include-xmlns-attributes*)
include-doctype
include-default-values
- (recode (typep document 'utf8-dom::node)))
+ (recode (and #+rune-is-integer (typep document 'utf8-dom::node))))
+ (declare (ignorable recode))
#+rune-is-integer
(when recode
(setf handler (make-recoder handler #'utf8-string-to-rod)))
@@ -54,16 +55,16 @@
(dom:do-node-list (child (dom:child-nodes node))
(ecase (dom:node-type child)
(:element
- ;; fixme: namespaces
(let ((attlist
(compute-attributes child
include-xmlns-attributes
include-default-values))
- (lname (dom:tag-name child))
+ (uri (dom:namespace-uri child))
+ (lname (dom:local-name child))
(qname (dom:tag-name child)))
- (sax:start-element handler nil lname qname attlist)
+ (sax:start-element handler uri lname qname attlist)
(walk child)
- (sax:end-element handler nil lname qname)))
+ (sax:end-element handler uri lname qname)))
(:cdata-section
(sax:start-cdata handler)
(sax:characters handler (dom:data child))
@@ -83,10 +84,11 @@
(let ((results '()))
(dom:do-node-list (a (dom:attributes element))
(when (and (or defaultp (dom:specified a))
- (or xmlnsp (not (cxml::xmlns-attr-p (dom:name a)))))
+ (or xmlnsp (not (cxml::xmlns-attr-p (rod (dom:name a))))))
(push
(sax:make-attribute :qname (dom:name a)
:value (dom:value a)
+ :namespace-uri (dom:namespace-uri a)
:specified-p (dom:specified a))
results)))
(reverse results)))
diff --git a/runes/characters.lisp b/runes/characters.lisp
index 11195b3..5fa1aa5 100644
--- a/runes/characters.lisp
+++ b/runes/characters.lisp
@@ -95,7 +95,9 @@
(stringp x))
(defun rod= (x y)
- (string= x y))
+ (if (zerop (length x))
+ (zerop (length y))
+ (and (plusp (length y)) (string= x y))))
(defun rod-equal (x y)
(string-equal x y))
diff --git a/runes/utf8.lisp b/runes/utf8.lisp
index 938b67e..48e4183 100644
--- a/runes/utf8.lisp
+++ b/runes/utf8.lisp
@@ -10,14 +10,6 @@
(deftype rod () '(vector rune))
(deftype simple-rod () '(simple-array rune))
-#+(or)
-(definline rune (rod index)
- (char rod index))
-
-#+(or)
-(defun (setf rune) (newval rod index)
- (setf (char rod index) newval))
-
(defun rod= (r s)
(string= r s))
diff --git a/test/domtest.lisp b/test/domtest.lisp
index ba99ff8..6d40b62 100644
--- a/test/domtest.lisp
+++ b/test/domtest.lisp
@@ -151,7 +151,7 @@
(#/n (vector-push-extend #/newline v (length v)))
((#/\\ #/\") (vector-push-extend #/\\ v (length v)))))
(vector-push-extend c v (length v))))
- (coerce v 'runes::simple-rod)))
+ (make-array (length v) :element-type 'runes:rune :initial-contents v)))
(t
(%intern str))))
@@ -680,7 +680,7 @@
(dom:get-attribute member "href"))))
(unless (or (runes:rod= (dom:tag-name member) #"metadata")
(member href *bad-tests* :test 'equal))
- (format t "~&~D/~D ~A~%" i n href)
+ (format t "~&~D/~D ~A~%" i #+nil n 808 href)
(let ((lisp (slurp-test
(merge-pathnames href test-directory))))
(when verbose
diff --git a/xml/xml-parse.lisp b/xml/xml-parse.lisp
index 3d5da79..d489067 100644
--- a/xml/xml-parse.lisp
+++ b/xml/xml-parse.lisp
@@ -116,11 +116,7 @@
;;
;; o better extensibility wrt character representation, one may want to
;; have
-;; - UTF-8 in standard CL strings
-;; - UCS-2 in RODs
-;; - UTF-16 in RODs
;; - UCS-4 in vectoren
-;; [habe ich eigentlich nicht vor--david]
;;
;; o xstreams auslagern, documententieren und dann auch in SGML und
;; CSS parser verwenden. (halt alles was zeichen liest).
@@ -1210,10 +1206,10 @@
(values :nmtoken (read-name-token input)))
((rune= #/# c)
(let ((q (read-name-token input)))
- (cond ((equalp q '#.(string-rod "REQUIRED")) :|#REQUIRED|)
- ((equalp q '#.(string-rod "IMPLIED")) :|#IMPLIED|)
- ((equalp q '#.(string-rod "FIXED")) :|#FIXED|)
- ((equalp q '#.(string-rod "PCDATA")) :|#PCDATA|)
+ (cond ((rod= q '#.(string-rod "REQUIRED")) :|#REQUIRED|)
+ ((rod= q '#.(string-rod "IMPLIED")) :|#IMPLIED|)
+ ((rod= q '#.(string-rod "FIXED")) :|#FIXED|)
+ ((rod= q '#.(string-rod "PCDATA")) :|#PCDATA|)
(t
(wf-error zinput "Unknown token: ~S." q)))))
((or (rune= c #/U+0020)
@@ -1821,15 +1817,15 @@
;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */
(multiple-value-bind (cat sem) (read-token input)
(cond ((eq cat :nmtoken)
- (cond ((equalp sem '#.(string-rod "CDATA")) :CDATA)
- ((equalp sem '#.(string-rod "ID")) :ID)
- ((equalp sem '#.(string-rod "IDREF")) :IDREFS)
- ((equalp sem '#.(string-rod "IDREFS")) :IDREFS)
- ((equalp sem '#.(string-rod "ENTITY")) :ENTITY)
- ((equalp sem '#.(string-rod "ENTITIES")) :ENTITIES)
- ((equalp sem '#.(string-rod "NMTOKEN")) :NMTOKEN)
- ((equalp sem '#.(string-rod "NMTOKENS")) :NMTOKENS)
- ((equalp sem '#.(string-rod "NOTATION"))
+ (cond ((rod= sem '#.(string-rod "CDATA")) :CDATA)
+ ((rod= sem '#.(string-rod "ID")) :ID)
+ ((rod= sem '#.(string-rod "IDREF")) :IDREFS)
+ ((rod= sem '#.(string-rod "IDREFS")) :IDREFS)
+ ((rod= sem '#.(string-rod "ENTITY")) :ENTITY)
+ ((rod= sem '#.(string-rod "ENTITIES")) :ENTITIES)
+ ((rod= sem '#.(string-rod "NMTOKEN")) :NMTOKEN)
+ ((rod= sem '#.(string-rod "NMTOKENS")) :NMTOKENS)
+ ((rod= sem '#.(string-rod "NOTATION"))
(let (names)
(p/S input)
(expect input :\()
@@ -1923,15 +1919,15 @@
(cond ((member cat '(:\" :\'))
(make-internal-entdef (p/entity-value input)))
((and (eq cat :nmtoken)
- (or (equalp sem '#.(string-rod "SYSTEM"))
- (equalp sem '#.(string-rod "PUBLIC"))))
+ (or (rod= sem '#.(string-rod "SYSTEM"))
+ (rod= sem '#.(string-rod "PUBLIC"))))
(let (extid ndata)
(setf extid (p/external-id input nil))
(when (eq kind :general) ;NDATA allowed at all?
(cond ((eq (peek-token input) :S)
(p/S? input)
(when (and (eq (peek-token input) :nmtoken)
- (equalp (nth-value 1 (peek-token input))
+ (rod= (nth-value 1 (peek-token input))
'#.(string-rod "NDATA")))
(consume-token input)
(p/S input)
@@ -1961,10 +1957,10 @@
(defun p/external-id (input &optional (public-only-ok-p nil))
;; xxx public-only-ok-p
(multiple-value-bind (cat sem) (read-token input)
- (cond ((and (eq cat :nmtoken) (equalp sem '#.(string-rod "SYSTEM")))
+ (cond ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "SYSTEM")))
(p/S input)
(make-extid nil (p/system-literal input)))
- ((and (eq cat :nmtoken) (equalp sem '#.(string-rod "PUBLIC")))
+ ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "PUBLIC")))
(let (pub sys)
(p/S input)
(setf pub (p/pubid-literal input))
@@ -3390,9 +3386,7 @@
(dolist (ns-decl ns-decls)
;; check some namespace validity constraints
(let ((prefix (car ns-decl))
- (uri (if (rod= #"" (cdr ns-decl))
- nil
- (cdr ns-decl))))
+ (uri (cdr ns-decl)))
(cond
((and (rod= prefix #"xml")
(not (rod= uri #"http://www.w3.org/XML/1998/namespace")))
@@ -3425,7 +3419,8 @@
may be bound to an empty namespace URI, thus ~
undeclaring it."))
(t
- (push (cons prefix uri) *namespace-bindings*)
+ (push (cons prefix (if (rod= #"" uri) nil uri))
+ *namespace-bindings*)
(sax:start-prefix-mapping (handler *ctx*)
(car ns-decl)
(cdr ns-decl))))))
diff --git a/xml/xmlns-normalizer.lisp b/xml/xmlns-normalizer.lisp
index 97ff4ec..5ae2d79 100644
--- a/xml/xmlns-normalizer.lisp
+++ b/xml/xmlns-normalizer.lisp
@@ -90,7 +90,8 @@
(let ((binding (normalizer-find-prefix handler prefix)))
(cond
((null binding)
- (push-namespace prefix uri))
+ (unless (and (null prefix) (zerop (length uri)))
+ (push-namespace prefix uri)))
((rod= (sax:attribute-value binding) uri))
((member binding (car (xmlns-stack handler)))
(setf (sax:attribute-value binding) uri))