sb-unicode backport
namespace-korrekturen noch documentation
This commit is contained in:
@ -66,6 +66,7 @@
|
||||
<ul class="nomargin">
|
||||
<li>Implemented DOM 2 Core.</li>
|
||||
<li>Error handling overhaul.</li>
|
||||
<li>UTF-8 string support in DOM on Lisps without Unicode characters.</li>
|
||||
<li>Support internal subset serialization.</li>
|
||||
<li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>
|
||||
<li>Use trivial-gray-streams.</li>
|
||||
|
||||
@ -117,6 +117,15 @@
|
||||
href="using.html#misc">namespace normalizer</a> is used.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
<tt>unparse-document-to-octets</tt> returns an <tt>(unsigned-byte
|
||||
8)</tt> array, whereas <tt>unparse-document</tt> writes
|
||||
characters. <tt>unparse-document</tt> is useful together
|
||||
with <tt>with-output-to-string</tt>. However, note that the
|
||||
resulting document in both cases is UTF-8 encoded, so the
|
||||
characters written by <tt>unparse-document</tt> are really UTF-8
|
||||
bytes encoded as characters.
|
||||
</p>
|
||||
|
||||
<a name="mapping"/>
|
||||
<h3>DOM/Lisp mapping</h3>
|
||||
|
||||
@ -14,35 +14,31 @@
|
||||
<h2>Download</h2>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="http://common-lisp.net/project/cxml/download/">tarballs</a>
|
||||
<div><a href="http://common-lisp.net/project/cxml/download/">tarballs</a></div>
|
||||
</li>
|
||||
<li>
|
||||
<li>
|
||||
<div>
|
||||
Anoncvs (<a href="http://common-lisp.net/cgi-bin/viewcvs.cgi/cxml/?cvsroot=cxml">browse</a>):
|
||||
<pre>$ export CVSROOT=:pserver:anonymous@common-lisp.net:/project/cxml/cvsroot
|
||||
$ cvs login
|
||||
Logging in to :pserver:anonymous@common-lisp.net:2401/project/cxml/cvsroot
|
||||
CVS password: anonymous
|
||||
$ cvs co cxml</pre>
|
||||
</lii>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<a name="implementations"/>
|
||||
<h2>Implementation-specific notes</h2>
|
||||
<p>
|
||||
CXML should be portable to all Common Lisp implementations
|
||||
supporting gray streams. Currently supported are ACL, CLISP,
|
||||
CMUCL, LispWorks, OpenMCL, and SBCL.
|
||||
supported by <a
|
||||
href="http://common-lisp.net/project/cl-plus-ssl/#trivial-gray-streams">trivial-gray-streams</a>.
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Note that CMUCL and OpenMCL do not support Unicode
|
||||
natively. (You might want to use the <a
|
||||
href="using.html#rods">recoding SAX handler</a> to work with
|
||||
native strings anyway.)
|
||||
</li>
|
||||
<li>
|
||||
SBCL and CLISP will trip over cxml's non-ASCII source files
|
||||
unless compiled using a suitable locale configuration
|
||||
unless run using a suitable locale configuration
|
||||
(<tt>LC_CTYPE=en_US.ISO-8859-1</tt> should help).
|
||||
</li>
|
||||
<li>
|
||||
@ -51,19 +47,6 @@ $ cvs co cxml</pre>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<!--
|
||||
<p>
|
||||
Optional configuration (skip this unless you know better): CXML
|
||||
has full Unicode code support - - even on Lisps without Unicode
|
||||
strings. On non-unicode aware Lisps, <tt>DOMString</tt> is
|
||||
implemented as an array of character codes. CXML will auto-detect
|
||||
at compile-time which string representation to use. To override
|
||||
the auto-detection, you can set one of the features
|
||||
<tt>:rune-is-character</tt> and <tt>:rune-is-integer</tt> before
|
||||
loading <tt>cxml.asd</tt>.
|
||||
</p>
|
||||
-->
|
||||
|
||||
<a name="compilation"/>
|
||||
<h2>Compilation</h2>
|
||||
<p>
|
||||
@ -117,9 +100,9 @@ $ cd 2001/DOM-Test-Suite && ant dom1-dtd dom2-dtd</pre>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<b>fixme</b> My parser does not understand the current testsuite
|
||||
anymore. To fix this problem, revert the affected files
|
||||
manually after check-out:
|
||||
<b>fixme</b> domtest.lisp does not understand the current
|
||||
testsuite driver anymore. To fix this problem, revert the
|
||||
affected files manually after check-out:
|
||||
</p>
|
||||
|
||||
<pre>$ cd 2001/XML-Test-Suite/xmlconf/
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
<ul class="sub">
|
||||
<li><a href="installation.html#download"><b>Download</b></a></li>
|
||||
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
|
||||
<li><a href="installation.html#compilation"><b>Compilation</b></a></li>
|
||||
<li><a href="installation.html#compilation">Compilation</a></li>
|
||||
<li><a href="installation.html#tests">Tests</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
@ -138,9 +138,10 @@
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<div class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink</div>
|
||||
<div class="def">Function CXML:MAKE-OCTET-STREAM-SINK (stream &rest keys) => sink</div>
|
||||
<div class="def">Function CXML:MAKE-OCTET-VECTOR-SINK (&rest keys) => sink</div>
|
||||
Return a handle suitable for event-based XML serialization.
|
||||
<div class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink</div>
|
||||
Return a SAX serialization handle.
|
||||
</p>
|
||||
<p>Keyword arguments:</p>
|
||||
<ul>
|
||||
@ -175,26 +176,6 @@
|
||||
changes the document model and should only be used if whitespace
|
||||
does not matter to the application.
|
||||
</p>
|
||||
<p>
|
||||
If namespace support is enabled (the default), these functions use
|
||||
a namespace normalizer (<tt>cxml:make-namespace-normalizer</tt>).
|
||||
</p>
|
||||
<p>
|
||||
<tt>unparse-document-to-octets</tt> returns an <tt>(unsigned-byte
|
||||
8)</tt> array, whereas <tt>unparse-document</tt> writes
|
||||
characters. <tt>unparse-document</tt> is useful together
|
||||
with <tt>with-output-to-string</tt>. However, note that the
|
||||
resulting document in both cases is UTF-8 encoded, so the
|
||||
characters written by <tt>unparse-document</tt> are really UTF-8
|
||||
bytes encoded as characters.
|
||||
</p>
|
||||
<p>
|
||||
These function provide the low-level mechanism used by the DOM
|
||||
serialization functions. To serialize a document without building
|
||||
its DOM tree first, create a sink handle and call SAX functions on that
|
||||
handle. <tt>sax:end-document</tt> returns the serialized form of
|
||||
the document described by the SAX events.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<div class="def">Macro CXML:WITH-XML-OUTPUT (sink &body body) => sink-specific result</div>
|
||||
@ -214,17 +195,12 @@
|
||||
(attribute "blub" "bla"))
|
||||
(text "Hi there.")))</pre>
|
||||
<p>
|
||||
Prints this to <tt>stream</tt>, which must be an
|
||||
<tt>(unsigned-byte 8)</tt> stream:
|
||||
Prints this to <tt>stream</tt>:
|
||||
</p>
|
||||
<pre><foo xyz="abc">
|
||||
<bar blub="bla"></bar>
|
||||
Hi there.
|
||||
</foo></pre>
|
||||
<p>
|
||||
(Note that these functions accept both strings and rods, so we
|
||||
can write <tt>"foo"</tt> instead of <tt>#"foo"</tt> above.)
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<div class="def">Macro XHTML-GENERATOR:WITH-XHTML (sink &rest forms)</div>
|
||||
|
||||
@ -13,7 +13,8 @@
|
||||
&key (include-xmlns-attributes sax:*include-xmlns-attributes*)
|
||||
include-doctype
|
||||
include-default-values
|
||||
(recode (typep document 'utf8-dom::node)))
|
||||
(recode (and #+rune-is-integer (typep document 'utf8-dom::node))))
|
||||
(declare (ignorable recode))
|
||||
#+rune-is-integer
|
||||
(when recode
|
||||
(setf handler (make-recoder handler #'utf8-string-to-rod)))
|
||||
@ -54,16 +55,16 @@
|
||||
(dom:do-node-list (child (dom:child-nodes node))
|
||||
(ecase (dom:node-type child)
|
||||
(:element
|
||||
;; fixme: namespaces
|
||||
(let ((attlist
|
||||
(compute-attributes child
|
||||
include-xmlns-attributes
|
||||
include-default-values))
|
||||
(lname (dom:tag-name child))
|
||||
(uri (dom:namespace-uri child))
|
||||
(lname (dom:local-name child))
|
||||
(qname (dom:tag-name child)))
|
||||
(sax:start-element handler nil lname qname attlist)
|
||||
(sax:start-element handler uri lname qname attlist)
|
||||
(walk child)
|
||||
(sax:end-element handler nil lname qname)))
|
||||
(sax:end-element handler uri lname qname)))
|
||||
(:cdata-section
|
||||
(sax:start-cdata handler)
|
||||
(sax:characters handler (dom:data child))
|
||||
@ -83,10 +84,11 @@
|
||||
(let ((results '()))
|
||||
(dom:do-node-list (a (dom:attributes element))
|
||||
(when (and (or defaultp (dom:specified a))
|
||||
(or xmlnsp (not (cxml::xmlns-attr-p (dom:name a)))))
|
||||
(or xmlnsp (not (cxml::xmlns-attr-p (rod (dom:name a))))))
|
||||
(push
|
||||
(sax:make-attribute :qname (dom:name a)
|
||||
:value (dom:value a)
|
||||
:namespace-uri (dom:namespace-uri a)
|
||||
:specified-p (dom:specified a))
|
||||
results)))
|
||||
(reverse results)))
|
||||
|
||||
@ -95,7 +95,9 @@
|
||||
(stringp x))
|
||||
|
||||
(defun rod= (x y)
|
||||
(string= x y))
|
||||
(if (zerop (length x))
|
||||
(zerop (length y))
|
||||
(and (plusp (length y)) (string= x y))))
|
||||
|
||||
(defun rod-equal (x y)
|
||||
(string-equal x y))
|
||||
|
||||
@ -10,14 +10,6 @@
|
||||
(deftype rod () '(vector rune))
|
||||
(deftype simple-rod () '(simple-array rune))
|
||||
|
||||
#+(or)
|
||||
(definline rune (rod index)
|
||||
(char rod index))
|
||||
|
||||
#+(or)
|
||||
(defun (setf rune) (newval rod index)
|
||||
(setf (char rod index) newval))
|
||||
|
||||
(defun rod= (r s)
|
||||
(string= r s))
|
||||
|
||||
|
||||
@ -151,7 +151,7 @@
|
||||
(#/n (vector-push-extend #/newline v (length v)))
|
||||
((#/\\ #/\") (vector-push-extend #/\\ v (length v)))))
|
||||
(vector-push-extend c v (length v))))
|
||||
(coerce v 'runes::simple-rod)))
|
||||
(make-array (length v) :element-type 'runes:rune :initial-contents v)))
|
||||
(t
|
||||
(%intern str))))
|
||||
|
||||
@ -680,7 +680,7 @@
|
||||
(dom:get-attribute member "href"))))
|
||||
(unless (or (runes:rod= (dom:tag-name member) #"metadata")
|
||||
(member href *bad-tests* :test 'equal))
|
||||
(format t "~&~D/~D ~A~%" i n href)
|
||||
(format t "~&~D/~D ~A~%" i #+nil n 808 href)
|
||||
(let ((lisp (slurp-test
|
||||
(merge-pathnames href test-directory))))
|
||||
(when verbose
|
||||
|
||||
@ -116,11 +116,7 @@
|
||||
;;
|
||||
;; o better extensibility wrt character representation, one may want to
|
||||
;; have
|
||||
;; - UTF-8 in standard CL strings
|
||||
;; - UCS-2 in RODs
|
||||
;; - UTF-16 in RODs
|
||||
;; - UCS-4 in vectoren
|
||||
;; [habe ich eigentlich nicht vor--david]
|
||||
;;
|
||||
;; o xstreams auslagern, documententieren und dann auch in SGML und
|
||||
;; CSS parser verwenden. (halt alles was zeichen liest).
|
||||
@ -1210,10 +1206,10 @@
|
||||
(values :nmtoken (read-name-token input)))
|
||||
((rune= #/# c)
|
||||
(let ((q (read-name-token input)))
|
||||
(cond ((equalp q '#.(string-rod "REQUIRED")) :|#REQUIRED|)
|
||||
((equalp q '#.(string-rod "IMPLIED")) :|#IMPLIED|)
|
||||
((equalp q '#.(string-rod "FIXED")) :|#FIXED|)
|
||||
((equalp q '#.(string-rod "PCDATA")) :|#PCDATA|)
|
||||
(cond ((rod= q '#.(string-rod "REQUIRED")) :|#REQUIRED|)
|
||||
((rod= q '#.(string-rod "IMPLIED")) :|#IMPLIED|)
|
||||
((rod= q '#.(string-rod "FIXED")) :|#FIXED|)
|
||||
((rod= q '#.(string-rod "PCDATA")) :|#PCDATA|)
|
||||
(t
|
||||
(wf-error zinput "Unknown token: ~S." q)))))
|
||||
((or (rune= c #/U+0020)
|
||||
@ -1821,15 +1817,15 @@
|
||||
;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */
|
||||
(multiple-value-bind (cat sem) (read-token input)
|
||||
(cond ((eq cat :nmtoken)
|
||||
(cond ((equalp sem '#.(string-rod "CDATA")) :CDATA)
|
||||
((equalp sem '#.(string-rod "ID")) :ID)
|
||||
((equalp sem '#.(string-rod "IDREF")) :IDREFS)
|
||||
((equalp sem '#.(string-rod "IDREFS")) :IDREFS)
|
||||
((equalp sem '#.(string-rod "ENTITY")) :ENTITY)
|
||||
((equalp sem '#.(string-rod "ENTITIES")) :ENTITIES)
|
||||
((equalp sem '#.(string-rod "NMTOKEN")) :NMTOKEN)
|
||||
((equalp sem '#.(string-rod "NMTOKENS")) :NMTOKENS)
|
||||
((equalp sem '#.(string-rod "NOTATION"))
|
||||
(cond ((rod= sem '#.(string-rod "CDATA")) :CDATA)
|
||||
((rod= sem '#.(string-rod "ID")) :ID)
|
||||
((rod= sem '#.(string-rod "IDREF")) :IDREFS)
|
||||
((rod= sem '#.(string-rod "IDREFS")) :IDREFS)
|
||||
((rod= sem '#.(string-rod "ENTITY")) :ENTITY)
|
||||
((rod= sem '#.(string-rod "ENTITIES")) :ENTITIES)
|
||||
((rod= sem '#.(string-rod "NMTOKEN")) :NMTOKEN)
|
||||
((rod= sem '#.(string-rod "NMTOKENS")) :NMTOKENS)
|
||||
((rod= sem '#.(string-rod "NOTATION"))
|
||||
(let (names)
|
||||
(p/S input)
|
||||
(expect input :\()
|
||||
@ -1923,15 +1919,15 @@
|
||||
(cond ((member cat '(:\" :\'))
|
||||
(make-internal-entdef (p/entity-value input)))
|
||||
((and (eq cat :nmtoken)
|
||||
(or (equalp sem '#.(string-rod "SYSTEM"))
|
||||
(equalp sem '#.(string-rod "PUBLIC"))))
|
||||
(or (rod= sem '#.(string-rod "SYSTEM"))
|
||||
(rod= sem '#.(string-rod "PUBLIC"))))
|
||||
(let (extid ndata)
|
||||
(setf extid (p/external-id input nil))
|
||||
(when (eq kind :general) ;NDATA allowed at all?
|
||||
(cond ((eq (peek-token input) :S)
|
||||
(p/S? input)
|
||||
(when (and (eq (peek-token input) :nmtoken)
|
||||
(equalp (nth-value 1 (peek-token input))
|
||||
(rod= (nth-value 1 (peek-token input))
|
||||
'#.(string-rod "NDATA")))
|
||||
(consume-token input)
|
||||
(p/S input)
|
||||
@ -1961,10 +1957,10 @@
|
||||
(defun p/external-id (input &optional (public-only-ok-p nil))
|
||||
;; xxx public-only-ok-p
|
||||
(multiple-value-bind (cat sem) (read-token input)
|
||||
(cond ((and (eq cat :nmtoken) (equalp sem '#.(string-rod "SYSTEM")))
|
||||
(cond ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "SYSTEM")))
|
||||
(p/S input)
|
||||
(make-extid nil (p/system-literal input)))
|
||||
((and (eq cat :nmtoken) (equalp sem '#.(string-rod "PUBLIC")))
|
||||
((and (eq cat :nmtoken) (rod= sem '#.(string-rod "PUBLIC")))
|
||||
(let (pub sys)
|
||||
(p/S input)
|
||||
(setf pub (p/pubid-literal input))
|
||||
@ -3390,9 +3386,7 @@
|
||||
(dolist (ns-decl ns-decls)
|
||||
;; check some namespace validity constraints
|
||||
(let ((prefix (car ns-decl))
|
||||
(uri (if (rod= #"" (cdr ns-decl))
|
||||
nil
|
||||
(cdr ns-decl))))
|
||||
(uri (cdr ns-decl)))
|
||||
(cond
|
||||
((and (rod= prefix #"xml")
|
||||
(not (rod= uri #"http://www.w3.org/XML/1998/namespace")))
|
||||
@ -3425,7 +3419,8 @@
|
||||
may be bound to an empty namespace URI, thus ~
|
||||
undeclaring it."))
|
||||
(t
|
||||
(push (cons prefix uri) *namespace-bindings*)
|
||||
(push (cons prefix (if (rod= #"" uri) nil uri))
|
||||
*namespace-bindings*)
|
||||
(sax:start-prefix-mapping (handler *ctx*)
|
||||
(car ns-decl)
|
||||
(cdr ns-decl))))))
|
||||
|
||||
@ -90,7 +90,8 @@
|
||||
(let ((binding (normalizer-find-prefix handler prefix)))
|
||||
(cond
|
||||
((null binding)
|
||||
(push-namespace prefix uri))
|
||||
(unless (and (null prefix) (zerop (length uri)))
|
||||
(push-namespace prefix uri)))
|
||||
((rod= (sax:attribute-value binding) uri))
|
||||
((member binding (car (xmlns-stack handler)))
|
||||
(setf (sax:attribute-value binding) uri))
|
||||
|
||||
Reference in New Issue
Block a user