UTF-8 fix, thanks to Francis Leboutte
This commit is contained in:
@ -2,5 +2,5 @@ all: dom.html index.html installation.html klacks.html quickstart.html sax.html
|
|||||||
|
|
||||||
%.html: %.xml html.xsl
|
%.html: %.xml html.xsl
|
||||||
xsltproc html.xsl $< >$@.tmp
|
xsltproc html.xsl $< >$@.tmp
|
||||||
chmod -w *.html
|
|
||||||
mv $@.tmp $@
|
mv $@.tmp $@
|
||||||
|
chmod -w $@
|
||||||
|
|||||||
@ -4,26 +4,13 @@
|
|||||||
<p>An XML parser written in Common Lisp.</p>
|
<p>An XML parser written in Common Lisp.</p>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
Closure XML was written by <a
|
Closure XML was written
|
||||||
href="http://www.stud.uni-karlsruhe.de/~unk6/">Gilbert Baumann</a>
|
by <a href="http://www.stud.uni-karlsruhe.de/~unk6/">Gilbert
|
||||||
(unk6 at rz.uni-karlsruhe.de) as part of the Closure web
|
Baumann</a> as part of the Closure web browser and is now
|
||||||
browser.<br/>
|
maintained by
|
||||||
Contributions to the parser by
|
<a href="mailto:david@lichteblau.com">David Lichteblau</a>.
|
||||||
|
It is licensed under Lisp-LGPL.
|
||||||
</p>
|
</p>
|
||||||
<ul>
|
|
||||||
<li>
|
|
||||||
Henrik Motakef (hmot at henrik-motakef.de)<br/>
|
|
||||||
(SAX layer; namespace support)
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="mailto:david@lichteblau.com">David Lichteblau</a> for <a
|
|
||||||
href="http://www.knowledgetools.de">knowledgeTools</a>
|
|
||||||
(conversion into an independent package; DOM bug fixing; validation)
|
|
||||||
and <a href="http://www.headcraft.de/">headcraft</a>
|
|
||||||
(most september/october 2004 changes) and privately (changes
|
|
||||||
since then).
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
CXML implements a <a
|
CXML implements a <a
|
||||||
@ -36,10 +23,6 @@
|
|||||||
other similar to StAX.
|
other similar to StAX.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>
|
|
||||||
CXML is licensed under Lisp-LGPL.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
Send bug reports to <a
|
Send bug reports to <a
|
||||||
href="mailto:cxml-devel@common-lisp.net">cxml-devel@common-lisp.net</a>
|
href="mailto:cxml-devel@common-lisp.net">cxml-devel@common-lisp.net</a>
|
||||||
@ -48,15 +31,31 @@
|
|||||||
information</a>).
|
information</a>).
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<h3>See also</h3>
|
<h3>Add-on features</h3>
|
||||||
<p>
|
<p>
|
||||||
Relax NG validation is available as a separate
|
The following libraries are available as separate downloads:
|
||||||
project: <a href="http://www.lichteblau.com/cxml-rng/">cxml-rng</a>.
|
</p>
|
||||||
|
<p>
|
||||||
|
⬗ 
|
||||||
|
<a href="http://www.lichteblau.com/cxml-rng/">cxml-rng</a>
|
||||||
|
  
|
||||||
|
Relax NG validation
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
⬗ 
|
||||||
|
<a href="http://www.lichteblau.com/cxml-stp/">cxml-stp</a>
|
||||||
|
  
|
||||||
|
STP, an alternative to DOM
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
|
||||||
<a name="changes"/>
|
<a name="changes"/>
|
||||||
<h3>Recent Changes</h3>
|
<h3>Recent Changes</h3>
|
||||||
|
<p class="nomargin"><tt>rel-2007-xx-yy</tt></p>
|
||||||
|
<ul class="nomargin">
|
||||||
|
<li>Various DTD serialization fixes</li>
|
||||||
|
<li>UTF-8 fix, thanks to Francis Leboutte</li>
|
||||||
|
</ul>
|
||||||
<p class="nomargin"><tt>rel-2007-07-07</tt></p>
|
<p class="nomargin"><tt>rel-2007-07-07</tt></p>
|
||||||
<ul class="nomargin">
|
<ul class="nomargin">
|
||||||
<li>
|
<li>
|
||||||
|
|||||||
@ -135,7 +135,7 @@
|
|||||||
(cond
|
(cond
|
||||||
((eq (dom:node-type parent) :cdata-section)
|
((eq (dom:node-type parent) :cdata-section)
|
||||||
(setf (dom:data parent) data))
|
(setf (dom:data parent) data))
|
||||||
((and last-child (eq (dom:node-type last-child) :text))
|
((and last-child (eq (dom:node-type last-child) :text))
|
||||||
;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer
|
;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer
|
||||||
;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten
|
;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten
|
||||||
;; erweitern, sonst ist das Dokument nicht normalisiert.
|
;; erweitern, sonst ist das Dokument nicht normalisiert.
|
||||||
|
|||||||
@ -250,7 +250,7 @@
|
|||||||
(setf rptr (%+ rptr 1)))
|
(setf rptr (%+ rptr 1)))
|
||||||
|
|
||||||
((%<= #|#b11000000|# byte0 #b11011111)
|
((%<= #|#b11000000|# byte0 #b11011111)
|
||||||
(cond ((< (%+ rptr 2) in-end)
|
(cond ((<= (%+ rptr 2) in-end)
|
||||||
(put
|
(put
|
||||||
(dpb (ldb (byte 5 0) byte0) (byte 5 6)
|
(dpb (ldb (byte 5 0) byte0) (byte 5 6)
|
||||||
(dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0)
|
(dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0)
|
||||||
@ -260,7 +260,7 @@
|
|||||||
(return))))
|
(return))))
|
||||||
|
|
||||||
((%<= #|#b11100000|# byte0 #b11101111)
|
((%<= #|#b11100000|# byte0 #b11101111)
|
||||||
(cond ((< (%+ rptr 3) in-end)
|
(cond ((<= (%+ rptr 3) in-end)
|
||||||
(put
|
(put
|
||||||
(dpb (ldb (byte 4 0) byte0) (byte 4 12)
|
(dpb (ldb (byte 4 0) byte0) (byte 4 12)
|
||||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6)
|
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6)
|
||||||
@ -271,7 +271,7 @@
|
|||||||
(return))))
|
(return))))
|
||||||
|
|
||||||
((%<= #|#b11110000|# byte0 #b11110111)
|
((%<= #|#b11110000|# byte0 #b11110111)
|
||||||
(cond ((< (%+ rptr 4) in-end)
|
(cond ((<= (%+ rptr 4) in-end)
|
||||||
(put
|
(put
|
||||||
(dpb (ldb (byte 3 0) byte0) (byte 3 18)
|
(dpb (ldb (byte 3 0) byte0) (byte 3 18)
|
||||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12)
|
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12)
|
||||||
@ -283,7 +283,7 @@
|
|||||||
(return))))
|
(return))))
|
||||||
|
|
||||||
((%<= #|#b11111000|# byte0 #b11111011)
|
((%<= #|#b11111000|# byte0 #b11111011)
|
||||||
(cond ((< (%+ rptr 5) in-end)
|
(cond ((<= (%+ rptr 5) in-end)
|
||||||
(put
|
(put
|
||||||
(dpb (ldb (byte 2 0) byte0) (byte 2 24)
|
(dpb (ldb (byte 2 0) byte0) (byte 2 24)
|
||||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18)
|
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18)
|
||||||
@ -296,7 +296,7 @@
|
|||||||
(return))))
|
(return))))
|
||||||
|
|
||||||
((%<= #|#b11111100|# byte0 #b11111101)
|
((%<= #|#b11111100|# byte0 #b11111101)
|
||||||
(cond ((< (%+ rptr 6) in-end)
|
(cond ((<= (%+ rptr 6) in-end)
|
||||||
(put
|
(put
|
||||||
(dpb (ldb (byte 1 0) byte0) (byte 1 30)
|
(dpb (ldb (byte 1 0) byte0) (byte 1 30)
|
||||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24)
|
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24)
|
||||||
|
|||||||
29
test/misc.lisp
Normal file
29
test/misc.lisp
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
;;;
|
||||||
|
;;; When I grow up, I'll be a complete test suite.
|
||||||
|
|
||||||
|
(deftest utf-8
  ;; Round-trip smoke test for the UTF-8 conversion functions: for each code
  ;; point that can be tested, build the string "abc" followed by that
  ;; character, pass it through CXML:ROD-TO-UTF8-STRING and then
  ;; CXML:UTF8-STRING-TO-ROD, and print a FAIL line if the result differs
  ;; from the input.  Mismatches are only reported on standard output; the
  ;; form itself always returns no values.
  (labels ((sample (code)
             ;; String under test for CODE.  Codes below #x10000 are used as
             ;; a single character; anything above is split into a
             ;; high/low surrogate pair.
             (if (>= code #x10000)
                 (let ((offset (- code #x10000)))
                   (format nil "abc~C~C"
                           (code-char (logior #xD800 (ldb (byte 10 10) offset)))
                           (code-char (logior #xDC00 (ldb (byte 10 0) offset)))))
                 (format nil "abc~C" (code-char code))))
           (check (code)
             ;; Encode and decode the sample for CODE; report any difference.
             (let* ((expected (sample code))
                    (actual (cxml:utf8-string-to-rod
                             (cxml:rod-to-utf8-string expected))))
               (unless (string= expected actual)
                 (format t "FAIL: ~S ~A ~A~%"
                         (code-char code)
                         (map 'vector #'char-code expected)
                         (map 'vector #'char-code actual)))))
           (doit (from below)
             ;; Check every code in [FROM, BELOW), skipping codes for which
             ;; CODE-CHAR returns NIL as well as #xFFFE and #xFFFF.
             (loop for code from from below below
                   unless (or (null (code-char code))
                              (eql code #xfffe)
                              (eql code #xffff))
                     do (check code))))
    ;; Everything from space up to, but excluding, #xD800 ...
    (doit 32 #xD800)
    ;; ... and everything from #x10000 upwards; this is an empty range when
    ;; CHAR-CODE-LIMIT is not larger than #x10000.
    (doit #x10000 char-code-limit)
    (values)))
|
||||||
Reference in New Issue
Block a user