UTF-8 fix, thanks to Francis Leboutte
This commit is contained in:
@ -2,5 +2,5 @@ all: dom.html index.html installation.html klacks.html quickstart.html sax.html
|
||||
|
||||
%.html: %.xml html.xsl
|
||||
xsltproc html.xsl $< >$@.tmp
|
||||
chmod -w *.html
|
||||
mv $@.tmp $@
|
||||
chmod -w $@
|
||||
|
||||
@ -4,26 +4,13 @@
|
||||
<p>An XML parser written in Common Lisp.</p>
|
||||
|
||||
<p>
|
||||
Closure XML was written by <a
|
||||
href="http://www.stud.uni-karlsruhe.de/~unk6/">Gilbert Baumann</a>
|
||||
(unk6 at rz.uni-karlsruhe.de) as part of the Closure web
|
||||
browser.<br/>
|
||||
Contributions to the parser by
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Henrik Motakef (hmot at henrik-motakef.de)<br/>
|
||||
(SAX layer; namespace support)
|
||||
</li>
|
||||
<li>
|
||||
<a href="mailto:david@lichteblau.com">David Lichteblau</a> for <a
|
||||
href="http://www.knowledgetools.de">knowledgeTools</a>
|
||||
(conversion into an independent package; DOM bug fixing; validation)
|
||||
and <a href="http://www.headcraft.de/">headcraft</a>
|
||||
(most September/October 2004 changes) and privately (changes
|
||||
since then).
|
||||
</li>
|
||||
</ul>
|
||||
Closure XML was written
|
||||
by <a href="http://www.stud.uni-karlsruhe.de/~unk6/">Gilbert
|
||||
Baumann</a> as part of the Closure web browser and is now
|
||||
maintained by
|
||||
<a href="mailto:david@lichteblau.com">David Lichteblau</a>.
|
||||
It is licensed under Lisp-LGPL.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
CXML implements a <a
|
||||
@ -36,10 +23,6 @@
|
||||
other similar to StAX.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
CXML is licensed under Lisp-LGPL.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Send bug reports to <a
|
||||
href="mailto:cxml-devel@common-lisp.net">cxml-devel@common-lisp.net</a>
|
||||
@ -48,15 +31,31 @@
|
||||
information</a>).
|
||||
</p>
|
||||
|
||||
<h3>See also</h3>
|
||||
<h3>Add-on features</h3>
|
||||
<p>
|
||||
Relax NG validation is available as a separate
|
||||
project: <a href="http://www.lichteblau.com/cxml-rng/">cxml-rng</a>.
|
||||
The following libraries are available as separate downloads:
|
||||
</p>
|
||||
<p>
|
||||
⬗ 
|
||||
<a href="http://www.lichteblau.com/cxml-rng/">cxml-rng</a>
|
||||
  
|
||||
Relax NG validation
|
||||
</p>
|
||||
<p>
|
||||
⬗ 
|
||||
<a href="http://www.lichteblau.com/cxml-stp/">cxml-stp</a>
|
||||
  
|
||||
STP, an alternative to DOM
|
||||
</p>
|
||||
|
||||
|
||||
<a name="changes"/>
|
||||
<h3>Recent Changes</h3>
|
||||
<p class="nomargin"><tt>rel-2007-xx-yy</tt></p>
|
||||
<ul class="nomargin">
|
||||
<li>Various DTD serialization fixes</li>
|
||||
<li>UTF-8 fix, thanks to Francis Leboutte</li>
|
||||
</ul>
|
||||
<p class="nomargin"><tt>rel-2007-07-07</tt></p>
|
||||
<ul class="nomargin">
|
||||
<li>
|
||||
|
||||
@ -135,7 +135,7 @@
|
||||
(cond
|
||||
((eq (dom:node-type parent) :cdata-section)
|
||||
(setf (dom:data parent) data))
|
||||
((and last-child (eq (dom:node-type last-child) :text))
|
||||
((and last-child (eq (dom:node-type last-child) :text))
|
||||
;; SAX:CHARACTERS is called several times for the same text node
|
||||
;; around entities. Here we have to extend the existing node,
|
||||
;; otherwise the document is not normalized.
|
||||
|
||||
@ -250,7 +250,7 @@
|
||||
(setf rptr (%+ rptr 1)))
|
||||
|
||||
((%<= #|#b11000000|# byte0 #b11011111)
|
||||
(cond ((< (%+ rptr 2) in-end)
|
||||
(cond ((<= (%+ rptr 2) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 5 0) byte0) (byte 5 6)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0)
|
||||
@ -260,7 +260,7 @@
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11100000|# byte0 #b11101111)
|
||||
(cond ((< (%+ rptr 3) in-end)
|
||||
(cond ((<= (%+ rptr 3) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 4 0) byte0) (byte 4 12)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6)
|
||||
@ -271,7 +271,7 @@
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11110000|# byte0 #b11110111)
|
||||
(cond ((< (%+ rptr 4) in-end)
|
||||
(cond ((<= (%+ rptr 4) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 3 0) byte0) (byte 3 18)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12)
|
||||
@ -283,7 +283,7 @@
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11111000|# byte0 #b11111011)
|
||||
(cond ((< (%+ rptr 5) in-end)
|
||||
(cond ((<= (%+ rptr 5) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 2 0) byte0) (byte 2 24)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18)
|
||||
@ -296,7 +296,7 @@
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11111100|# byte0 #b11111101)
|
||||
(cond ((< (%+ rptr 6) in-end)
|
||||
(cond ((<= (%+ rptr 6) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 1 0) byte0) (byte 1 30)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24)
|
||||
|
||||
29
test/misc.lisp
Normal file
29
test/misc.lisp
Normal file
@ -0,0 +1,29 @@
|
||||
;;;
|
||||
;;; When I grow up, I'll be a complete test suite.
|
||||
|
||||
;; UTF-8 round-trip check.
;;
;; For every CODE in the ranges passed to DOIT for which CODE-CHAR returns
;; non-NIL, build a short string A, run it through CXML:ROD-TO-UTF8-STRING
;; and then CXML:UTF8-STRING-TO-ROD, and compare the result B with A using
;; STRING=.  A mismatch is only reported by printing a "FAIL:" line to
;; standard output; the body always finishes with (VALUES), i.e. it returns
;; no values.
;; NOTE(review): whether DEFTEST can detect a printed FAIL line is not
;; visible here -- confirm against the DEFTEST definition in use.
(deftest utf-8
|
||||
;; DOIT checks every code in the half-open range [FROM, BELOW).
(flet ((doit (from below)
|
||||
(loop for code from from below below do
|
||||
;; Skip codes that have no character object, plus #xFFFE and #xFFFF.
;; Neither range passed to DOIT below contains #xFFFE or #xFFFF, so
;; those two guards never reject anything in the calls made here.
(when (and (code-char code)
|
||||
(not (eql code #xfffe))
|
||||
(not (eql code #xffff)))
|
||||
;; A is the input string: "abc" followed by the character under test.
(let* ((a (if (< code #x10000)
|
||||
(format nil "abc~C" (code-char code))
|
||||
;; Codes >= #x10000 are written as two characters: the offset
;; from #x10000 is split into its upper and lower 10 bits, which
;; are OR-ed into #xD800 and #xDC00 respectively.
;; NOTE(review): this relies on CODE-CHAR returning a character
;; for codes in the #xD800-#xDFFF range -- confirm for the Lisp
;; implementations this test is run on.
(let* ((x (- code #x10000))
|
||||
(lo (ldb (byte 10 0) x))
|
||||
(hi (ldb (byte 10 10) x)))
|
||||
(format nil "abc~C~C"
|
||||
(code-char (logior #xD800 hi))
|
||||
(code-char
|
||||
(logior #xDC00 lo))))))
|
||||
;; B is A converted by ROD-TO-UTF8-STRING and back by UTF8-STRING-TO-ROD.
(b (cxml:utf8-string-to-rod
|
||||
(cxml:rod-to-utf8-string
|
||||
a))))
|
||||
;; On mismatch, print the character and the code vectors of A and B.
(unless (string= a b)
|
||||
(format t "FAIL: ~S ~A ~A~%"
|
||||
(code-char code)
|
||||
(map 'vector #'char-code a)
|
||||
(map 'vector #'char-code b))))))))
|
||||
;; Codes 32 up to (but excluding) #xD800.
(doit 32 #xD800)
|
||||
;; Codes #x10000 up to (but excluding) CHAR-CODE-LIMIT.  Codes
;; #xD800 through #xFFFF are covered by neither call.
(doit #x10000 char-code-limit)
|
||||
(values)))
|
||||
Reference in New Issue
Block a user