From 5fcc11f2448cc6b9b8d36e6b77a20240bd41de46 Mon Sep 17 00:00:00 2001
From: dlichteblau
Date: Sun, 22 Jul 2007 19:59:26 +0000
Subject: [PATCH] UTF-8 fix, thanks to Francis Leboutte
---
doc/GNUmakefile | 2 +-
doc/index.xml | 53 ++++++++++++++++++++++----------------------
dom/dom-builder.lisp | 2 +-
runes/encodings.lisp | 10 ++++-----
test/misc.lisp | 29 ++++++++++++++++++++++++
5 files changed, 62 insertions(+), 34 deletions(-)
create mode 100644 test/misc.lisp
diff --git a/doc/GNUmakefile b/doc/GNUmakefile
index dfe2ad2..bf14b11 100644
--- a/doc/GNUmakefile
+++ b/doc/GNUmakefile
@@ -2,5 +2,5 @@ all: dom.html index.html installation.html klacks.html quickstart.html sax.html
%.html: %.xml html.xsl
xsltproc html.xsl $< >$@.tmp
- chmod -w *.html
mv $@.tmp $@
+ chmod -w $@
diff --git a/doc/index.xml b/doc/index.xml
index 4130d8f..1ce721e 100644
--- a/doc/index.xml
+++ b/doc/index.xml
@@ -4,26 +4,13 @@
An XML parser written in Common Lisp.
- Closure XML was written by Gilbert Baumann
- (unk6 at rz.uni-karlsruhe.de) as part of the Closure web
- browser.
- Contributions to the parser by
-
-
- -
- Henrik Motakef (hmot at henrik-motakef.de)
- (SAX layer; namespace support)
-
- -
- David Lichteblau for knowledgeTools
- (conversion into an independent package; DOM bug fixing; validation)
- and headcraft
- (most september/october 2004 changes) and privately (changes
- since then).
-
-
+ Closure XML was written
+ by Gilbert
+ Baumann as part of the Closure web browser and is now
+ maintained by
+ David Lichteblau.
+ It is licensed under Lisp-LGPL.
+
CXML implements a
-
- CXML is licensed under Lisp-LGPL.
-
-
Send bug reports to cxml-devel@common-lisp.net
@@ -48,15 +31,31 @@
information
).
- See also
+ Add-on features
- Relax NG validation is available as a separate
- project: cxml-rng.
+ The following libraries are available as separate downloads:
+
+
+ ⬗
+ cxml-rng
+
+ Relax NG validation
+
+
+ ⬗
+ cxml-stp
+
+ STP, an alternative to DOM
Recent Changes
+ rel-2007-xx-yy
+
+ - Various DTD serialization fixes
+ - UTF-8 fix, thanks to Francis Leboutte
+
rel-2007-07-07
-
diff --git a/dom/dom-builder.lisp b/dom/dom-builder.lisp
index ab3e9e3..8e1bb41 100644
--- a/dom/dom-builder.lisp
+++ b/dom/dom-builder.lisp
@@ -135,7 +135,7 @@
(cond
((eq (dom:node-type parent) :cdata-section)
(setf (dom:data parent) data))
- ((and last-child (eq (dom:node-type last-child) :text))
+ ((and last-child (eq (dom:node-type last-child) :text))
;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer
;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten
;; erweitern, sonst ist das Dokument nicht normalisiert.
diff --git a/runes/encodings.lisp b/runes/encodings.lisp
index 6e26ed7..5788adb 100644
--- a/runes/encodings.lisp
+++ b/runes/encodings.lisp
@@ -250,7 +250,7 @@
(setf rptr (%+ rptr 1)))
((%<= #|#b11000000|# byte0 #b11011111)
- (cond ((< (%+ rptr 2) in-end)
+ (cond ((<= (%+ rptr 2) in-end)
(put
(dpb (ldb (byte 5 0) byte0) (byte 5 6)
(dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0)
@@ -260,7 +260,7 @@
(return))))
((%<= #|#b11100000|# byte0 #b11101111)
- (cond ((< (%+ rptr 3) in-end)
+ (cond ((<= (%+ rptr 3) in-end)
(put
(dpb (ldb (byte 4 0) byte0) (byte 4 12)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6)
@@ -271,7 +271,7 @@
(return))))
((%<= #|#b11110000|# byte0 #b11110111)
- (cond ((< (%+ rptr 4) in-end)
+ (cond ((<= (%+ rptr 4) in-end)
(put
(dpb (ldb (byte 3 0) byte0) (byte 3 18)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12)
@@ -283,7 +283,7 @@
(return))))
((%<= #|#b11111000|# byte0 #b11111011)
- (cond ((< (%+ rptr 5) in-end)
+ (cond ((<= (%+ rptr 5) in-end)
(put
(dpb (ldb (byte 2 0) byte0) (byte 2 24)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18)
@@ -296,7 +296,7 @@
(return))))
((%<= #|#b11111100|# byte0 #b11111101)
- (cond ((< (%+ rptr 6) in-end)
+ (cond ((<= (%+ rptr 6) in-end)
(put
(dpb (ldb (byte 1 0) byte0) (byte 1 30)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24)
diff --git a/test/misc.lisp b/test/misc.lisp
new file mode 100644
index 0000000..84d65f4
--- /dev/null
+++ b/test/misc.lisp
@@ -0,0 +1,29 @@
+;;;
+;;; When I'll grow up, I'll be a complete test suite.
+
+(deftest utf-8
+ (flet ((doit (from below)
+ (loop for code from from below below do
+ (when (and (code-char code)
+ (not (eql code #xfffe))
+ (not (eql code #xffff)))
+ (let* ((a (if (< code #x10000)
+ (format nil "abc~C" (code-char code))
+ (let* ((x (- code #x10000))
+ (lo (ldb (byte 10 0) x))
+ (hi (ldb (byte 10 10) x)))
+ (format nil "abc~C~C"
+ (code-char (logior #xD800 hi))
+ (code-char
+ (logior #xDC00 lo))))))
+ (b (cxml:utf8-string-to-rod
+ (cxml:rod-to-utf8-string
+ a))))
+ (unless (string= a b)
+ (format t "FAIL: ~S ~A ~A~%"
+ (code-char code)
+ (map 'vector #'char-code a)
+ (map 'vector #'char-code b))))))))
+ (doit 32 #xD800)
+ (doit #x10000 char-code-limit)
+ (values)))