From a4e680e949795e07be981a2663eb9bdc3d3028b1 Mon Sep 17 00:00:00 2001
From: dlichteblau An XML parser written in Common Lisp.
+ CXML currently implements a namespace-aware, validating SAX-like
+ XML 1.0
+ parser as well as the DOM Level 1 Core
+ interfaces.
+
+ CXML is licensed under (L)LGPL.
+
Send bug reports to cxml-devel@common-lisp.net
@@ -63,59 +84,16 @@
information).
- (David's tla archive is out of date.)
- patch-xyz (200-mm-dd) rel-2005-06-25 patch-357 (2004-10-10) CXML provides three packages:
- CXML should be portable to all Common Lisp implementations
- supporting gray streams. Currently assumed to work are:
-
- Incomplete port:
-
- Optional configuration (skip this unless you know better): CXML
- has full Unicode code support -- even on Lisps without Unicode
- strings. On non-unicode aware Lisps, DOMString is
- implemented as an array of character codes. CXML will auto-detect
- at compile-time which string representation to use. To override
- the auto-detection, you can set one of the features
- :rune-is-character and :rune-is-octet before
- loading cxml.asd. (fixme: feature
- :rune-is-octet is of course misnamed, since it uses 16bit
- runes, not 8bit runes. It will probably be renamed
- to :rune-is-integer at some point.)
-
- ASDF is used for
- compilation. The following instructions assume that ASDF has
- already been loaded.
-
- Prerequisites.
- CXML needs the puri library.
-
- Compiling and loading CXML.
- Register the .asd file, e.g. by symlinking it:
- Then compile CXML using:
- You can then try the quick-start example.
- Check out the XML and DOM testsuites:
- Omit -D to get the latest version, which may not work
- with cxml yet. The ant step is necessary to run the DOM
- tests.
- Usage and expected output:
- fixme: Add an explanation of xml/sax-tests here.
-
- fixme My parser does not understand the current testsuite
- anymore. To fix this problem, revert the affected files
- manually after check-out:
-
- The log message for the changes reads "Removed unnecessary
- xml:base attribute". If I understand correctly, only
- DOM 3 parsers provide the baseURI attribute necessary for
- understanding xmlconf.xml now. We don't have that
- yet.
-
- Make sure to install and load cxml first.
- Create a test file called example.xml: Parse example.xml into a DOM tree (read
- more): Inspect the DOM tree (read more): Serialize the DOM document back into a stream (read more): As an alternative to DOM, parse into xmls-compatible list
- structure (read more):
- Closure XML Parser
Download
-
-
- $ export CVSROOT=:pserver:anonymous@common-lisp.net:/project/cxml/cvsroot
-$ cvs login
-Logging in to :pserver:anonymous@common-lisp.net:2401/project/cxml/cvsroot
-CVS password: anonymous
-$ cvs co cxml
-
- Contents
-
-
-
Recent Changes
-
+
@@ -148,149 +126,14 @@ $ cvs co cxml
-
- CXML Modules
-
-
-
-
- Installation
-
-
-
-
-
- $ ln -sf `pwd`/cxml.asd /path/to/your/registry/
- * (asdf:operate 'asdf:load-op :cxml)
-
- Tests
- $ export CVSROOT=:pserver:anonymous@dev.w3.org:/sources/public
-$ cvs login # password is "anonymous"
-$ cvs co 2001/XML-Test-Suite/xmlconf
-$ cvs co -D '2005-05-06 23:00' 2001/DOM-Test-Suite
-$ cd 2001/DOM-Test-Suite && ant dom1-dtd
- * (xmlconf:run-all-tests "/path/to/2001/XML-Test-Suite/xmlconf/")
-0/556 tests failed; 1606 tests were skipped
-* (domtest:run-all-tests "/path/to/2001/DOM-Test-Suite/")
-0/450 tests failed; 71 tests were skipped
-
- $ cd 2001/XML-Test-Suite/xmlconf/
-xmltest$ patch -p0 -R </path/to/cxml/test/xmlconf-base.diff
-
- Using CXML
-
-
- Quick-Start Example
-
- * (with-open-file (s "example.xml" :direction :output)
- (write-string "<test a='b'><child/></test>" s))
-
- * (cxml:parse-file "example.xml" (dom:make-dom-builder))
-#<DOM-IMPL::DOCUMENT @ #x72206172>
-;; save result for later:
-* (defparameter *example* *)
-*EXAMPLE*
-
- * (dom:document-element *example*)
-#<DOM-IMPL::ELEMENT test @ #x722b6ba2>
-* (dom:tag-name (dom:document-element *example*))
-"test"
-* (dom:child-nodes (dom:document-element *example*))
-#(#<DOM-IMPL::ELEMENT child @ #x722b6d8a>)
-* (dom:get-attribute (dom:document-element *example*) "a")
-"b"
-
- (cxml:unparse-document *example* *standard-output*)
-<test a="b"><child></child></test>
-
- * (cxml:parse-file "example.xml" (cxml-xmls:make-xmls-builder))
-("test" (("a" "b")) ("child" NIL))
-
-
- Parsing and Validating
-
- Arguments:
-
- Common keyword arguments: -
--
-
-
(cxml:parse-file "test.xml" (dom:make-dom-builder))- - -
-
Keyword arguments:
-- The following canonical values are allowed: -
-- With an indentation level, pretty-print the XML by - inserting additional whitespace. Note that indentation - changes the document model and should only be used if whitespace - does not matter to the application. -
-- unparse-document-to-octets returns an (unsigned-byte - 8) array, whereas unparse-document writes - characters. unparse-document is useful together - with with-output-to-string. However, note that the - resulting document in both cases is UTF-8 encoded, so the - characters written by unparse-document are really UTF-8 - bytes encoded as characters. -
- --
- These function provide the low-level mechanism used by the DOM - serialization functions. To serialize a document without building - its DOM tree first, create a sink handle and call SAX functions on that - handle. sax:end-document returns the serialized form of - the document described by the SAX events. -
- --
- Example: -
-(with-xml-output (make-octet-stream-sink stream :indentation 2 :canonical nil) - (with-element "foo" - (attribute "xyz" "abc") - (with-element "bar" - (attribute "blub" "bla")) - (text "Hi there.")))-
- Prints this to stream, which must be an - (unsigned-byte 8) stream: -
-<foo xyz="abc"> - <bar blub="bla"></bar> - Hi there. -</foo>-
- (Note that these functions accept both strings and rods, so we - could write "foo" instead of #"foo" above.) -
- --
- xhtmlgen is included as contrib/xhtmlgen.lisp in - the cxml distribution. Example: -
-(let ((sink (cxml:make-character-stream-sink *standard-output*))) - (sax:start-document sink) - (xhtml-generator:write-doctype sink) - (xhtml-generator:with-html sink - (:html - (:head - (:title "Titel")) - (:body - ((:p "style" "font-weight: bold") - "Inhalt") - (:ul - (:li "Eins") - (:li "Zwei") - (:li "Drei"))))) - (sax:end-document sink))- - -
-
(let ((d (parse-file "~/test.xml" (dom:make-dom-builder))) - (x (parse-dtd-file "~/test.dtd"))) - (dom:map-document (cxml:make-validator x #"foo") d))- -
-
-
- Like other XML parsers written in Lisp, CXML can work with - documents represented as list structures. The specific model - implemented by cxml is compatible with the xmls parser. Xmls - list structures are a simpler and faster alternative to full DOM - document trees. They also serve as an example showing how to - implement user-defined document models as an independent layer - over the the base parser (c.f. xml/xmls-compat.lisp in - the cxml distribution). However, note that the list structures do - not include all information available in DOM documents and are - sometimes more difficult to work wth since many DOM functions - cannot be implemented on them. -
--
- Example: -
-(cxml:parse-file "test.xml" (cxml-xmls:make-xmls-builder))-
-
- Use this function to serialize XMLS data. For example, we could - define a replacement for xmls:write-xml like this: -
-(defun write-xml (stream node &key indent) - (let ((sink (cxml:make-character-stream-sink - stream :canonical nil :indentation indent))) - (cxml-xmls:map-node sink node)))-
-
- The node list's car can also be a cons of local name - and namespace prefix ns. - fixme: It is unclear to me how namespaces are meant to - work in xmls, since xmls documentation differs from how xmls - actually works in current releases. Usually applications need to - know both the namespace prefix and the namespace URI. We - currently follow the xmls implementation and use the - namespace prefix instead of following its documentation which - shows the URI. We do not follow xmls in munging xmlns attribute - values. Attributes themselves have namespaces and it is not clear - to me how that works in xmls. -
--
-
- - -- As explained above, the XML parser handles character encoding and - uses 16bit strings internally. Instead of using characters and strings - it uses runes and rods. This is seen as a - feature, but can be inconvenient. -
-- Note that the recoder approach does not work with the DOM - builder, since DOM is specified to use UTF-16. -
--
- Example. In a Lisp which ordinarily would use octet vector rods: -
-CL-USER(14): (cxml:parse-string "<test/>" (cxml-xmls:make-xmls-builder)) -(#(116 101 115 116) NIL)-
- Use a SAX recoder to get strings instead:: -
-CL-USER(17): (parse-string "<test/>" (cxml:make-recoder (cxml-xmls:make-xmls-builder)))
-("test" NIL)
-
-
- - To avoid spending time parsing the same DTD over and over again, - CXML can cache DTD objects. The parser consults - cxml:*dtd-cache* whenever it is looking for an external - subset in a document which does not have an internal subset and - uses the cached DTD instance if one is present in the cache for - the System ID in question. -
-- Note that DTDs do not expire from the cache automatically. - (Future versions of CXML might introduce automatic checks for - outdated DTDs.) -
--
-
-
-
-
-
-
- fixme: thread-safety -
- - -- External entities (for example, DTDs) are referred to using their - Public and System IDs. Usually the System ID, a URI, is used to - locate the entity. CXML itself handles only file://-URIs, but - many System IDs in practical use are http://-URIs. There are two - different mechanims applications can use to allow CXML to locate - entities using arbitrary Public ID or System ID: -
-- This section describes XML Catalogs, the second solution. CXML - implements Oasis - XML Catalogs. -
--
-
-
-
-
- Example: -
-* (setf cxml:*catalog* nil) -* (cxml:parse-file "test.xhtml" nil) -=> Error: URI scheme :HTTP not supported - -* (setf cxml:*catalog* (cxml:make-catalog)) -* (cxml:parse-file "test.xhtml" nil) -;; no error! -NIL-
- Note that parsed catalog files are cached in the catalog object. - Catalog files cached do not expire automatically. To ensure that - all catalog files are parsed again, create a new catalog object. -
- - -- A SAX handler is an arbitrary objects that implements some of the - generic functions in the SAX package. Note that no default - handler class is necessary, because all generic functions have default - methods which do nothing. SAX functions are: -
- The entity declaration methods are similar to Java SAX - definitions, but parameter entities are distinguished from - general entities not by a % prefix to the name, but by - the kind argument, either :parameter or - :general. -
-- The arguments to sax:element-declaration and - sax:attribute-declaration differ significantly from their - Java counterparts. -
-- fixme: For more information on these functions refer to the docstrings. -
- - - -- CXML implements the DOM Level 1 Core interfaces. Explaining - DOM is better left to the specification, - so please refer to the official W3C documents for DOM. -
-- However, there is no "standard" DOM mapping for Lisp. DOM - is specified - in CORBA IDL, but it refrains from using object-oriented IDL - features, allowing for a much more natural Lisp implemenation than - the the ordinary IDL/Lisp mapping would. -
-- Differences between CXML's DOM and the direct IDL/Lisp mapping: -
-Example:
-XML(97): (dom:node-type - (dom:document-element - (cxml:parse-file "~/test.xml" (dom:make-dom-builder)))) -:ELEMENTdiff --git a/cxml.asd b/cxml.asd index 1e1e4ce..20d256d 100644 --- a/cxml.asd +++ b/cxml.asd @@ -98,7 +98,6 @@ (:file "dom-impl" :depends-on ("package")) (:file "dom-builder" :depends-on ("dom-impl")) (:file "unparse" :depends-on ("package")) - (:file "simple-dom" :depends-on ("package")) (:file "dom-sax" :depends-on ("package"))) :depends-on (:xml)) diff --git a/doc/cxml.css b/doc/cxml.css new file mode 100644 index 0000000..d35d8d6 --- /dev/null +++ b/doc/cxml.css @@ -0,0 +1,42 @@ +div.sidebar { + float: right; + background-color: #eeeeee; + border: 2pt solid black; + margin: 0em 2pt 1em 2em; + min-width: 15%; + padding: 0pt 5pt 5pt 5pt; +} + +div.sidebar ul { + padding: 0pt 0pt 0pt 1em; + margin: 0 0 1em; +} + +body { + color: #000000; + background-color: #ffffff; + margin-right: 0pt; + margin-bottom: 10%; + padding-left: 30px; +} + +h1,h2,h3 { + margin-left: -30px; +} + +pre { + background-color: #eeeeee; + border: solid 1px #d0d0d0; + padding: 1em; + margin-right: 10%; +} + +.def { + background-color: #ddddff; + font-weight: bold; +} + +.nomargin { + margin-bottom: 0; + margin-top: 0; +} diff --git a/doc/installation.html b/doc/installation.html new file mode 100644 index 0000000..365f016 --- /dev/null +++ b/doc/installation.html @@ -0,0 +1,164 @@ + + + + +
$ export CVSROOT=:pserver:anonymous@common-lisp.net:/project/cxml/cvsroot +$ cvs login +Logging in to :pserver:anonymous@common-lisp.net:2401/project/cxml/cvsroot +CVS password: anonymous +$ cvs co cxml+ +
+ CXML should be portable to all Common Lisp implementations + supporting gray streams. Currently supported are ACL, CLISP, + CMUCL, LispWorks, OpenMCL, and SBCL. +
++ ASDF is used for + compilation. The following instructions assume that ASDF has + already been loaded. +
+ ++ Prerequisites. + CXML needs the puri library. +
+ ++ Compiling and loading CXML. + Register the .asd file, e.g. by symlinking it: +
+$ ln -sf `pwd`/cxml.asd /path/to/your/registry/+
Then compile CXML using:
+* (asdf:operate 'asdf:load-op :cxml)+ +
+ You can then try the quick-start example. +
+ + +Check out the XML and DOM testsuites:
+$ export CVSROOT=:pserver:anonymous@dev.w3.org:/sources/public +$ cvs login # password is "anonymous" +$ cvs co 2001/XML-Test-Suite/xmlconf +$ cvs co -D '2005-05-06 23:00' 2001/DOM-Test-Suite +$ cd 2001/DOM-Test-Suite && ant dom1-dtd+
+ Omit -D to get the latest version, which may not work + with cxml yet. The ant step is necessary to run the DOM + tests. +
+Usage and expected output:
+* (xmlconf:run-all-tests "/path/to/2001/XML-Test-Suite/xmlconf/") +0/556 tests failed; 1606 tests were skipped +* (domtest:run-all-tests "/path/to/2001/DOM-Test-Suite/") +0/449 tests failed; 71 tests were skipped+ +
+ fixme: Add an explanation of xml/sax-tests here. +
+ ++ fixme My parser does not understand the current testsuite + anymore. To fix this problem, revert the affected files + manually after check-out: +
+ +$ cd 2001/XML-Test-Suite/xmlconf/ +xmltest$ patch -p0 -R </path/to/cxml/test/xmlconf-base.diff+ +
+ The log message for the changes reads "Removed unnecessary + xml:base attribute". If I understand correctly, only + DOM 3 parsers provide the baseURI attribute necessary for + understanding xmlconf.xml now. We don't have that + yet. +
+ + diff --git a/doc/using.html b/doc/using.html new file mode 100644 index 0000000..02c7589 --- /dev/null +++ b/doc/using.html @@ -0,0 +1,705 @@ + + + + ++ Make sure to install and load cxml first. +
+ +Create a test file called example.xml:
+* (with-open-file (s "example.xml" :direction :output) + (write-string "<test a='b'><child/></test>" s))+ +
Parse example.xml into a DOM tree (read + more):
+* (cxml:parse-file "example.xml" (dom:make-dom-builder)) +#<DOM-IMPL::DOCUMENT @ #x72206172> +;; save result for later: +* (defparameter *example* *) +*EXAMPLE*+ +
Inspect the DOM tree (read more):
+* (dom:document-element *example*) +#<DOM-IMPL::ELEMENT test @ #x722b6ba2> +* (dom:tag-name (dom:document-element *example*)) +"test" +* (dom:child-nodes (dom:document-element *example*)) +#(#<DOM-IMPL::ELEMENT child @ #x722b6d8a>) +* (dom:get-attribute (dom:document-element *example*) "a") +"b"+ +
Serialize the DOM document back into a stream (read more):
+(cxml:unparse-document *example* *standard-output*) +<test a="b"><child></child></test>+ +
As an alternative to DOM, parse into xmls-compatible list + structure (read more):
+* (cxml:parse-file "example.xml" (cxml-xmls:make-xmls-builder))
+("test" (("a" "b")) ("child" NIL))
+
+
+ +
+ Common keyword arguments: +
++
+
+
(cxml:parse-file "test.xml" (dom:make-dom-builder))+ + +
+
Keyword arguments:
++ The following canonical values are allowed: +
++ With an indentation level, pretty-print the XML by + inserting additional whitespace. Note that indentation + changes the document model and should only be used if whitespace + does not matter to the application. +
++ unparse-document-to-octets returns an (unsigned-byte + 8) array, whereas unparse-document writes + characters. unparse-document is useful together + with with-output-to-string. However, note that the + resulting document in both cases is UTF-8 encoded, so the + characters written by unparse-document are really UTF-8 + bytes encoded as characters. +
+ ++
+ These functions provide the low-level mechanism used by the DOM + serialization functions. To serialize a document without building + its DOM tree first, create a sink handle and call SAX functions on that + handle. sax:end-document returns the serialized form of + the document described by the SAX events. +
+ ++
+ Example: +
+(with-xml-output (make-octet-stream-sink stream :indentation 2 :canonical nil) + (with-element "foo" + (attribute "xyz" "abc") + (with-element "bar" + (attribute "blub" "bla")) + (text "Hi there.")))+
+ Prints this to stream, which must be an + (unsigned-byte 8) stream: +
+<foo xyz="abc"> + <bar blub="bla"></bar> + Hi there. +</foo>+
+ (Note that these functions accept both strings and rods, so we + could write "foo" instead of #"foo" above.) +
+ ++
+ xhtmlgen is included as contrib/xhtmlgen.lisp in + the cxml distribution. Example: +
+(let ((sink (cxml:make-character-stream-sink *standard-output*))) + (sax:start-document sink) + (xhtml-generator:write-doctype sink) + (xhtml-generator:with-html sink + (:html + (:head + (:title "Titel")) + (:body + ((:p "style" "font-weight: bold") + "Inhalt") + (:ul + (:li "Eins") + (:li "Zwei") + (:li "Drei"))))) + (sax:end-document sink))+ + +
+
(let ((d (parse-file "~/test.xml" (dom:make-dom-builder))) + (x (parse-dtd-file "~/test.dtd"))) + (dom:map-document (cxml:make-validator x #"foo") d))+ +
+
+
+ Like other XML parsers written in Lisp, CXML can work with + documents represented as list structures. The specific model + implemented by cxml is compatible with the xmls parser. Xmls + list structures are a simpler and faster alternative to full DOM + document trees. They also serve as an example showing how to + implement user-defined document models as an independent layer + over the base parser (cf. xml/xmls-compat.lisp in + the cxml distribution). However, note that the list structures do + not include all information available in DOM documents and are + sometimes more difficult to work with since many DOM functions + cannot be implemented on them. +
++
+ Example: +
+(cxml:parse-file "test.xml" (cxml-xmls:make-xmls-builder))+
+
+ Use this function to serialize XMLS data. For example, we could + define a replacement for xmls:write-xml like this: +
+(defun write-xml (stream node &key indent) + (let ((sink (cxml:make-character-stream-sink + stream :canonical nil :indentation indent))) + (cxml-xmls:map-node sink node)))+
+
+ The node list's car can also be a cons of local name + and namespace prefix ns. + fixme: It is unclear to me how namespaces are meant to + work in xmls, since xmls documentation differs from how xmls + actually works in current releases. Usually applications need to + know both the namespace prefix and the namespace URI. We + currently follow the xmls implementation and use the + namespace prefix instead of following its documentation which + shows the URI. We do not follow xmls in munging xmlns attribute + values. Attributes themselves have namespaces and it is not clear + to me how that works in xmls. +
++
+
+ + ++ As explained above, the XML parser handles character encoding and + uses 16bit strings internally. Instead of using characters and strings + it uses runes and rods. This is seen as a + feature, but can be inconvenient. +
++ Note that the recoder approach does not work with the DOM + builder, since DOM is specified to use UTF-16. +
++
+ Example. In a Lisp which ordinarily would use octet vector rods: +
+CL-USER(14): (cxml:parse-string "<test/>" (cxml-xmls:make-xmls-builder)) +(#(116 101 115 116) NIL)+
+ Use a SAX recoder to get strings instead: +
+CL-USER(17): (parse-string "<test/>" (cxml:make-recoder (cxml-xmls:make-xmls-builder)))
+("test" NIL)
+
+
+ + To avoid spending time parsing the same DTD over and over again, + CXML can cache DTD objects. The parser consults + cxml:*dtd-cache* whenever it is looking for an external + subset in a document which does not have an internal subset and + uses the cached DTD instance if one is present in the cache for + the System ID in question. +
++ Note that DTDs do not expire from the cache automatically. + (Future versions of CXML might introduce automatic checks for + outdated DTDs.) +
++
+
+
+
+
+
+
+ fixme: thread-safety +
+ + ++ External entities (for example, DTDs) are referred to using their + Public and System IDs. Usually the System ID, a URI, is used to + locate the entity. CXML itself handles only file://-URIs, but + many System IDs in practical use are http://-URIs. There are two + different mechanisms applications can use to allow CXML to locate + entities using arbitrary Public ID or System ID: +
++ This section describes XML Catalogs, the second solution. CXML + implements Oasis + XML Catalogs. +
++
+
+
+
+
+ Example: +
+* (setf cxml:*catalog* nil) +* (cxml:parse-file "test.xhtml" nil) +=> Error: URI scheme :HTTP not supported + +* (setf cxml:*catalog* (cxml:make-catalog)) +* (cxml:parse-file "test.xhtml" nil) +;; no error! +NIL+
+ Note that parsed catalog files are cached in the catalog object. + Catalog files cached do not expire automatically. To ensure that + all catalog files are parsed again, create a new catalog object. +
+ + ++ A SAX handler is an arbitrary object that implements some of the + generic functions in the SAX package. Note that no default + handler class is necessary, because all generic functions have default + methods which do nothing. SAX functions are: +
+ The entity declaration methods are similar to Java SAX + definitions, but parameter entities are distinguished from + general entities not by a % prefix to the name, but by + the kind argument, either :parameter or + :general. +
++ The arguments to sax:element-declaration and + sax:attribute-declaration differ significantly from their + Java counterparts. +
++ fixme: For more information on these functions refer to the docstrings. +
+ + + ++ CXML implements the DOM Level 1 Core interfaces. Explaining + DOM is better left to the specification, + so please refer to the official W3C documents for DOM. +
++ However, there is no "standard" DOM mapping for Lisp. DOM + is specified + in CORBA IDL, but it refrains from using object-oriented IDL + features, allowing for a much more natural Lisp implementation than + the ordinary IDL/Lisp mapping would. +
++ Differences between CXML's DOM and the direct IDL/Lisp mapping: +
+Example:
+XML(97): (dom:node-type + (dom:document-element + (cxml:parse-file "~/test.xml" (dom:make-dom-builder)))) +:ELEMENT+ + diff --git a/runes/runes.lisp b/runes/runes.lisp index 620bb79..94b8f81 100644 --- a/runes/runes.lisp +++ b/runes/runes.lisp @@ -147,12 +147,17 @@ (defun char-rune (char) (code-rune (char-code char))) -(defun rune-char (rune &optional (default #\?)) - (if (>= rune char-code-limit) - default - (or (code-char rune) default))) +(defparameter *invalid-rune* nil ;;#\? + "Rune to use as a replacement in RUNE-CHAR and ROD-STRING for runes not + representable as characters. If NIL, an error is signalled instead.") -(defun rod-string (rod &optional (default-char #\?)) +(defun rune-char (rune &optional (default *invalid-rune*)) + (or (if (>= rune char-code-limit) + default + (or (code-char rune) default)) + (error "rune cannot be represented as a character: ~A" rune))) + +(defun rod-string (rod &optional (default-char *invalid-rune*)) (map 'string (lambda (x) (rune-char x default-char)) rod)) (defun string-rod (string) diff --git a/test/domtest.lisp b/test/domtest.lisp index 478048a..4692358 100644 --- a/test/domtest.lisp +++ b/test/domtest.lisp @@ -126,28 +126,29 @@ (map-child-elements 'list #'identity element)) (defun parse-java-literal (str) - (unless (stringp str) - (setf str (runes:rod-string str))) + (when (stringp str) + (setf str (runes:string-rod str))) (cond ((zerop (length str)) nil) - ((equal str "true") + ((runes:rod= str #"true") t) - ((equal str "false") + ((runes:rod= str #"false") nil) - ((digit-char-p (char str 0)) - (parse-integer str)) - ((char= (char str 0) #\") - (runes:rod - (with-output-to-string (out) - (with-input-from-string (in str) - (read-char in) - (for ((c = (read-char in)) - :until (char= c #\")) - (if (char= c #\\) - (ecase (read-char in) - ;; ... 
- (#\n (write-char #\newline out))) - (write-char c out))))))) + ((digit-char-p (runes:rune-char (elt str 0))) + (parse-integer (runes:rod-string str))) + ((runes:rune= (elt str 0) #.(runes:char-rune #\")) + (let ((v (make-array 1 :fill-pointer 0 :adjustable t))) + (for* ((i = 1 :then (1+ i)) + (c = (elt str i)) + :until (runes:rune= c #.(runes:char-rune #\"))) + (if (runes:rune= c #.(runes:char-rune #\\)) + (ecase (progn + (incf i) + (elt str i)) + ;; ... + (#/n (vector-push-extend #/newline v (length v)))) + (vector-push-extend c v (length v)))) + (coerce v 'runes::simple-rod))) (t (%intern str)))) @@ -613,7 +614,8 @@ document)) (defparameter *bad-tests* - '("hc_nodereplacechildnewchildexists.xml" + '("hc_elementnormalize2.xml" + "hc_nodereplacechildnewchildexists.xml" "characterdatadeletedatanomodificationallowederr.xml")) (defun run-all-tests (*directory* &optional verbose) @@ -635,7 +637,7 @@ (incf n))) (do-child-elements (member suite) (let ((href (runes:rod-string (dom:get-attribute member "href")))) - (unless (or (equal (dom:tag-name member) "metadata") + (unless (or (runes:rod= (dom:tag-name member) #"metadata") (member href *bad-tests* :test 'equal)) (format t "~&~D/~D ~A~%" i n href) (let ((lisp (slurp-test (merge-pathnames href test-directory))))