sink reorganization
This commit is contained in:
44
README.html
44
README.html
@ -10,6 +10,49 @@
|
||||
<p>
|
||||
<a href="README.html">CXML Homepage</a>
|
||||
</p>
|
||||
<ul class="main">
|
||||
<li>
|
||||
<a href="doc/installation.html">Installing Closure XML</a>
|
||||
<ul class="sub">
|
||||
<li><a href="doc/installation.html#download"><b>Download</b></a></li>
|
||||
<li><a href="doc/installation.html#implementations">Implementation-specific notes</a></li>
|
||||
<li><a href="doc/installation.html#compilation">Compilation</a></li>
|
||||
<li><a href="doc/installation.html#tests">Tests</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li>
|
||||
<a href="doc/using.html#quickstart"><b>Quick-Start Example</b></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="doc/using.html">SAX parser</a>
|
||||
<ul class="sub">
|
||||
<li><a href="doc/using.html#parser">Parsing and Validating</a></li>
|
||||
<li><a href="doc/using.html#serialization">Serialization</a></li>
|
||||
<li><a href="doc/using.html#misc">Miscellaneous SAX handlers</a></li>
|
||||
<li><a href="doc/using.html#rods">Recoders</a></li>
|
||||
<li><a href="doc/using.html#dtdcache">Caching of DTD Objects</a></li>
|
||||
<li><a href="doc/using.html#catalogs">XML Catalogs</a></li>
|
||||
<li><a href="doc/using.html#sax">SAX Interface</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="doc/using.html">DOM implementation</a>
|
||||
<ul class="sub">
|
||||
<li><a href="doc/dom.html#parser">Parsing with the DOM builder</a></li>
|
||||
<li><a href="doc/dom.html#serialization">Serialization</a></li>
|
||||
<li><a href="doc/dom.html#mapping">DOM/Lisp mapping</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li><a href="doc/xmls-compat.html">XMLS Builder</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<h1>Closure XML Parser</h1>
|
||||
@ -67,6 +110,7 @@
|
||||
<li>Implemented DOM 2 Core.</li>
|
||||
<li>Error handling overhaul.</li>
|
||||
<li>UTF-8 string support in DOM on Lisps without Unicode characters.</li>
|
||||
<li>Sink API has been changed.</li>
|
||||
<li>Support internal subset serialization.</li>
|
||||
<li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>
|
||||
<li>Use trivial-gray-streams.</li>
|
||||
|
||||
4
cxml.asd
4
cxml.asd
@ -51,7 +51,8 @@
|
||||
(:file "encodings" :depends-on ("package"))
|
||||
(:file "encodings-data" :depends-on ("package" "encodings"))
|
||||
(:file "xstream"
|
||||
:depends-on ("package" "definline" "syntax" "encodings-data"))))
|
||||
:depends-on ("package" "definline" "syntax" "encodings-data"))
|
||||
(:file "ystream" :depends-on (runes))))
|
||||
|
||||
(asdf:defsystem :cxml-xml
|
||||
:default-component-class closure-source-file
|
||||
@ -106,7 +107,6 @@
|
||||
(utf8dom-file utf8-impl :pathname "dom-impl" :depends-on ("package"))
|
||||
#+rune-is-integer
|
||||
(utf8dom-file utf8-builder :pathname "dom-builder" :depends-on (utf8-impl))
|
||||
(:file "unparse" :depends-on ("package"))
|
||||
(:file "dom-sax" :depends-on ("package")))
|
||||
:depends-on (:cxml-xml))
|
||||
|
||||
|
||||
85
doc/dom.html
85
doc/dom.html
@ -7,6 +7,52 @@
|
||||
</head>
|
||||
<body>
|
||||
<div class="sidebar">
|
||||
<p>
|
||||
<a href="../README.html">CXML Homepage</a>
|
||||
</p>
|
||||
<ul class="main">
|
||||
<li>
|
||||
<a href="installation.html">Installing Closure XML</a>
|
||||
<ul class="sub">
|
||||
<li><a href="installation.html#download"><b>Download</b></a></li>
|
||||
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
|
||||
<li><a href="installation.html#compilation">Compilation</a></li>
|
||||
<li><a href="installation.html#tests">Tests</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li>
|
||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">SAX parser</a>
|
||||
<ul class="sub">
|
||||
<li><a href="using.html#parser">Parsing and Validating</a></li>
|
||||
<li><a href="using.html#serialization">Serialization</a></li>
|
||||
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
|
||||
<li><a href="using.html#rods">Recoders</a></li>
|
||||
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
|
||||
<li><a href="using.html#catalogs">XML Catalogs</a></li>
|
||||
<li><a href="using.html#sax">SAX Interface</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">DOM implementation</a>
|
||||
<ul class="sub">
|
||||
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
|
||||
<li><a href="dom.html#serialization">Serialization</a></li>
|
||||
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li><a href="xmls-compat.html">XMLS Builder</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<h1>The DOM implementation</h1>
|
||||
@ -54,14 +100,9 @@
|
||||
<a name="serialization"/>
|
||||
<h3>Serializing DOM</h3>
|
||||
<p>
|
||||
The technique used to serialize a DOM document is to use a SAX
|
||||
serialization sink as the argument to <tt>dom:map-document</tt>,
|
||||
which generates SAX events for the DOM tree.
|
||||
</p>
|
||||
<p>
|
||||
In addition, there are convenience functions like
|
||||
<tt>unparse-document</tt> as a thin wrapper around
|
||||
<tt>map-document</tt>.
|
||||
To serialize a DOM document, use a SAX serialization sink as the
|
||||
argument to <tt>dom:map-document</tt>, which generates SAX events
|
||||
for the DOM tree.
|
||||
</p>
|
||||
<p>
|
||||
Applications dealing with namespaces might want to inject a
|
||||
@ -99,34 +140,6 @@
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
<div class="def">Function CXML:UNPARSE-DOCUMENT (document stream &rest keys)</div>
|
||||
<div class="def">Function CXML:UNPARSE-DOCUMENT-TO-OCTETS (document &rest keys) => vector</div>
|
||||
</p>
|
||||
<p>
|
||||
Serialize a DOM document object. These convenience functions are
|
||||
wrappers around <tt>dom:map-document</tt>.
|
||||
</p>
|
||||
<p>Keyword arguments are passed on to the sink. C.f. <a
|
||||
href="using.html#serialization">cxml:make-octet-vector-sink</a>.</p>
|
||||
<p>Notes:</p>
|
||||
<ul>
|
||||
<li>
|
||||
If keyword argument <tt>canonical</tt> is specified as 2, a
|
||||
doctype declaration will be written that includes notations
|
||||
declared in the document.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
<tt>unparse-document-to-octets</tt> returns an <tt>(unsigned-byte
|
||||
8)</tt> array, whereas <tt>unparse-document</tt> writes
|
||||
characters. <tt>unparse-document</tt> is useful together
|
||||
with <tt>with-output-to-string</tt>. However, note that the
|
||||
resulting document in both cases is UTF-8 encoded, so the
|
||||
characters written by <tt>unparse-document</tt> are really UTF-8
|
||||
bytes encoded as characters.
|
||||
</p>
|
||||
|
||||
<a name="mapping"/>
|
||||
<h3>DOM/Lisp mapping</h3>
|
||||
<p>
|
||||
|
||||
@ -7,6 +7,52 @@
|
||||
</head>
|
||||
<body>
|
||||
<div class="sidebar">
|
||||
<p>
|
||||
<a href="../README.html">CXML Homepage</a>
|
||||
</p>
|
||||
<ul class="main">
|
||||
<li>
|
||||
<a href="installation.html">Installing Closure XML</a>
|
||||
<ul class="sub">
|
||||
<li><a href="installation.html#download"><b>Download</b></a></li>
|
||||
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
|
||||
<li><a href="installation.html#compilation">Compilation</a></li>
|
||||
<li><a href="installation.html#tests">Tests</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li>
|
||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">SAX parser</a>
|
||||
<ul class="sub">
|
||||
<li><a href="using.html#parser">Parsing and Validating</a></li>
|
||||
<li><a href="using.html#serialization">Serialization</a></li>
|
||||
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
|
||||
<li><a href="using.html#rods">Recoders</a></li>
|
||||
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
|
||||
<li><a href="using.html#catalogs">XML Catalogs</a></li>
|
||||
<li><a href="using.html#sax">SAX Interface</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">DOM implementation</a>
|
||||
<ul class="sub">
|
||||
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
|
||||
<li><a href="dom.html#serialization">Serialization</a></li>
|
||||
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li><a href="xmls-compat.html">XMLS Builder</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<h1>Installation of Closure XML</h1>
|
||||
|
||||
|
||||
@ -83,10 +83,10 @@
|
||||
* <b>(dom:get-attribute (dom:document-element *example*) "a")</b>
|
||||
"b"</pre>
|
||||
|
||||
<p>Serialize the DOM document back into a stream (<a
|
||||
<p>Serialize the DOM document back into a file (<a
|
||||
href="using.html#serialization">read more</a>):</p>
|
||||
<pre><b>(cxml:unparse-document *example* *standard-output*)</b>
|
||||
<test a="b"><child></child></test></pre>
|
||||
<pre><b>(with-open-file (out "example.out" :direction :output :element-type '(unsigned-byte 8))
|
||||
(dom:map-document (cxml:make-octet-stream-sink out) *example*))</b></pre>
|
||||
|
||||
<p>As an alternative to DOM, parse into xmls-compatible list
|
||||
structure (<a href="xmls-compat.html">read more</a>):</p>
|
||||
|
||||
120
doc/using.html
120
doc/using.html
@ -7,6 +7,52 @@
|
||||
</head>
|
||||
<body>
|
||||
<div class="sidebar">
|
||||
<p>
|
||||
<a href="../README.html">CXML Homepage</a>
|
||||
</p>
|
||||
<ul class="main">
|
||||
<li>
|
||||
<a href="installation.html">Installing Closure XML</a>
|
||||
<ul class="sub">
|
||||
<li><a href="installation.html#download"><b>Download</b></a></li>
|
||||
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
|
||||
<li><a href="installation.html#compilation">Compilation</a></li>
|
||||
<li><a href="installation.html#tests">Tests</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li>
|
||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">SAX parser</a>
|
||||
<ul class="sub">
|
||||
<li><a href="using.html#parser">Parsing and Validating</a></li>
|
||||
<li><a href="using.html#serialization">Serialization</a></li>
|
||||
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
|
||||
<li><a href="using.html#rods">Recoders</a></li>
|
||||
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
|
||||
<li><a href="using.html#catalogs">XML Catalogs</a></li>
|
||||
<li><a href="using.html#sax">SAX Interface</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">DOM implementation</a>
|
||||
<ul class="sub">
|
||||
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
|
||||
<li><a href="dom.html#serialization">Serialization</a></li>
|
||||
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li><a href="xmls-compat.html">XMLS Builder</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<h1>Using the SAX parser</h1>
|
||||
|
||||
@ -137,12 +183,70 @@
|
||||
<tt>with-xml-output</tt>, <tt>with-element</tt>, etc).
|
||||
</p>
|
||||
|
||||
<div style="background-color: #ddddff">
|
||||
Portable sinks:<br/>
|
||||
<span class="def">Function CXML:MAKE-OCTET-VECTOR-SINK (&rest keys) => sink</span><br/>
|
||||
<span class="def">Function CXML:MAKE-OCTET-STREAM-SINK (stream &rest keys) => sink</span><br/>
|
||||
<span class="def">Function CXML:MAKE-ROD-SINK (&rest keys) => sink</span><br/>
|
||||
<br/>
|
||||
Only on Lisps with Unicode support:<br/>
|
||||
<span class="def">Function CXML:MAKE-STRING-SINK</span> -- alias for <tt>cxml:make-rod-sink</tt><br/>
|
||||
<span class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink</span><br/>
|
||||
<br/>
|
||||
Only on Lisps <em>without</em> Unicode support:<br/>
|
||||
<span class="def">Function CXML:MAKE-STRING-SINK/UTF8 (&rest keys) => sink</span><br/>
|
||||
<span class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK/UTF8 (stream &rest keys) => sink</span><br/>
|
||||
</div>
|
||||
<p>
|
||||
<div class="def">Function CXML:MAKE-OCTET-STREAM-SINK (stream &rest keys) => sink</div>
|
||||
<div class="def">Function CXML:MAKE-OCTET-VECTOR-SINK (&rest keys) => sink</div>
|
||||
<div class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink</div>
|
||||
Return a SAX serialization handle.
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
The <tt>-octet-</tt> functions write the document encoded into
|
||||
UTF-8.
|
||||
<tt>make-octet-stream-sink</tt> works with Lisp streams of
|
||||
element-type <tt>(unsigned-byte 8)</tt>.
|
||||
<tt>make-octet-vector-sink</tt> returns a vector of
|
||||
<tt>(unsigned-byte 8)</tt>.
|
||||
</li>
|
||||
<li>
|
||||
<tt>make-character-stream-sink</tt> works with character
|
||||
streams. It serializes the document into characters <em>without
|
||||
encoding it into an external format</em>. When using these
|
||||
functions, <em>take care to avoid encoding the result into
|
||||
an incorrect external format</em>. (Note that characters undergo
|
||||
external format conversion when written to a character stream.
|
||||
If the document's XML declaration specifies an encoding, make
|
||||
sure to specify this encoding as the external format if and when
|
||||
writing the serialized document to a character stream. If the
|
||||
document does not specify an encoding, either UTF-8 or UTF-16
|
||||
must be used.) This function is available only on Lisps with
|
||||
unicode support.
|
||||
</li>
|
||||
<li>
|
||||
<tt>make-rod-sink</tt> serializes the document into a vector of
|
||||
runes <em>without encoding it into an external format</em>.
|
||||
(On Lisp with unicode support, the result will be a string;
|
||||
otherwise, a vector of character codes will be returned.)
|
||||
The warnings given for <tt>make-character-stream-sink</tt>
|
||||
apply to this function as well.
|
||||
</li>
|
||||
<li>
|
||||
The <tt>/utf8</tt> functions write the document encoded into
|
||||
characters representing a UTF-8 encoding.
|
||||
When using these functions, <em>take care to avoid encoding the
|
||||
result</em> into an external format for a second time. (Note
|
||||
that characters undergo external format conversion when written
|
||||
to a character stream. Since these functions already perform
|
||||
external format conversion, make sure to specify an external
|
||||
format that does "nothing" if and when writing the serialized document
|
||||
to a character stream. ISO-8859-1 external formats usually
|
||||
achieve the desired effect.)
|
||||
<tt>make-character-stream-sink/utf8</tt> works with character streams.
|
||||
<tt>make-string-sink/utf8</tt> returns a string.
|
||||
These functions are available only on Lisps without unicode support.
|
||||
</li>
|
||||
</ul>
|
||||
<p>Keyword arguments:</p>
|
||||
<ul>
|
||||
<li>
|
||||
@ -170,6 +274,16 @@
|
||||
<tt>NIL</tt>: Use a more readable non-canonical representation.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
An internal subset will be included in the result regardless of
|
||||
the <tt>canonical</tt> setting. It is the responsibility of the
|
||||
caller to not report an internal subset for
|
||||
canonical <= 1, or only notations as required for
|
||||
canonical = 2. For example, the
|
||||
<tt>include-doctype</tt> argument to <tt>dom:map-document</tt>
|
||||
should be set to <tt>nil</tt> for the former behaviour and
|
||||
<tt>:canonical-notations</tt> for the latter.
|
||||
</p>
|
||||
<p>
|
||||
With an <tt>indentation</tt> level, pretty-print the XML by
|
||||
inserting additional whitespace. Note that indentation
|
||||
|
||||
@ -7,6 +7,52 @@
|
||||
</head>
|
||||
<body>
|
||||
<div class="sidebar">
|
||||
<p>
|
||||
<a href="../README.html">CXML Homepage</a>
|
||||
</p>
|
||||
<ul class="main">
|
||||
<li>
|
||||
<a href="installation.html">Installing Closure XML</a>
|
||||
<ul class="sub">
|
||||
<li><a href="installation.html#download"><b>Download</b></a></li>
|
||||
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
|
||||
<li><a href="installation.html#compilation">Compilation</a></li>
|
||||
<li><a href="installation.html#tests">Tests</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li>
|
||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">SAX parser</a>
|
||||
<ul class="sub">
|
||||
<li><a href="using.html#parser">Parsing and Validating</a></li>
|
||||
<li><a href="using.html#serialization">Serialization</a></li>
|
||||
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
|
||||
<li><a href="using.html#rods">Recoders</a></li>
|
||||
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
|
||||
<li><a href="using.html#catalogs">XML Catalogs</a></li>
|
||||
<li><a href="using.html#sax">SAX Interface</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="using.html">DOM implementation</a>
|
||||
<ul class="sub">
|
||||
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
|
||||
<li><a href="dom.html#serialization">Serialization</a></li>
|
||||
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<ul class="hack">
|
||||
<li><a href="xmls-compat.html">XMLS Builder</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<h1>XMLS Builder</h1>
|
||||
|
||||
@ -973,7 +973,7 @@
|
||||
(rod-stream-buf stream)))
|
||||
|
||||
(defmethod write-attribute-child ((node node) stream)
|
||||
(write-rod (dom:node-value node) stream))
|
||||
(put-rod (dom:node-value node) stream))
|
||||
|
||||
(defmethod write-attribute-child ((node entity-reference) stream)
|
||||
(dovector (child (dom:child-nodes node))
|
||||
@ -988,7 +988,7 @@
|
||||
(buf nil)
|
||||
(position 0))
|
||||
|
||||
(defun write-rod (rod rod-stream)
|
||||
(defun put-rod (rod rod-stream)
|
||||
(let ((buf (rod-stream-buf rod-stream)))
|
||||
(when buf
|
||||
(move rod buf 0 (rod-stream-position rod-stream) (length rod)))
|
||||
@ -1210,10 +1210,12 @@
|
||||
;; dass ein leeres internal subset nicht vorhanden ist und
|
||||
;; wir daher nil liefern sollen. bittesehr!
|
||||
(dom::%internal-subset node))
|
||||
(with-output-to-string (stream)
|
||||
(let ((sink (cxml:make-character-stream-sink stream)))
|
||||
(dolist (def (dom::%internal-subset node))
|
||||
(apply (car def) sink (cdr def)))))
|
||||
(let ((sink
|
||||
#+rune-is-character (cxml:make-string-sink)
|
||||
#-rune-is-character (cxml:make-string-sink/utf8)))
|
||||
(dolist (def (dom::%internal-subset node))
|
||||
(apply (car def) sink (cdr def)))
|
||||
(sax:end-document sink))
|
||||
nil))
|
||||
|
||||
;;; NOTATION -- nix
|
||||
|
||||
@ -1,19 +0,0 @@
|
||||
(in-package :cxml)
|
||||
|
||||
(defun %unparse-document (sink doc canonical)
|
||||
(dom:map-document sink
|
||||
doc
|
||||
:include-doctype (if (and canonical (>= canonical 2))
|
||||
:canonical-notations
|
||||
nil)
|
||||
:include-default-values t))
|
||||
|
||||
(defun unparse-document-to-octets (doc &rest initargs &key canonical)
|
||||
(%unparse-document (apply #'make-octet-vector-sink initargs)
|
||||
doc
|
||||
canonical))
|
||||
|
||||
(defun unparse-document (doc stream &rest initargs &key canonical)
|
||||
(%unparse-document (apply #'make-character-stream-sink stream initargs)
|
||||
doc
|
||||
canonical))
|
||||
@ -59,7 +59,21 @@
|
||||
#:xstream-plist
|
||||
#:xstream-encoding
|
||||
#:set-to-full-speed
|
||||
#:xstream-name))
|
||||
#:xstream-name
|
||||
|
||||
;; ystream.lisp
|
||||
#:ystream
|
||||
#:close-ystream
|
||||
#:write-rune
|
||||
#:write-rod
|
||||
#:ystream-column
|
||||
#:make-octet-vector-ystream
|
||||
#:make-octet-stream-ystream
|
||||
#:make-rod-ystream
|
||||
#+rune-is-character #:make-character-stream-ystream
|
||||
#+rune-is-integer #:make-string-ystream/utf8
|
||||
#+rune-is-integer #:make-character-stream-ystream/utf8
|
||||
#:runes-to-utf8/adjustable-string))
|
||||
|
||||
(defpackage :utf8-runes
|
||||
(:use :cl)
|
||||
|
||||
247
runes/ystream.lisp
Normal file
247
runes/ystream.lisp
Normal file
@ -0,0 +1,247 @@
|
||||
;;; (c) 2005 David Lichteblau <david@lichteblau.com>
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;;
|
||||
;;; ystream (for lack of a better name): a rune output "stream"
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
(defconstant +ystream-bufsize+ 1024)
|
||||
|
||||
(defun make-ub8-array (n)
|
||||
(make-array n :element-type '(unsigned-byte 8)))
|
||||
|
||||
(defun make-ub16-array (n)
|
||||
(make-array n :element-type '(unsigned-byte 16)))
|
||||
|
||||
(defun make-buffer (&key (element-type '(unsigned-byte 8)))
|
||||
(make-array 1
|
||||
:element-type element-type
|
||||
:adjustable t
|
||||
:fill-pointer 0))
|
||||
|
||||
(defmacro while (test &body body)
|
||||
`(until (not ,test) ,@body))
|
||||
|
||||
(defmacro until (test &body body)
|
||||
`(do () (,test) ,@body))
|
||||
|
||||
;;; ystream
|
||||
;;; +- utf8-ystream
|
||||
;;; | +- octet-vector-ystream
|
||||
;;; | \- %stream-ystream
|
||||
;;; | +- octet-stream-ystream
|
||||
;;; | \- character-stream-ystream/utf8
|
||||
;;; | \- string-ystream/utf8
|
||||
;;; +- rod-ystream
|
||||
;;; \-- character-stream-ystream
|
||||
|
||||
(defstruct ystream
|
||||
(column 0 :type integer)
|
||||
(in-ptr 0 :type fixnum)
|
||||
(in-buffer (make-rod +ystream-bufsize+) :type simple-rod))
|
||||
|
||||
(defstruct (utf8-ystream
|
||||
(:include ystream)
|
||||
(:conc-name "YSTREAM-"))
|
||||
(out-buffer (make-ub8-array (* 6 +ystream-bufsize+))
|
||||
:type (simple-array (unsigned-byte 8) (*))))
|
||||
|
||||
(defstruct (%stream-ystream (:include utf8-ystream) (:conc-name "YSTREAM-"))
|
||||
(os-stream nil))
|
||||
|
||||
(definline write-rune (rune ystream)
|
||||
(let ((in (ystream-in-buffer ystream)))
|
||||
(when (eql (ystream-in-ptr ystream) (length in))
|
||||
(flush-ystream ystream)
|
||||
(setf in (ystream-in-buffer ystream)))
|
||||
(setf (elt in (ystream-in-ptr ystream)) rune)
|
||||
(incf (ystream-in-ptr ystream))
|
||||
(setf (ystream-column ystream)
|
||||
(if (eql rune #/U+0010) 0 (1+ (ystream-column ystream))))
|
||||
rune))
|
||||
|
||||
(defmethod close-ystream :before ((ystream ystream))
|
||||
(flush-ystream ystream))
|
||||
|
||||
|
||||
;;;; UTF8-YSTREAM (abstract)
|
||||
|
||||
(defmethod close-ystream ((ystream %stream-ystream))
|
||||
(ystream-os-stream ystream))
|
||||
|
||||
(defgeneric ystream-device-write (ystream buf nbytes))
|
||||
|
||||
(defmethod flush-ystream ((ystream utf8-ystream))
|
||||
(let ((ptr (ystream-in-ptr ystream)))
|
||||
(when (plusp ptr)
|
||||
(let* ((in (ystream-in-buffer ystream))
|
||||
(out (ystream-out-buffer ystream))
|
||||
(surrogatep (<= #xD800 (elt in (1- ptr)) #xDBFF))
|
||||
n)
|
||||
(when surrogatep
|
||||
(decf ptr))
|
||||
(when (plusp ptr)
|
||||
(setf n (runes-to-utf8 out in ptr))
|
||||
(ystream-device-write ystream out n)
|
||||
(cond
|
||||
(surrogatep
|
||||
(setf (elt in 0) (elt in (1- ptr)))
|
||||
(setf (ystream-in-ptr ystream) 1))
|
||||
(t
|
||||
(setf (ystream-in-ptr ystream) 0))))))))
|
||||
|
||||
(defun write-rod (rod sink)
|
||||
(loop for rune across rod do (write-rune rune sink)))
|
||||
|
||||
(defun fast-push (new-element vector)
|
||||
(vector-push-extend new-element vector (max 1 (array-dimension vector 0))))
|
||||
|
||||
(macrolet ((define-utf8-writer (name (byte &rest aux) result &body body)
|
||||
`(defun ,name (out in n)
|
||||
(let ((high-surrogate nil)
|
||||
,@aux)
|
||||
(labels
|
||||
((write0 (,byte)
|
||||
,@body)
|
||||
(write1 (r)
|
||||
(cond
|
||||
((<= #x00000000 r #x0000007F)
|
||||
(write0 r))
|
||||
((<= #x00000080 r #x000007FF)
|
||||
(write0 (logior #b11000000 (ldb (byte 5 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x00000800 r #x0000FFFF)
|
||||
(write0 (logior #b11100000 (ldb (byte 4 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x00010000 r #x001FFFFF)
|
||||
(write0 (logior #b11110000 (ldb (byte 3 18) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x00200000 r #x03FFFFFF)
|
||||
(write0 (logior #b11111000 (ldb (byte 2 24) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 18) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x04000000 r #x7FFFFFFF)
|
||||
(write0 (logior #b11111100 (ldb (byte 1 30) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 24) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 18) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))))
|
||||
(write2 (r)
|
||||
(cond
|
||||
((<= #xD800 r #xDBFF)
|
||||
(setf high-surrogate r))
|
||||
((<= #xDC00 r #xDFFF)
|
||||
(let ((q (logior (ash (- high-surrogate #xD7C0) 10)
|
||||
(- r #xDC00))))
|
||||
(write1 q))
|
||||
(setf high-surrogate nil))
|
||||
(t
|
||||
(write1 r)))))
|
||||
(dotimes (j n)
|
||||
(write2 (rune-code (elt in j)))))
|
||||
,result))))
|
||||
(define-utf8-writer runes-to-utf8 (x (i 0))
|
||||
i
|
||||
(setf (elt out i) x)
|
||||
(incf i))
|
||||
(define-utf8-writer runes-to-utf8/adjustable-string (x)
|
||||
nil
|
||||
(fast-push (code-char x) out)))
|
||||
|
||||
|
||||
;;;; ROD-YSTREAM
|
||||
|
||||
(defstruct (rod-ystream (:include ystream)))
|
||||
|
||||
(defmethod flush-ystream ((ystream rod-ystream))
|
||||
(let* ((old (ystream-in-buffer ystream))
|
||||
(new (make-rod (* 2 (length old)))))
|
||||
(replace new old)
|
||||
(setf (ystream-in-buffer ystream) new)))
|
||||
|
||||
(defmethod close-ystream ((ystream rod-ystream))
|
||||
(subseq (ystream-in-buffer ystream) 0 (ystream-in-ptr ystream)))
|
||||
|
||||
|
||||
;;;; CHARACTER-STREAM-YSTREAM
|
||||
|
||||
#+rune-is-character
|
||||
(progn
|
||||
(defstruct (character-stream-ystream
|
||||
(:constructor make-character-stream-ystream (target-stream))
|
||||
(:include ystream)
|
||||
(:conc-name "YSTREAM-"))
|
||||
(target-stream nil))
|
||||
|
||||
(defmethod flush-ystream ((ystream rod-ystream))
|
||||
(write-string (ystream-in-buffer ystream) (ystream-target-stream ystream))
|
||||
(setf (ystream-in-ptr ystream) 0))
|
||||
|
||||
(defmethod close-ystream ((ystream rod-ystream))
|
||||
(ystream-target-stream ystream)))
|
||||
|
||||
|
||||
;;;; OCTET-VECTOR-YSTREAM
|
||||
|
||||
(defstruct (octet-vector-ystream
|
||||
(:include utf8-ystream)
|
||||
(:conc-name "YSTREAM-"))
|
||||
(result (make-buffer)))
|
||||
|
||||
(defmethod ystream-device-write ((ystream octet-vector-ystream) buf nbytes)
|
||||
(let* ((result (ystream-result ystream))
|
||||
(start (length result))
|
||||
(size (array-dimension result 0)))
|
||||
(while (> (+ start nbytes) size)
|
||||
(setf size (* 2 size)))
|
||||
(adjust-array result size :fill-pointer (+ start nbytes))
|
||||
(replace result buf :start1 start :end2 nbytes)))
|
||||
|
||||
(defmethod close-ystream ((ystream octet-vector-ystream))
|
||||
(ystream-result ystream))
|
||||
|
||||
|
||||
;;;; OCTET-STREAM-YSTREAM
|
||||
|
||||
(defstruct (octet-stream-ystream
|
||||
(:include %stream-ystream)
|
||||
(:constructor make-octet-stream-ystream (os-stream))
|
||||
(:conc-name "YSTREAM-")))
|
||||
|
||||
(defmethod ystream-device-write ((ystream octet-stream-ystream) buf nbytes)
|
||||
(write-sequence buf (ystream-os-stream ystream) :end nbytes))
|
||||
|
||||
|
||||
;;;; CHARACTER-STREAM-YSTREAM/UTF8
|
||||
|
||||
#+rune-is-integer
|
||||
(progn
|
||||
(defstruct (character-stream-ystream/utf8
|
||||
(:include %stream-ystream)
|
||||
(:conc-name "YSTREAM-")))
|
||||
|
||||
(defmethod ystream-device-write
|
||||
((ystream character-stream-ystream/utf8) buf nbytes)
|
||||
(declare (type (simple-array (unsigned-byte 8) (*)) buf))
|
||||
(let ((out (ystream-os-stream ystream)))
|
||||
(dotimes (x nbytes)
|
||||
(write-char (code-char (elt buf x)) out)))))
|
||||
|
||||
|
||||
;;;; STRING-YSTREAM/UTF8
|
||||
|
||||
#+rune-is-integer
|
||||
(progn
|
||||
(defstruct (string-ystream/utf8
|
||||
(:include character-stream-ystream/utf8
|
||||
(os-stream (make-string-output-stream)))
|
||||
(:conc-name "YSTREAM-")))
|
||||
|
||||
(defmethod close-ystream ((ystream string-ystream/utf8))
|
||||
(get-output-stream-string (ystream-os-stream ystream))))
|
||||
@ -680,7 +680,7 @@
|
||||
(dom:get-attribute member "href"))))
|
||||
(unless (or (runes:rod= (dom:tag-name member) #"metadata")
|
||||
(member href *bad-tests* :test 'equal))
|
||||
(format t "~&~D/~D ~A~%" i #+nil n 808 href)
|
||||
(format t "~&~D/~D ~A~%" i n href)
|
||||
(let ((lisp (slurp-test
|
||||
(merge-pathnames href test-directory))))
|
||||
(when verbose
|
||||
|
||||
@ -54,9 +54,10 @@
|
||||
(merge-pathnames output sub-directory)))))
|
||||
|
||||
(defun serialize-document (document)
|
||||
(map 'vector #'char-code
|
||||
(with-output-to-string (s)
|
||||
(cxml:unparse-document document s :canonical 2))))
|
||||
(dom:map-document (cxml:make-octet-vector-sink :canonical 2)
|
||||
document
|
||||
:include-doctype :canonical-notations
|
||||
:include-default-values t))
|
||||
|
||||
(defun file-contents (pathname)
|
||||
(with-open-file (s pathname :element-type '(unsigned-byte 8))
|
||||
|
||||
@ -38,11 +38,13 @@
|
||||
;; #:parse-string
|
||||
#:parse-octets
|
||||
|
||||
#:make-character-stream-sink
|
||||
#:make-octet-vector-sink
|
||||
#:make-octet-stream-sink
|
||||
#:unparse-document
|
||||
#:unparse-document-to-octets
|
||||
#:make-rod-sink
|
||||
#+rune-is-character #:make-string-sink
|
||||
#+rune-is-character #:make-character-stream-sink
|
||||
#-rune-is-character #:make-string-sink/utf8
|
||||
#-rune-is-character #:make-character-stream-sink/utf8
|
||||
|
||||
#:with-xml-output
|
||||
#:with-element
|
||||
|
||||
452
xml/unparse.lisp
452
xml/unparse.lisp
@ -67,11 +67,10 @@
|
||||
;; -- James Clark (jjc@jclark.com)
|
||||
|
||||
|
||||
;;;; SINK: a rune output "stream"
|
||||
;;;; SINK: an xml output sink
|
||||
|
||||
(defclass sink ()
|
||||
((high-surrogate :initform nil)
|
||||
(column :initform 0 :accessor column)
|
||||
((ystream :initarg :ystream :accessor sink-ystream)
|
||||
(width :initform 79 :initarg :width :accessor width)
|
||||
(canonical :initform t :initarg :canonical :accessor canonical)
|
||||
(indentation :initform nil :initarg :indentation :accessor indentation)
|
||||
@ -90,77 +89,49 @@
|
||||
(when (and (canonical instance) (indentation instance))
|
||||
(error "Cannot indent XML in canonical mode")))
|
||||
|
||||
;; WRITE-OCTET als generisch zu machen ist vielleicht nicht die schnellste
|
||||
;; Loesung, aber die einfachste.
|
||||
(defgeneric write-octet (octet sink))
|
||||
|
||||
(defun make-buffer (&key (element-type '(unsigned-byte 8)))
|
||||
(make-array 1
|
||||
:element-type element-type
|
||||
:adjustable t
|
||||
:fill-pointer 0))
|
||||
|
||||
(defmethod write-octet :after (octet sink)
|
||||
(with-slots (column) sink
|
||||
(setf column (if (eql octet 10) 0 (1+ column)))))
|
||||
;; total haesslich, aber die ystreams will ich im moment eigentlich nicht
|
||||
;; dokumentieren
|
||||
(macrolet ((define-maker (make-sink make-ystream &rest args)
|
||||
`(defun ,make-sink (,@args &rest initargs)
|
||||
(apply #'make-instance
|
||||
'sink
|
||||
:ystream (,make-ystream ,@args)
|
||||
initargs))))
|
||||
(define-maker make-octet-vector-sink make-octet-vector-ystream)
|
||||
(define-maker make-octet-stream-sink make-octet-stream-ystream stream)
|
||||
(define-maker make-rod-sink make-rod-ystream)
|
||||
|
||||
#+rune-is-character
|
||||
(define-maker make-character-stream-sink make-character-ystream stream)
|
||||
|
||||
#-rune-is-character
|
||||
(define-maker make-string-sink/utf8 make-string-ystream/utf8)
|
||||
|
||||
#-rune-is-character
|
||||
(define-maker make-character-stream-sink/utf8
|
||||
make-character-stream-ystream/utf8
|
||||
stream))
|
||||
|
||||
#+rune-is-character
|
||||
(defun make-string-sink (&rest args) (apply #'make-rod-sink args))
|
||||
|
||||
|
||||
;; vector (octet) sinks
|
||||
|
||||
(defclass vector-sink (sink)
|
||||
((target-vector :initform (make-buffer))))
|
||||
|
||||
(defun make-octet-vector-sink (&rest initargs)
|
||||
(apply #'make-instance 'vector-sink initargs))
|
||||
|
||||
(defmethod write-octet (octet (sink vector-sink))
|
||||
(let ((target-vector (slot-value sink 'target-vector)))
|
||||
(vector-push-extend octet target-vector (length target-vector))))
|
||||
|
||||
(defmethod sax:end-document ((sink vector-sink))
|
||||
(slot-value sink 'target-vector))
|
||||
|
||||
|
||||
;; character stream sinks
|
||||
|
||||
(defclass character-stream-sink (sink)
|
||||
((target-stream :initarg :target-stream)))
|
||||
|
||||
(defun make-character-stream-sink (character-stream &rest initargs)
|
||||
(apply #'make-instance 'character-stream-sink
|
||||
:target-stream character-stream
|
||||
initargs))
|
||||
|
||||
(defmethod write-octet (octet (sink character-stream-sink))
|
||||
(write-char (code-char octet) (slot-value sink 'target-stream)))
|
||||
|
||||
(defmethod sax:end-document ((sink character-stream-sink))
|
||||
(slot-value sink 'target-stream))
|
||||
|
||||
|
||||
;; octet stream sinks
|
||||
|
||||
(defclass octet-stream-sink (sink)
|
||||
((target-stream :initarg :target-stream)))
|
||||
|
||||
(defun make-octet-stream-sink (octet-stream &rest initargs)
|
||||
(apply #'make-instance 'octet-stream-sink
|
||||
:target-stream octet-stream
|
||||
initargs))
|
||||
|
||||
(defmethod write-octet (octet (sink octet-stream-sink))
|
||||
(write-byte octet (slot-value sink 'target-stream)))
|
||||
|
||||
(defmethod sax:end-document ((sink octet-stream-sink))
|
||||
(slot-value sink 'target-stream))
|
||||
(defmethod sax:end-document ((sink sink))
|
||||
(close-ystream (sink-ystream sink)))
|
||||
|
||||
|
||||
;;;; doctype and notations
|
||||
|
||||
(defmethod sax:start-document ((sink sink))
|
||||
(unless (canonical sink)
|
||||
(write-rod #"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" sink)
|
||||
(write-rune #/U+000A sink)))
|
||||
(%write-rod #"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" sink)
|
||||
(%write-rune #/U+000A sink)))
|
||||
|
||||
(defmethod sax:start-dtd ((sink sink) name public-id system-id)
|
||||
(setf (name-for-dtd sink) name)
|
||||
@ -170,28 +141,28 @@
|
||||
(defun ensure-doctype (sink &optional public-id system-id)
|
||||
(unless (have-doctype sink)
|
||||
(setf (have-doctype sink) t)
|
||||
(write-rod #"<!DOCTYPE " sink)
|
||||
(write-rod (name-for-dtd sink) sink)
|
||||
(%write-rod #"<!DOCTYPE " sink)
|
||||
(%write-rod (name-for-dtd sink) sink)
|
||||
(cond
|
||||
(public-id
|
||||
(write-rod #" PUBLIC \"" sink)
|
||||
(%write-rod #" PUBLIC \"" sink)
|
||||
(unparse-string public-id sink)
|
||||
(write-rod #"\" \"" sink)
|
||||
(%write-rod #"\" \"" sink)
|
||||
(unparse-string system-id sink)
|
||||
(write-rod #"\"" sink))
|
||||
(%write-rod #"\"" sink))
|
||||
(system-id
|
||||
(write-rod #" SYSTEM \"" sink)
|
||||
(%write-rod #" SYSTEM \"" sink)
|
||||
(unparse-string public-id sink)
|
||||
(write-rod #"\"" sink)))))
|
||||
(%write-rod #"\"" sink)))))
|
||||
|
||||
(defmethod sax:start-internal-subset ((sink sink))
|
||||
(ensure-doctype sink)
|
||||
(write-rod #" [" sink)
|
||||
(write-rune #/U+000A sink))
|
||||
(%write-rod #" [" sink)
|
||||
(%write-rune #/U+000A sink))
|
||||
|
||||
(defmethod sax:end-internal-subset ((sink sink))
|
||||
(ensure-doctype sink)
|
||||
(write-rod #"]" sink))
|
||||
(%write-rod #"]" sink))
|
||||
|
||||
(defmethod sax:notation-declaration ((sink sink) name public-id system-id)
|
||||
(let ((prev (previous-notation sink)))
|
||||
@ -200,171 +171,171 @@
|
||||
(not (rod< prev name)))
|
||||
(error "misordered notations; cannot unparse canonically"))
|
||||
(setf (previous-notation sink) name))
|
||||
(write-rod #"<!NOTATION " sink)
|
||||
(write-rod name sink)
|
||||
(%write-rod #"<!NOTATION " sink)
|
||||
(%write-rod name sink)
|
||||
(cond
|
||||
((zerop (length public-id))
|
||||
(write-rod #" SYSTEM '" sink)
|
||||
(write-rod system-id sink)
|
||||
(write-rune #/' sink))
|
||||
(%write-rod #" SYSTEM '" sink)
|
||||
(%write-rod system-id sink)
|
||||
(%write-rune #/' sink))
|
||||
((zerop (length system-id))
|
||||
(write-rod #" PUBLIC '" sink)
|
||||
(write-rod public-id sink)
|
||||
(write-rune #/' sink))
|
||||
(%write-rod #" PUBLIC '" sink)
|
||||
(%write-rod public-id sink)
|
||||
(%write-rune #/' sink))
|
||||
(t
|
||||
(write-rod #" PUBLIC '" sink)
|
||||
(write-rod public-id sink)
|
||||
(write-rod #"' '" sink)
|
||||
(write-rod system-id sink)
|
||||
(write-rune #/' sink)))
|
||||
(write-rune #/> sink)
|
||||
(write-rune #/U+000A sink))
|
||||
(%write-rod #" PUBLIC '" sink)
|
||||
(%write-rod public-id sink)
|
||||
(%write-rod #"' '" sink)
|
||||
(%write-rod system-id sink)
|
||||
(%write-rune #/' sink)))
|
||||
(%write-rune #/> sink)
|
||||
(%write-rune #/U+000A sink))
|
||||
|
||||
(defmethod sax:unparsed-entity-declaration
|
||||
((sink sink) name public-id system-id notation-name)
|
||||
(unless (and (canonical sink) (< (canonical sink) 3))
|
||||
(write-rod #"<!ENTITY " sink)
|
||||
(write-rod name sink)
|
||||
(%write-rod #"<!ENTITY " sink)
|
||||
(%write-rod name sink)
|
||||
(cond
|
||||
((zerop (length public-id))
|
||||
(write-rod #" SYSTEM '" sink)
|
||||
(write-rod system-id sink)
|
||||
(write-rune #/' sink))
|
||||
(%write-rod #" SYSTEM '" sink)
|
||||
(%write-rod system-id sink)
|
||||
(%write-rune #/' sink))
|
||||
((zerop (length system-id))
|
||||
(write-rod #" PUBLIC '" sink)
|
||||
(write-rod public-id sink)
|
||||
(write-rune #/' sink))
|
||||
(%write-rod #" PUBLIC '" sink)
|
||||
(%write-rod public-id sink)
|
||||
(%write-rune #/' sink))
|
||||
(t
|
||||
(write-rod #" PUBLIC '" sink)
|
||||
(write-rod public-id sink)
|
||||
(write-rod #"' '" sink)
|
||||
(write-rod system-id sink)
|
||||
(write-rune #/' sink)))
|
||||
(write-rod #" NDATA " sink)
|
||||
(write-rod notation-name sink)
|
||||
(write-rune #/> sink)
|
||||
(write-rune #/U+000A sink)))
|
||||
(%write-rod #" PUBLIC '" sink)
|
||||
(%write-rod public-id sink)
|
||||
(%write-rod #"' '" sink)
|
||||
(%write-rod system-id sink)
|
||||
(%write-rune #/' sink)))
|
||||
(%write-rod #" NDATA " sink)
|
||||
(%write-rod notation-name sink)
|
||||
(%write-rune #/> sink)
|
||||
(%write-rune #/U+000A sink)))
|
||||
|
||||
(defmethod sax:external-entity-declaration
|
||||
((sink sink) kind name public-id system-id)
|
||||
(when (canonical sink)
|
||||
(error "cannot serialize parsed entities in canonical mode"))
|
||||
(write-rod #"<!ENTITY " sink)
|
||||
(%write-rod #"<!ENTITY " sink)
|
||||
(when (eq kind :parameter)
|
||||
(write-rod #" % " sink))
|
||||
(write-rod name sink)
|
||||
(%write-rod #" % " sink))
|
||||
(%write-rod name sink)
|
||||
(cond
|
||||
((zerop (length public-id))
|
||||
(write-rod #" SYSTEM '" sink)
|
||||
(write-rod system-id sink)
|
||||
(write-rune #/' sink))
|
||||
(%write-rod #" SYSTEM '" sink)
|
||||
(%write-rod system-id sink)
|
||||
(%write-rune #/' sink))
|
||||
((zerop (length system-id))
|
||||
(write-rod #" PUBLIC '" sink)
|
||||
(write-rod public-id sink)
|
||||
(write-rune #/' sink))
|
||||
(%write-rod #" PUBLIC '" sink)
|
||||
(%write-rod public-id sink)
|
||||
(%write-rune #/' sink))
|
||||
(t
|
||||
(write-rod #" PUBLIC '" sink)
|
||||
(write-rod public-id sink)
|
||||
(write-rod #"' '" sink)
|
||||
(write-rod system-id sink)
|
||||
(write-rune #/' sink)))
|
||||
(write-rune #/> sink)
|
||||
(write-rune #/U+000A sink))
|
||||
(%write-rod #" PUBLIC '" sink)
|
||||
(%write-rod public-id sink)
|
||||
(%write-rod #"' '" sink)
|
||||
(%write-rod system-id sink)
|
||||
(%write-rune #/' sink)))
|
||||
(%write-rune #/> sink)
|
||||
(%write-rune #/U+000A sink))
|
||||
|
||||
(defmethod sax:internal-entity-declaration ((sink sink) kind name value)
|
||||
(when (canonical sink)
|
||||
(error "cannot serialize parsed entities in canonical mode"))
|
||||
(write-rod #"<!ENTITY " sink)
|
||||
(%write-rod #"<!ENTITY " sink)
|
||||
(when (eq kind :parameter)
|
||||
(write-rod #" % " sink))
|
||||
(write-rod name sink)
|
||||
(write-rune #/U+0020 sink)
|
||||
(write-rune #/\" sink)
|
||||
(%write-rod #" % " sink))
|
||||
(%write-rod name sink)
|
||||
(%write-rune #/U+0020 sink)
|
||||
(%write-rune #/\" sink)
|
||||
(unparse-string value sink)
|
||||
(write-rune #/\" sink)
|
||||
(write-rune #/> sink)
|
||||
(write-rune #/U+000A sink))
|
||||
(%write-rune #/\" sink)
|
||||
(%write-rune #/> sink)
|
||||
(%write-rune #/U+000A sink))
|
||||
|
||||
(defmethod sax:element-declaration ((sink sink) name model)
|
||||
(when (canonical sink)
|
||||
(error "cannot serialize element type declarations in canonical mode"))
|
||||
(write-rod #"<!ELEMENT " sink)
|
||||
(write-rod name sink)
|
||||
(write-rune #/U+0020 sink)
|
||||
(%write-rod #"<!ELEMENT " sink)
|
||||
(%write-rod name sink)
|
||||
(%write-rune #/U+0020 sink)
|
||||
(labels ((walk (m)
|
||||
(cond
|
||||
((eq m :EMPTY)
|
||||
(write-rod "EMPTY" sink))
|
||||
(%write-rod "EMPTY" sink))
|
||||
((eq m :PCDATA)
|
||||
(write-rod "#PCDATA" sink))
|
||||
(%write-rod "#PCDATA" sink))
|
||||
((atom m)
|
||||
(unparse-string m sink))
|
||||
(t
|
||||
(ecase (car m)
|
||||
(and
|
||||
(write-rune #/\( sink)
|
||||
(%write-rune #/\( sink)
|
||||
(loop for (n . rest) on (cdr m) do
|
||||
(walk n)
|
||||
(when rest
|
||||
(write-rune #\, sink)))
|
||||
(write-rune #/\) sink))
|
||||
(%write-rune #\, sink)))
|
||||
(%write-rune #/\) sink))
|
||||
(or
|
||||
(write-rune #/\( sink)
|
||||
(%write-rune #/\( sink)
|
||||
(loop for (n . rest) on (cdr m) do
|
||||
(walk n)
|
||||
(when rest
|
||||
(write-rune #\| sink)))
|
||||
(write-rune #/\) sink))
|
||||
(%write-rune #\| sink)))
|
||||
(%write-rune #/\) sink))
|
||||
(*
|
||||
(walk (second m))
|
||||
(write-rod #/* sink))
|
||||
(%write-rod #/* sink))
|
||||
(+
|
||||
(walk (second m))
|
||||
(write-rod #/+ sink))
|
||||
(%write-rod #/+ sink))
|
||||
(?
|
||||
(walk (second m))
|
||||
(write-rod #/? sink)))))))
|
||||
(%write-rod #/? sink)))))))
|
||||
(walk model))
|
||||
(write-rune #/> sink)
|
||||
(write-rune #/U+000A sink))
|
||||
(%write-rune #/> sink)
|
||||
(%write-rune #/U+000A sink))
|
||||
|
||||
(defmethod sax:attribute-declaration ((sink sink) ename aname type default)
|
||||
(when (canonical sink)
|
||||
(error "cannot serialize attribute type declarations in canonical mode"))
|
||||
(write-rod #"<!ATTLIST " sink)
|
||||
(write-rod ename sink)
|
||||
(write-rune #/U+0020 sink)
|
||||
(write-rod aname sink)
|
||||
(write-rune #/U+0020 sink)
|
||||
(%write-rod #"<!ATTLIST " sink)
|
||||
(%write-rod ename sink)
|
||||
(%write-rune #/U+0020 sink)
|
||||
(%write-rod aname sink)
|
||||
(%write-rune #/U+0020 sink)
|
||||
(cond
|
||||
((atom type)
|
||||
(write-rod (rod (string-upcase (symbol-name type))) sink))
|
||||
(%write-rod (rod (string-upcase (symbol-name type))) sink))
|
||||
(t
|
||||
(when (eq :NOTATION (car type))
|
||||
(write-rod #"NOTATION " sink))
|
||||
(write-rune #/\( sink)
|
||||
(%write-rod #"NOTATION " sink))
|
||||
(%write-rune #/\( sink)
|
||||
(loop for (n . rest) on (cdr type) do
|
||||
(write-rod n sink)
|
||||
(%write-rod n sink)
|
||||
(when rest
|
||||
(write-rune #\| sink)))
|
||||
(write-rune #/\) sink)))
|
||||
(%write-rune #\| sink)))
|
||||
(%write-rune #/\) sink)))
|
||||
(cond
|
||||
((atom default)
|
||||
(write-rune #/# sink)
|
||||
(write-rod (rod (string-upcase (symbol-name default))) sink))
|
||||
(%write-rune #/# sink)
|
||||
(%write-rod (rod (string-upcase (symbol-name default))) sink))
|
||||
(t
|
||||
(when (eq :FIXED (car default))
|
||||
(write-rod #"#FIXED " sink))
|
||||
(write-rune #/\" sink)
|
||||
(%write-rod #"#FIXED " sink))
|
||||
(%write-rune #/\" sink)
|
||||
(unparse-string (second default) sink)
|
||||
(write-rune #/\" sink)))
|
||||
(write-rune #/> sink)
|
||||
(write-rune #/U+000A sink))
|
||||
(%write-rune #/\" sink)))
|
||||
(%write-rune #/> sink)
|
||||
(%write-rune #/U+000A sink))
|
||||
|
||||
(defmethod sax:end-dtd ((sink sink))
|
||||
(when (have-doctype sink)
|
||||
(write-rod #">" sink)
|
||||
(write-rune #/U+000A sink)))
|
||||
(%write-rod #">" sink)
|
||||
(%write-rune #/U+000A sink)))
|
||||
|
||||
|
||||
;;;; elements
|
||||
@ -375,15 +346,15 @@
|
||||
(have-gt nil))
|
||||
|
||||
(defun sink-fresh-line (sink)
|
||||
(unless (zerop (column sink))
|
||||
(write-rune-0 10 sink)
|
||||
(unless (zerop (ystream-column (sink-ystream sink)))
|
||||
(%write-rune 10 sink)
|
||||
(indent sink)))
|
||||
|
||||
(defun maybe-close-tag (sink)
|
||||
(let ((tag (car (stack sink))))
|
||||
(when (and (tag-p tag) (not (tag-have-gt tag)))
|
||||
(setf (tag-have-gt tag) t)
|
||||
(write-rune #/> sink))))
|
||||
(%write-rune #/> sink))))
|
||||
|
||||
(defmethod sax:start-element
|
||||
((sink sink) namespace-uri local-name qname attributes)
|
||||
@ -395,16 +366,16 @@
|
||||
(when (indentation sink)
|
||||
(sink-fresh-line sink)
|
||||
(start-indentation-block sink))
|
||||
(write-rune #/< sink)
|
||||
(write-rod qname sink)
|
||||
(%write-rune #/< sink)
|
||||
(%write-rod qname sink)
|
||||
(let ((atts (sort (copy-list attributes) #'rod< :key #'sax:attribute-qname)))
|
||||
(dolist (a atts)
|
||||
(write-rune #/space sink)
|
||||
(write-rod (sax:attribute-qname a) sink)
|
||||
(write-rune #/= sink)
|
||||
(write-rune #/\" sink)
|
||||
(map nil (lambda (c) (unparse-datachar c sink)) (sax:attribute-value a))
|
||||
(write-rune #/\" sink)))
|
||||
(%write-rune #/space sink)
|
||||
(%write-rod (sax:attribute-qname a) sink)
|
||||
(%write-rune #/= sink)
|
||||
(%write-rune #/\" sink)
|
||||
(unparse-string (sax:attribute-value a) sink)
|
||||
(%write-rune #/\" sink)))
|
||||
(when (canonical sink)
|
||||
(maybe-close-tag sink)))
|
||||
|
||||
@ -423,21 +394,21 @@
|
||||
(sink-fresh-line sink)))
|
||||
(cond
|
||||
((tag-have-gt tag)
|
||||
(write-rod '#.(string-rod "</") sink)
|
||||
(write-rod qname sink)
|
||||
(write-rod '#.(string-rod ">") sink))
|
||||
(%write-rod '#.(string-rod "</") sink)
|
||||
(%write-rod qname sink)
|
||||
(%write-rod '#.(string-rod ">") sink))
|
||||
(t
|
||||
(write-rod #"/>" sink)))))
|
||||
(%write-rod #"/>" sink)))))
|
||||
|
||||
(defmethod sax:processing-instruction ((sink sink) target data)
|
||||
(maybe-close-tag sink)
|
||||
(unless (rod-equal target '#.(string-rod "xml"))
|
||||
(write-rod '#.(string-rod "<?") sink)
|
||||
(write-rod target sink)
|
||||
(%write-rod '#.(string-rod "<?") sink)
|
||||
(%write-rod target sink)
|
||||
(when data
|
||||
(write-rune #/space sink)
|
||||
(write-rod data sink))
|
||||
(write-rod '#.(string-rod "?>") sink)))
|
||||
(%write-rune #/space sink)
|
||||
(%write-rod data sink))
|
||||
(%write-rod '#.(string-rod "?>") sink)))
|
||||
|
||||
(defmethod sax:start-cdata ((sink sink))
|
||||
(maybe-close-tag sink)
|
||||
@ -451,17 +422,17 @@
|
||||
(not (search #"]]" data)))
|
||||
(when (indentation sink)
|
||||
(sink-fresh-line sink))
|
||||
(write-rod #"<![CDATA[" sink)
|
||||
(%write-rod #"<![CDATA[" sink)
|
||||
;; XXX signal error if body is unprintable?
|
||||
(map nil (lambda (c) (write-rune c sink)) data)
|
||||
(write-rod #"]]>" sink))
|
||||
(map nil (lambda (c) (%write-rune c sink)) data)
|
||||
(%write-rod #"]]>" sink))
|
||||
(t
|
||||
(if (indentation sink)
|
||||
(unparse-indented-text data sink)
|
||||
(map nil (if (canonical sink)
|
||||
(lambda (c) (unparse-datachar c sink))
|
||||
(lambda (c) (unparse-datachar-readable c sink)))
|
||||
data)))))
|
||||
(let ((y (sink-ystream sink)))
|
||||
(if (canonical sink)
|
||||
(loop for c across data do (unparse-datachar c y))
|
||||
(loop for c across data do (unparse-datachar-readable c y))))))))
|
||||
|
||||
(defmethod sax:end-cdata ((sink sink))
|
||||
(unless (eq (pop (stack sink)) :cdata)
|
||||
@ -469,7 +440,7 @@
|
||||
|
||||
(defun indent (sink)
|
||||
(dotimes (x (current-indentation sink))
|
||||
(write-rune-0 32 sink)))
|
||||
(%write-rune 32 sink)))
|
||||
|
||||
(defun start-indentation-block (sink)
|
||||
(incf (current-indentation sink) (indentation sink)))
|
||||
@ -491,89 +462,47 @@
|
||||
(let* ((w (or (position-if #'whitespacep data :start (1+ pos)) n))
|
||||
(next (or (position-if-not #'whitespacep data :start w) n)))
|
||||
(when need-whitespace-p
|
||||
(if (< (+ (column sink) w (- pos)) (width sink))
|
||||
(write-rune-0 32 sink)
|
||||
(if (< (+ (ystream-column (sink-ystream sink)) w (- pos))
|
||||
(width sink))
|
||||
(%write-rune 32 sink)
|
||||
(sink-fresh-line sink)))
|
||||
(loop
|
||||
with y = (sink-ystream sink)
|
||||
for i from pos below w do
|
||||
(unparse-datachar-readable (elt data i) sink))
|
||||
(unparse-datachar-readable (elt data i) y))
|
||||
(setf need-whitespace-p (< w n))
|
||||
(setf pos next))))
|
||||
(t
|
||||
(write-rune-0 32 sink))))))
|
||||
(%write-rune 32 sink))))))
|
||||
|
||||
(defun unparse-string (str sink)
|
||||
(map nil (lambda (c) (unparse-datachar c sink)) str))
|
||||
(let ((y (sink-ystream sink)))
|
||||
(loop for rune across str do (unparse-datachar rune y))))
|
||||
|
||||
(defun unparse-datachar (c sink)
|
||||
(cond ((rune= c #/&) (write-rod '#.(string-rod "&") sink))
|
||||
((rune= c #/<) (write-rod '#.(string-rod "<") sink))
|
||||
((rune= c #/>) (write-rod '#.(string-rod ">") sink))
|
||||
((rune= c #/\") (write-rod '#.(string-rod """) sink))
|
||||
((rune= c #/U+0009) (write-rod '#.(string-rod "	") sink))
|
||||
((rune= c #/U+000A) (write-rod '#.(string-rod " ") sink))
|
||||
((rune= c #/U+000D) (write-rod '#.(string-rod " ") sink))
|
||||
(defun unparse-datachar (c ystream)
|
||||
(cond ((rune= c #/&) (write-rod '#.(string-rod "&") ystream))
|
||||
((rune= c #/<) (write-rod '#.(string-rod "<") ystream))
|
||||
((rune= c #/>) (write-rod '#.(string-rod ">") ystream))
|
||||
((rune= c #/\") (write-rod '#.(string-rod """) ystream))
|
||||
((rune= c #/U+0009) (write-rod '#.(string-rod "	") ystream))
|
||||
((rune= c #/U+000A) (write-rod '#.(string-rod " ") ystream))
|
||||
((rune= c #/U+000D) (write-rod '#.(string-rod " ") ystream))
|
||||
(t
|
||||
(write-rune c sink))))
|
||||
(write-rune c ystream))))
|
||||
|
||||
(defun unparse-datachar-readable (c sink)
|
||||
(cond ((rune= c #/&) (write-rod '#.(string-rod "&") sink))
|
||||
((rune= c #/<) (write-rod '#.(string-rod "<") sink))
|
||||
((rune= c #/>) (write-rod '#.(string-rod ">") sink))
|
||||
((rune= c #/\") (write-rod '#.(string-rod """) sink))
|
||||
(defun unparse-datachar-readable (c ystream)
|
||||
(cond ((rune= c #/&) (write-rod '#.(string-rod "&") ystream))
|
||||
((rune= c #/<) (write-rod '#.(string-rod "<") ystream))
|
||||
((rune= c #/>) (write-rod '#.(string-rod ">") ystream))
|
||||
((rune= c #/\") (write-rod '#.(string-rod """) ystream))
|
||||
(t
|
||||
(write-rune c sink))))
|
||||
(write-rune c ystream))))
|
||||
|
||||
(defun %write-rune (c sink)
|
||||
(write-rune c (sink-ystream sink)))
|
||||
|
||||
;;;; UTF-8 output for SINKs
|
||||
|
||||
(defun write-rod (rod sink)
|
||||
(map nil (lambda (c) (write-rune c sink)) rod))
|
||||
|
||||
(defun write-rune (rune sink)
|
||||
(let ((code (rune-code rune)))
|
||||
(with-slots (high-surrogate) sink
|
||||
(cond
|
||||
((<= #xD800 code #xDBFF)
|
||||
(setf high-surrogate code))
|
||||
((<= #xDC00 code #xDFFF)
|
||||
(let ((q (logior (ash (- high-surrogate #xD7C0) 10)
|
||||
(- code #xDC00))))
|
||||
(write-rune-0 q sink))
|
||||
(setf high-surrogate nil))
|
||||
(t
|
||||
(write-rune-0 code sink))))))
|
||||
|
||||
(defun write-rune-0 (code sink)
|
||||
(labels ((wr (x)
|
||||
(write-octet x sink)))
|
||||
(cond ((<= #x00000000 code #x0000007F)
|
||||
(wr code))
|
||||
((<= #x00000080 code #x000007FF)
|
||||
(wr (logior #b11000000 (ldb (byte 5 6) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 0) code))))
|
||||
((<= #x00000800 code #x0000FFFF)
|
||||
(wr (logior #b11100000 (ldb (byte 4 12) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 6) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 0) code))))
|
||||
((<= #x00010000 code #x001FFFFF)
|
||||
(wr (logior #b11110000 (ldb (byte 3 18) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 12) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 6) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 0) code))))
|
||||
((<= #x00200000 code #x03FFFFFF)
|
||||
(wr (logior #b11111000 (ldb (byte 2 24) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 18) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 12) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 6) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 0) code))))
|
||||
((<= #x04000000 code #x7FFFFFFF)
|
||||
(wr (logior #b11111100 (ldb (byte 1 30) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 24) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 18) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 12) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 6) code)))
|
||||
(wr (logior #b10000000 (ldb (byte 6 0) code)))))))
|
||||
(defun %write-rod (r sink)
|
||||
(write-rod r (sink-ystream sink)))
|
||||
|
||||
|
||||
;;;; convenience functions for DOMless XML serialization
|
||||
@ -632,8 +561,9 @@
|
||||
data)
|
||||
|
||||
(defun rod-to-utf8-string (rod)
|
||||
(with-output-to-string (s)
|
||||
(write-rod rod (cxml:make-character-stream-sink s))))
|
||||
(let ((out (make-buffer :element-type 'character)))
|
||||
(runes-to-utf8/adjustable-string out rod (length rod))
|
||||
out))
|
||||
|
||||
(defun utf8-string-to-rod (str)
|
||||
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
|
||||
|
||||
Reference in New Issue
Block a user