sink reorganization

This commit is contained in:
dlichteblau
2005-12-28 23:11:18 +00:00
parent a6a31873a8
commit b5bd89f643
15 changed files with 778 additions and 338 deletions

View File

@ -10,6 +10,49 @@
<p> <p>
<a href="README.html">CXML Homepage</a> <a href="README.html">CXML Homepage</a>
</p> </p>
<ul class="main">
<li>
<a href="doc/installation.html">Installing Closure XML</a>
<ul class="sub">
<li><a href="doc/installation.html#download"><b>Download</b></a></li>
<li><a href="doc/installation.html#implementations">Implementation-specific notes</a></li>
<li><a href="doc/installation.html#compilation">Compilation</a></li>
<li><a href="doc/installation.html#tests">Tests</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li>
<a href="doc/using.html#quickstart"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
<li>
<a href="doc/using.html">SAX parser</a>
<ul class="sub">
<li><a href="doc/using.html#parser">Parsing and Validating</a></li>
<li><a href="doc/using.html#serialization">Serialization</a></li>
<li><a href="doc/using.html#misc">Miscellaneous SAX handlers</a></li>
<li><a href="doc/using.html#rods">Recoders</a></li>
<li><a href="doc/using.html#dtdcache">Caching of DTD Objects</a></li>
<li><a href="doc/using.html#catalogs">XML Catalogs</a></li>
<li><a href="doc/using.html#sax">SAX Interface</a></li>
</ul>
</li>
<li>
<a href="doc/using.html">DOM implementation</a>
<ul class="sub">
<li><a href="doc/dom.html#parser">Parsing with the DOM builder</a></li>
<li><a href="doc/dom.html#serialization">Serialization</a></li>
<li><a href="doc/dom.html#mapping">DOM/Lisp mapping</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li><a href="doc/xmls-compat.html">XMLS Builder</a></li>
</ul>
</li>
</ul>
</div> </div>
<h1>Closure XML Parser</h1> <h1>Closure XML Parser</h1>
@ -67,6 +110,7 @@
<li>Implemented DOM 2 Core.</li> <li>Implemented DOM 2 Core.</li>
<li>Error handling overhaul.</li> <li>Error handling overhaul.</li>
<li>UTF-8 string support in DOM on Lisps without Unicode characters.</li> <li>UTF-8 string support in DOM on Lisps without Unicode characters.</li>
<li>Sink API has been changed.</li>
<li>Support internal subset serialization.</li> <li>Support internal subset serialization.</li>
<li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li> <li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>
<li>Use trivial-gray-streams.</li> <li>Use trivial-gray-streams.</li>

View File

@ -51,7 +51,8 @@
(:file "encodings" :depends-on ("package")) (:file "encodings" :depends-on ("package"))
(:file "encodings-data" :depends-on ("package" "encodings")) (:file "encodings-data" :depends-on ("package" "encodings"))
(:file "xstream" (:file "xstream"
:depends-on ("package" "definline" "syntax" "encodings-data")))) :depends-on ("package" "definline" "syntax" "encodings-data"))
(:file "ystream" :depends-on (runes))))
(asdf:defsystem :cxml-xml (asdf:defsystem :cxml-xml
:default-component-class closure-source-file :default-component-class closure-source-file
@ -106,7 +107,6 @@
(utf8dom-file utf8-impl :pathname "dom-impl" :depends-on ("package")) (utf8dom-file utf8-impl :pathname "dom-impl" :depends-on ("package"))
#+rune-is-integer #+rune-is-integer
(utf8dom-file utf8-builder :pathname "dom-builder" :depends-on (utf8-impl)) (utf8dom-file utf8-builder :pathname "dom-builder" :depends-on (utf8-impl))
(:file "unparse" :depends-on ("package"))
(:file "dom-sax" :depends-on ("package"))) (:file "dom-sax" :depends-on ("package")))
:depends-on (:cxml-xml)) :depends-on (:cxml-xml))

View File

@ -7,6 +7,52 @@
</head> </head>
<body> <body>
<div class="sidebar"> <div class="sidebar">
<p>
<a href="../README.html">CXML Homepage</a>
</p>
<ul class="main">
<li>
<a href="installation.html">Installing Closure XML</a>
<ul class="sub">
<li><a href="installation.html#download"><b>Download</b></a></li>
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
<li><a href="installation.html#compilation">Compilation</a></li>
<li><a href="installation.html#tests">Tests</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
<li>
<a href="using.html">SAX parser</a>
<ul class="sub">
<li><a href="using.html#parser">Parsing and Validating</a></li>
<li><a href="using.html#serialization">Serialization</a></li>
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
<li><a href="using.html#rods">Recoders</a></li>
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
<li><a href="using.html#catalogs">XML Catalogs</a></li>
<li><a href="using.html#sax">SAX Interface</a></li>
</ul>
</li>
<li>
<a href="using.html">DOM implementation</a>
<ul class="sub">
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
<li><a href="dom.html#serialization">Serialization</a></li>
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li><a href="xmls-compat.html">XMLS Builder</a></li>
</ul>
</li>
</ul>
</div> </div>
<h1>The DOM implementation</h1> <h1>The DOM implementation</h1>
@ -54,14 +100,9 @@
<a name="serialization"/> <a name="serialization"/>
<h3>Serializing DOM</h3> <h3>Serializing DOM</h3>
<p> <p>
The technique used to serialize a DOM document is to use a SAX To serialize a DOM document, use a SAX serialization sink as the
serialization sink as the argument to <tt>dom:map-document</tt>, argument to <tt>dom:map-document</tt>, which generates SAX events
which generates SAX events for the DOM tree. for the DOM tree.
</p>
<p>
In addition, there are convenience functions like
<tt>unparse-document</tt> as a thin wrapper around
<tt>map-document</tt>.
</p> </p>
<p> <p>
Applications dealing with namespaces might want to inject a Applications dealing with namespaces might want to inject a
@ -99,34 +140,6 @@
</li> </li>
</ul> </ul>
<p>
<div class="def">Function CXML:UNPARSE-DOCUMENT (document stream &rest keys)</div>
<div class="def">Function CXML:UNPARSE-DOCUMENT-TO-OCTETS (document &rest keys) => vector</div>
</p>
<p>
Serialize a DOM document object. These convenience functions are
wrappers around <tt>dom:map-document</tt>.
</p>
<p>Keyword arguments are passed on to the sink. C.f. <a
href="using.html#serialization">cxml:make-octet-vector-sink</a>.</p>
<p>Notes:</p>
<ul>
<li>
If keyword argument <tt>canonical</tt> is specified as 2, a
doctype declaration will be written that includes notations
declared in the document.
</li>
</ul>
<p>
<tt>unparse-document-to-octets</tt> returns an <tt>(unsigned-byte
8)</tt> array, whereas <tt>unparse-document</tt> writes
characters.&nbsp; <tt>unparse-document</tt> is useful together
with <tt>with-output-to-string</tt>.&nbsp; However, note that the
resulting document in both cases is UTF-8 encoded, so the
characters written by <tt>unparse-document</tt> are really UTF-8
bytes encoded as characters.
</p>
<a name="mapping"/> <a name="mapping"/>
<h3>DOM/Lisp mapping</h3> <h3>DOM/Lisp mapping</h3>
<p> <p>

View File

@ -7,6 +7,52 @@
</head> </head>
<body> <body>
<div class="sidebar"> <div class="sidebar">
<p>
<a href="../README.html">CXML Homepage</a>
</p>
<ul class="main">
<li>
<a href="installation.html">Installing Closure XML</a>
<ul class="sub">
<li><a href="installation.html#download"><b>Download</b></a></li>
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
<li><a href="installation.html#compilation">Compilation</a></li>
<li><a href="installation.html#tests">Tests</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
<li>
<a href="using.html">SAX parser</a>
<ul class="sub">
<li><a href="using.html#parser">Parsing and Validating</a></li>
<li><a href="using.html#serialization">Serialization</a></li>
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
<li><a href="using.html#rods">Recoders</a></li>
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
<li><a href="using.html#catalogs">XML Catalogs</a></li>
<li><a href="using.html#sax">SAX Interface</a></li>
</ul>
</li>
<li>
<a href="using.html">DOM implementation</a>
<ul class="sub">
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
<li><a href="dom.html#serialization">Serialization</a></li>
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li><a href="xmls-compat.html">XMLS Builder</a></li>
</ul>
</li>
</ul>
</div> </div>
<h1>Installation of Closure XML</h1> <h1>Installation of Closure XML</h1>

View File

@ -83,10 +83,10 @@
* <b>(dom:get-attribute (dom:document-element *example*) "a")</b> * <b>(dom:get-attribute (dom:document-element *example*) "a")</b>
"b"</pre> "b"</pre>
<p>Serialize the DOM document back into a stream (<a <p>Serialize the DOM document back into a file (<a
href="using.html#serialization">read more</a>):</p> href="using.html#serialization">read more</a>):</p>
<pre><b>(cxml:unparse-document *example* *standard-output*)</b> <pre><b>(with-open-file (out "example.out" :direction :output :element-type '(unsigned-byte 8))
&lt;test a="b"&gt;&lt;child&gt;&lt;/child>&lt;/test></pre> (dom:map-document (cxml:make-octet-stream-sink out) *example*))</b></pre>
<p>As an alternative to DOM, parse into xmls-compatible list <p>As an alternative to DOM, parse into xmls-compatible list
structure (<a href="xmls-compat.html">read more</a>):</p> structure (<a href="xmls-compat.html">read more</a>):</p>

View File

@ -7,6 +7,52 @@
</head> </head>
<body> <body>
<div class="sidebar"> <div class="sidebar">
<p>
<a href="../README.html">CXML Homepage</a>
</p>
<ul class="main">
<li>
<a href="installation.html">Installing Closure XML</a>
<ul class="sub">
<li><a href="installation.html#download"><b>Download</b></a></li>
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
<li><a href="installation.html#compilation">Compilation</a></li>
<li><a href="installation.html#tests">Tests</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
<li>
<a href="using.html">SAX parser</a>
<ul class="sub">
<li><a href="using.html#parser">Parsing and Validating</a></li>
<li><a href="using.html#serialization">Serialization</a></li>
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
<li><a href="using.html#rods">Recoders</a></li>
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
<li><a href="using.html#catalogs">XML Catalogs</a></li>
<li><a href="using.html#sax">SAX Interface</a></li>
</ul>
</li>
<li>
<a href="using.html">DOM implementation</a>
<ul class="sub">
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
<li><a href="dom.html#serialization">Serialization</a></li>
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li><a href="xmls-compat.html">XMLS Builder</a></li>
</ul>
</li>
</ul>
</div> </div>
<h1>Using the SAX parser</h1> <h1>Using the SAX parser</h1>
@ -137,12 +183,70 @@
<tt>with-xml-output</tt>, <tt>with-element</tt>, etc). <tt>with-xml-output</tt>, <tt>with-element</tt>, etc).
</p> </p>
<div style="background-color: #ddddff">
Portable sinks:<br/>
<span class="def">Function CXML:MAKE-OCTET-VECTOR-SINK (&rest keys) => sink</span><br/>
<span class="def">Function CXML:MAKE-OCTET-STREAM-SINK (stream &rest keys) => sink</span><br/>
<span class="def">Function CXML:MAKE-ROD-SINK (&rest keys) => sink</span><br/>
<br/>
Only on Lisps with Unicode support:<br/>
<span class="def">Function CXML:MAKE-STRING-SINK</span> -- alias for <tt>cxml:make-rod-sink</tt><br/>
<span class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink</span><br/>
<br/>
Only on Lisps <em>without</em> Unicode support:<br/>
<span class="def">Function CXML:MAKE-STRING-SINK/UTF8 (&rest keys) => sink</span><br/>
<span class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK/UTF8 (stream &rest keys) => sink</span><br/>
</div>
<p> <p>
<div class="def">Function CXML:MAKE-OCTET-STREAM-SINK (stream &rest keys) => sink</div>
<div class="def">Function CXML:MAKE-OCTET-VECTOR-SINK (&rest keys) => sink</div>
<div class="def">Function CXML:MAKE-CHARACTER-STREAM-SINK (stream &rest keys) => sink</div>
Return a SAX serialization handle. Return a SAX serialization handle.
</p> </p>
<ul>
<li>
The <tt>-octet-</tt> functions write the document encoded into
UTF-8.
<tt>make-octet-stream-sink</tt> works with Lisp streams of
element-type <tt>(unsigned-byte 8)</tt>.
<tt>make-octet-vector-sink</tt> returns a vector of
<tt>(unsigned-byte 8)</tt>.
</li>
<li>
<tt>make-character-stream-sink</tt> works with character
streams. It serializes the document into characters <em>without
encoding it into an external format</em>. When using these
functions, <em>take care to avoid encoding the result into
an incorrect external format</em>. (Note that characters undergo
external format conversion when written to a character stream.
If the document's XML declaration specifies an encoding, make
sure to specify this encoding as the external format if and when
writing the serialized document to a character stream. If the
document does not specify an encoding, either UTF-8 or UTF-16
must be used.) This function is available only on Lisps with
unicode support.
</li>
<li>
<tt>make-rod-sink</tt> serializes the document into a vector of
runes <em>without encoding it into an external format</em>.
(On Lisp with unicode support, the result will be a string;
otherwise, a vector of character codes will be returned.)
The warnings given for <tt>make-character-stream-sink</tt>
apply to this function as well.
</li>
<li>
The <tt>/utf8</tt> functions write the document encoded into
characters representing a UTF-8 encoding.
When using these functions, <em>take care to avoid encoding the
result</em> into an external format for a second time. (Note
that characters undergo external format conversion when written
to a character stream. Since these functions already perform
external format conversion, make sure to specify an external
format that does "nothing" if and when writing the serialized document
to a character stream. ISO-8859-1 external formats usually
achieve the desired effect.)
<tt>make-character-stream-sink/utf8</tt> works with character streams.
<tt>make-string-sink/utf8</tt> returns a string.
These functions are available only on Lisps without unicode support.
</li>
</ul>
<p>Keyword arguments:</p> <p>Keyword arguments:</p>
<ul> <ul>
<li> <li>
@ -170,6 +274,16 @@
<tt>NIL</tt>: Use a more readable non-canonical representation. <tt>NIL</tt>: Use a more readable non-canonical representation.
</li> </li>
</ul> </ul>
<p>
An internal subset will be included in the result regardless of
the <tt>canonical</tt> setting. It is the responsibility of the
caller to not report an internal subset for
canonical&nbsp;&lt;=&nbsp;1, or only notations as required for
canonical&nbsp;=&nbsp;2. For example, the
<tt>include-doctype</tt> argument to <tt>dom:map-document</tt>
should be set to <tt>nil</tt> for the former behaviour and
<tt>:canonical-notations</tt> for the latter.
</p>
<p> <p>
With an <tt>indentation</tt> level, pretty-print the XML by With an <tt>indentation</tt> level, pretty-print the XML by
inserting additional whitespace.&nbsp; Note that indentation inserting additional whitespace.&nbsp; Note that indentation

View File

@ -7,6 +7,52 @@
</head> </head>
<body> <body>
<div class="sidebar"> <div class="sidebar">
<p>
<a href="../README.html">CXML Homepage</a>
</p>
<ul class="main">
<li>
<a href="installation.html">Installing Closure XML</a>
<ul class="sub">
<li><a href="installation.html#download"><b>Download</b></a></li>
<li><a href="installation.html#implementations">Implementation-specific notes</a></li>
<li><a href="installation.html#compilation">Compilation</a></li>
<li><a href="installation.html#tests">Tests</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li>
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
</li>
</ul>
</li>
<li>
<a href="using.html">SAX parser</a>
<ul class="sub">
<li><a href="using.html#parser">Parsing and Validating</a></li>
<li><a href="using.html#serialization">Serialization</a></li>
<li><a href="using.html#misc">Miscellaneous SAX handlers</a></li>
<li><a href="using.html#rods">Recoders</a></li>
<li><a href="using.html#dtdcache">Caching of DTD Objects</a></li>
<li><a href="using.html#catalogs">XML Catalogs</a></li>
<li><a href="using.html#sax">SAX Interface</a></li>
</ul>
</li>
<li>
<a href="using.html">DOM implementation</a>
<ul class="sub">
<li><a href="dom.html#parser">Parsing with the DOM builder</a></li>
<li><a href="dom.html#serialization">Serialization</a></li>
<li><a href="dom.html#mapping">DOM/Lisp mapping</a></li>
</ul>
</li>
<li>
<ul class="hack">
<li><a href="xmls-compat.html">XMLS Builder</a></li>
</ul>
</li>
</ul>
</div> </div>
<h1>XMLS Builder</h1> <h1>XMLS Builder</h1>

View File

@ -973,7 +973,7 @@
(rod-stream-buf stream))) (rod-stream-buf stream)))
(defmethod write-attribute-child ((node node) stream) (defmethod write-attribute-child ((node node) stream)
(write-rod (dom:node-value node) stream)) (put-rod (dom:node-value node) stream))
(defmethod write-attribute-child ((node entity-reference) stream) (defmethod write-attribute-child ((node entity-reference) stream)
(dovector (child (dom:child-nodes node)) (dovector (child (dom:child-nodes node))
@ -988,7 +988,7 @@
(buf nil) (buf nil)
(position 0)) (position 0))
(defun write-rod (rod rod-stream) (defun put-rod (rod rod-stream)
(let ((buf (rod-stream-buf rod-stream))) (let ((buf (rod-stream-buf rod-stream)))
(when buf (when buf
(move rod buf 0 (rod-stream-position rod-stream) (length rod))) (move rod buf 0 (rod-stream-position rod-stream) (length rod)))
@ -1210,10 +1210,12 @@
;; dass ein leeres internal subset nicht vorhanden ist und ;; dass ein leeres internal subset nicht vorhanden ist und
;; wir daher nil liefern sollen. bittesehr! ;; wir daher nil liefern sollen. bittesehr!
(dom::%internal-subset node)) (dom::%internal-subset node))
(with-output-to-string (stream) (let ((sink
(let ((sink (cxml:make-character-stream-sink stream))) #+rune-is-character (cxml:make-string-sink)
(dolist (def (dom::%internal-subset node)) #-rune-is-character (cxml:make-string-sink/utf8)))
(apply (car def) sink (cdr def))))) (dolist (def (dom::%internal-subset node))
(apply (car def) sink (cdr def)))
(sax:end-document sink))
nil)) nil))
;;; NOTATION -- nix ;;; NOTATION -- nix

View File

@ -1,19 +0,0 @@
(in-package :cxml)
(defun %unparse-document (sink doc canonical)
(dom:map-document sink
doc
:include-doctype (if (and canonical (>= canonical 2))
:canonical-notations
nil)
:include-default-values t))
(defun unparse-document-to-octets (doc &rest initargs &key canonical)
(%unparse-document (apply #'make-octet-vector-sink initargs)
doc
canonical))
(defun unparse-document (doc stream &rest initargs &key canonical)
(%unparse-document (apply #'make-character-stream-sink stream initargs)
doc
canonical))

View File

@ -59,7 +59,21 @@
#:xstream-plist #:xstream-plist
#:xstream-encoding #:xstream-encoding
#:set-to-full-speed #:set-to-full-speed
#:xstream-name)) #:xstream-name
;; ystream.lisp
#:ystream
#:close-ystream
#:write-rune
#:write-rod
#:ystream-column
#:make-octet-vector-ystream
#:make-octet-stream-ystream
#:make-rod-ystream
#+rune-is-character #:make-character-stream-ystream
#+rune-is-integer #:make-string-ystream/utf8
#+rune-is-integer #:make-character-stream-ystream/utf8
#:runes-to-utf8/adjustable-string))
(defpackage :utf8-runes (defpackage :utf8-runes
(:use :cl) (:use :cl)

247
runes/ystream.lisp Normal file
View File

@ -0,0 +1,247 @@
;;; (c) 2005 David Lichteblau <david@lichteblau.com>
;;; License: Lisp-LGPL (See file COPYING for details).
;;;
;;; ystream (for lack of a better name): a rune output "stream"
(in-package :runes)
(defconstant +ystream-bufsize+ 1024)
(defun make-ub8-array (n)
(make-array n :element-type '(unsigned-byte 8)))
(defun make-ub16-array (n)
(make-array n :element-type '(unsigned-byte 16)))
(defun make-buffer (&key (element-type '(unsigned-byte 8)))
(make-array 1
:element-type element-type
:adjustable t
:fill-pointer 0))
(defmacro while (test &body body)
`(until (not ,test) ,@body))
(defmacro until (test &body body)
`(do () (,test) ,@body))
;;; ystream
;;; +- utf8-ystream
;;; | +- octet-vector-ystream
;;; | \- %stream-ystream
;;; | +- octet-stream-ystream
;;; | \- character-stream-ystream/utf8
;;; | \- string-ystream/utf8
;;; +- rod-ystream
;;; \-- character-stream-ystream
(defstruct ystream
(column 0 :type integer)
(in-ptr 0 :type fixnum)
(in-buffer (make-rod +ystream-bufsize+) :type simple-rod))
(defstruct (utf8-ystream
(:include ystream)
(:conc-name "YSTREAM-"))
(out-buffer (make-ub8-array (* 6 +ystream-bufsize+))
:type (simple-array (unsigned-byte 8) (*))))
(defstruct (%stream-ystream (:include utf8-ystream) (:conc-name "YSTREAM-"))
(os-stream nil))
(definline write-rune (rune ystream)
(let ((in (ystream-in-buffer ystream)))
(when (eql (ystream-in-ptr ystream) (length in))
(flush-ystream ystream)
(setf in (ystream-in-buffer ystream)))
(setf (elt in (ystream-in-ptr ystream)) rune)
(incf (ystream-in-ptr ystream))
(setf (ystream-column ystream)
(if (eql rune #/U+0010) 0 (1+ (ystream-column ystream))))
rune))
(defmethod close-ystream :before ((ystream ystream))
(flush-ystream ystream))
;;;; UTF8-YSTREAM (abstract)
(defmethod close-ystream ((ystream %stream-ystream))
(ystream-os-stream ystream))
(defgeneric ystream-device-write (ystream buf nbytes))
(defmethod flush-ystream ((ystream utf8-ystream))
(let ((ptr (ystream-in-ptr ystream)))
(when (plusp ptr)
(let* ((in (ystream-in-buffer ystream))
(out (ystream-out-buffer ystream))
(surrogatep (<= #xD800 (elt in (1- ptr)) #xDBFF))
n)
(when surrogatep
(decf ptr))
(when (plusp ptr)
(setf n (runes-to-utf8 out in ptr))
(ystream-device-write ystream out n)
(cond
(surrogatep
(setf (elt in 0) (elt in (1- ptr)))
(setf (ystream-in-ptr ystream) 1))
(t
(setf (ystream-in-ptr ystream) 0))))))))
(defun write-rod (rod sink)
(loop for rune across rod do (write-rune rune sink)))
(defun fast-push (new-element vector)
(vector-push-extend new-element vector (max 1 (array-dimension vector 0))))
(macrolet ((define-utf8-writer (name (byte &rest aux) result &body body)
`(defun ,name (out in n)
(let ((high-surrogate nil)
,@aux)
(labels
((write0 (,byte)
,@body)
(write1 (r)
(cond
((<= #x00000000 r #x0000007F)
(write0 r))
((<= #x00000080 r #x000007FF)
(write0 (logior #b11000000 (ldb (byte 5 6) r)))
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
((<= #x00000800 r #x0000FFFF)
(write0 (logior #b11100000 (ldb (byte 4 12) r)))
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
((<= #x00010000 r #x001FFFFF)
(write0 (logior #b11110000 (ldb (byte 3 18) r)))
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
((<= #x00200000 r #x03FFFFFF)
(write0 (logior #b11111000 (ldb (byte 2 24) r)))
(write0 (logior #b10000000 (ldb (byte 6 18) r)))
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
((<= #x04000000 r #x7FFFFFFF)
(write0 (logior #b11111100 (ldb (byte 1 30) r)))
(write0 (logior #b10000000 (ldb (byte 6 24) r)))
(write0 (logior #b10000000 (ldb (byte 6 18) r)))
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
(write0 (logior #b10000000 (ldb (byte 6 0) r))))))
(write2 (r)
(cond
((<= #xD800 r #xDBFF)
(setf high-surrogate r))
((<= #xDC00 r #xDFFF)
(let ((q (logior (ash (- high-surrogate #xD7C0) 10)
(- r #xDC00))))
(write1 q))
(setf high-surrogate nil))
(t
(write1 r)))))
(dotimes (j n)
(write2 (rune-code (elt in j)))))
,result))))
(define-utf8-writer runes-to-utf8 (x (i 0))
i
(setf (elt out i) x)
(incf i))
(define-utf8-writer runes-to-utf8/adjustable-string (x)
nil
(fast-push (code-char x) out)))
;;;; ROD-YSTREAM
(defstruct (rod-ystream (:include ystream)))
(defmethod flush-ystream ((ystream rod-ystream))
(let* ((old (ystream-in-buffer ystream))
(new (make-rod (* 2 (length old)))))
(replace new old)
(setf (ystream-in-buffer ystream) new)))
(defmethod close-ystream ((ystream rod-ystream))
(subseq (ystream-in-buffer ystream) 0 (ystream-in-ptr ystream)))
;;;; CHARACTER-STREAM-YSTREAM
#+rune-is-character
(progn
(defstruct (character-stream-ystream
(:constructor make-character-stream-ystream (target-stream))
(:include ystream)
(:conc-name "YSTREAM-"))
(target-stream nil))
(defmethod flush-ystream ((ystream rod-ystream))
(write-string (ystream-in-buffer ystream) (ystream-target-stream ystream))
(setf (ystream-in-ptr ystream) 0))
(defmethod close-ystream ((ystream rod-ystream))
(ystream-target-stream ystream)))
;;;; OCTET-VECTOR-YSTREAM
(defstruct (octet-vector-ystream
(:include utf8-ystream)
(:conc-name "YSTREAM-"))
(result (make-buffer)))
(defmethod ystream-device-write ((ystream octet-vector-ystream) buf nbytes)
(let* ((result (ystream-result ystream))
(start (length result))
(size (array-dimension result 0)))
(while (> (+ start nbytes) size)
(setf size (* 2 size)))
(adjust-array result size :fill-pointer (+ start nbytes))
(replace result buf :start1 start :end2 nbytes)))
(defmethod close-ystream ((ystream octet-vector-ystream))
(ystream-result ystream))
;;;; OCTET-STREAM-YSTREAM
(defstruct (octet-stream-ystream
(:include %stream-ystream)
(:constructor make-octet-stream-ystream (os-stream))
(:conc-name "YSTREAM-")))
(defmethod ystream-device-write ((ystream octet-stream-ystream) buf nbytes)
(write-sequence buf (ystream-os-stream ystream) :end nbytes))
;;;; CHARACTER-STREAM-YSTREAM/UTF8
#+rune-is-integer
(progn
(defstruct (character-stream-ystream/utf8
(:include %stream-ystream)
(:conc-name "YSTREAM-")))
(defmethod ystream-device-write
((ystream character-stream-ystream/utf8) buf nbytes)
(declare (type (simple-array (unsigned-byte 8) (*)) buf))
(let ((out (ystream-os-stream ystream)))
(dotimes (x nbytes)
(write-char (code-char (elt buf x)) out)))))
;;;; STRING-YSTREAM/UTF8
#+rune-is-integer
(progn
(defstruct (string-ystream/utf8
(:include character-stream-ystream/utf8
(os-stream (make-string-output-stream)))
(:conc-name "YSTREAM-")))
(defmethod close-ystream ((ystream string-ystream/utf8))
(get-output-stream-string (ystream-os-stream ystream))))

View File

@ -680,7 +680,7 @@
(dom:get-attribute member "href")))) (dom:get-attribute member "href"))))
(unless (or (runes:rod= (dom:tag-name member) #"metadata") (unless (or (runes:rod= (dom:tag-name member) #"metadata")
(member href *bad-tests* :test 'equal)) (member href *bad-tests* :test 'equal))
(format t "~&~D/~D ~A~%" i #+nil n 808 href) (format t "~&~D/~D ~A~%" i n href)
(let ((lisp (slurp-test (let ((lisp (slurp-test
(merge-pathnames href test-directory)))) (merge-pathnames href test-directory))))
(when verbose (when verbose

View File

@ -54,9 +54,10 @@
(merge-pathnames output sub-directory))))) (merge-pathnames output sub-directory)))))
(defun serialize-document (document) (defun serialize-document (document)
(map 'vector #'char-code (dom:map-document (cxml:make-octet-vector-sink :canonical 2)
(with-output-to-string (s) document
(cxml:unparse-document document s :canonical 2)))) :include-doctype :canonical-notations
:include-default-values t))
(defun file-contents (pathname) (defun file-contents (pathname)
(with-open-file (s pathname :element-type '(unsigned-byte 8)) (with-open-file (s pathname :element-type '(unsigned-byte 8))

View File

@ -38,11 +38,13 @@
;; #:parse-string ;; #:parse-string
#:parse-octets #:parse-octets
#:make-character-stream-sink
#:make-octet-vector-sink #:make-octet-vector-sink
#:make-octet-stream-sink #:make-octet-stream-sink
#:unparse-document #:make-rod-sink
#:unparse-document-to-octets #+rune-is-character #:make-string-sink
#+rune-is-character #:make-character-stream-sink
#-rune-is-character #:make-string-sink/utf8
#-rune-is-character #:make-character-stream-sink/utf8
#:with-xml-output #:with-xml-output
#:with-element #:with-element

View File

@ -67,11 +67,10 @@
;; -- James Clark (jjc@jclark.com) ;; -- James Clark (jjc@jclark.com)
;;;; SINK: a rune output "stream" ;;;; SINK: an xml output sink
(defclass sink () (defclass sink ()
((high-surrogate :initform nil) ((ystream :initarg :ystream :accessor sink-ystream)
(column :initform 0 :accessor column)
(width :initform 79 :initarg :width :accessor width) (width :initform 79 :initarg :width :accessor width)
(canonical :initform t :initarg :canonical :accessor canonical) (canonical :initform t :initarg :canonical :accessor canonical)
(indentation :initform nil :initarg :indentation :accessor indentation) (indentation :initform nil :initarg :indentation :accessor indentation)
@ -90,77 +89,49 @@
(when (and (canonical instance) (indentation instance)) (when (and (canonical instance) (indentation instance))
(error "Cannot indent XML in canonical mode"))) (error "Cannot indent XML in canonical mode")))
;; WRITE-OCTET als generisch zu machen ist vielleicht nicht die schnellste
;; Loesung, aber die einfachste.
(defgeneric write-octet (octet sink))
(defun make-buffer (&key (element-type '(unsigned-byte 8))) (defun make-buffer (&key (element-type '(unsigned-byte 8)))
(make-array 1 (make-array 1
:element-type element-type :element-type element-type
:adjustable t :adjustable t
:fill-pointer 0)) :fill-pointer 0))
(defmethod write-octet :after (octet sink) ;; total haesslich, aber die ystreams will ich im moment eigentlich nicht
(with-slots (column) sink ;; dokumentieren
(setf column (if (eql octet 10) 0 (1+ column))))) (macrolet ((define-maker (make-sink make-ystream &rest args)
`(defun ,make-sink (,@args &rest initargs)
(apply #'make-instance
'sink
:ystream (,make-ystream ,@args)
initargs))))
(define-maker make-octet-vector-sink make-octet-vector-ystream)
(define-maker make-octet-stream-sink make-octet-stream-ystream stream)
(define-maker make-rod-sink make-rod-ystream)
#+rune-is-character
(define-maker make-character-stream-sink make-character-ystream stream)
#-rune-is-character
(define-maker make-string-sink/utf8 make-string-ystream/utf8)
#-rune-is-character
(define-maker make-character-stream-sink/utf8
make-character-stream-ystream/utf8
stream))
#+rune-is-character
(defun make-string-sink (&rest args) (apply #'make-rod-sink args))
;; vector (octet) sinks (defmethod sax:end-document ((sink sink))
(close-ystream (sink-ystream sink)))
(defclass vector-sink (sink)
((target-vector :initform (make-buffer))))
(defun make-octet-vector-sink (&rest initargs)
(apply #'make-instance 'vector-sink initargs))
(defmethod write-octet (octet (sink vector-sink))
(let ((target-vector (slot-value sink 'target-vector)))
(vector-push-extend octet target-vector (length target-vector))))
(defmethod sax:end-document ((sink vector-sink))
(slot-value sink 'target-vector))
;; character stream sinks
(defclass character-stream-sink (sink)
((target-stream :initarg :target-stream)))
(defun make-character-stream-sink (character-stream &rest initargs)
(apply #'make-instance 'character-stream-sink
:target-stream character-stream
initargs))
(defmethod write-octet (octet (sink character-stream-sink))
(write-char (code-char octet) (slot-value sink 'target-stream)))
(defmethod sax:end-document ((sink character-stream-sink))
(slot-value sink 'target-stream))
;; octet stream sinks
(defclass octet-stream-sink (sink)
((target-stream :initarg :target-stream)))
(defun make-octet-stream-sink (octet-stream &rest initargs)
(apply #'make-instance 'octet-stream-sink
:target-stream octet-stream
initargs))
(defmethod write-octet (octet (sink octet-stream-sink))
(write-byte octet (slot-value sink 'target-stream)))
(defmethod sax:end-document ((sink octet-stream-sink))
(slot-value sink 'target-stream))
;;;; doctype and notations ;;;; doctype and notations
(defmethod sax:start-document ((sink sink)) (defmethod sax:start-document ((sink sink))
(unless (canonical sink) (unless (canonical sink)
(write-rod #"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" sink) (%write-rod #"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" sink)
(write-rune #/U+000A sink))) (%write-rune #/U+000A sink)))
(defmethod sax:start-dtd ((sink sink) name public-id system-id) (defmethod sax:start-dtd ((sink sink) name public-id system-id)
(setf (name-for-dtd sink) name) (setf (name-for-dtd sink) name)
@ -170,28 +141,28 @@
(defun ensure-doctype (sink &optional public-id system-id) (defun ensure-doctype (sink &optional public-id system-id)
(unless (have-doctype sink) (unless (have-doctype sink)
(setf (have-doctype sink) t) (setf (have-doctype sink) t)
(write-rod #"<!DOCTYPE " sink) (%write-rod #"<!DOCTYPE " sink)
(write-rod (name-for-dtd sink) sink) (%write-rod (name-for-dtd sink) sink)
(cond (cond
(public-id (public-id
(write-rod #" PUBLIC \"" sink) (%write-rod #" PUBLIC \"" sink)
(unparse-string public-id sink) (unparse-string public-id sink)
(write-rod #"\" \"" sink) (%write-rod #"\" \"" sink)
(unparse-string system-id sink) (unparse-string system-id sink)
(write-rod #"\"" sink)) (%write-rod #"\"" sink))
(system-id (system-id
(write-rod #" SYSTEM \"" sink) (%write-rod #" SYSTEM \"" sink)
(unparse-string public-id sink) (unparse-string public-id sink)
(write-rod #"\"" sink))))) (%write-rod #"\"" sink)))))
(defmethod sax:start-internal-subset ((sink sink)) (defmethod sax:start-internal-subset ((sink sink))
(ensure-doctype sink) (ensure-doctype sink)
(write-rod #" [" sink) (%write-rod #" [" sink)
(write-rune #/U+000A sink)) (%write-rune #/U+000A sink))
(defmethod sax:end-internal-subset ((sink sink)) (defmethod sax:end-internal-subset ((sink sink))
(ensure-doctype sink) (ensure-doctype sink)
(write-rod #"]" sink)) (%write-rod #"]" sink))
(defmethod sax:notation-declaration ((sink sink) name public-id system-id) (defmethod sax:notation-declaration ((sink sink) name public-id system-id)
(let ((prev (previous-notation sink))) (let ((prev (previous-notation sink)))
@ -200,171 +171,171 @@
(not (rod< prev name))) (not (rod< prev name)))
(error "misordered notations; cannot unparse canonically")) (error "misordered notations; cannot unparse canonically"))
(setf (previous-notation sink) name)) (setf (previous-notation sink) name))
(write-rod #"<!NOTATION " sink) (%write-rod #"<!NOTATION " sink)
(write-rod name sink) (%write-rod name sink)
(cond (cond
((zerop (length public-id)) ((zerop (length public-id))
(write-rod #" SYSTEM '" sink) (%write-rod #" SYSTEM '" sink)
(write-rod system-id sink) (%write-rod system-id sink)
(write-rune #/' sink)) (%write-rune #/' sink))
((zerop (length system-id)) ((zerop (length system-id))
(write-rod #" PUBLIC '" sink) (%write-rod #" PUBLIC '" sink)
(write-rod public-id sink) (%write-rod public-id sink)
(write-rune #/' sink)) (%write-rune #/' sink))
(t (t
(write-rod #" PUBLIC '" sink) (%write-rod #" PUBLIC '" sink)
(write-rod public-id sink) (%write-rod public-id sink)
(write-rod #"' '" sink) (%write-rod #"' '" sink)
(write-rod system-id sink) (%write-rod system-id sink)
(write-rune #/' sink))) (%write-rune #/' sink)))
(write-rune #/> sink) (%write-rune #/> sink)
(write-rune #/U+000A sink)) (%write-rune #/U+000A sink))
(defmethod sax:unparsed-entity-declaration (defmethod sax:unparsed-entity-declaration
((sink sink) name public-id system-id notation-name) ((sink sink) name public-id system-id notation-name)
(unless (and (canonical sink) (< (canonical sink) 3)) (unless (and (canonical sink) (< (canonical sink) 3))
(write-rod #"<!ENTITY " sink) (%write-rod #"<!ENTITY " sink)
(write-rod name sink) (%write-rod name sink)
(cond (cond
((zerop (length public-id)) ((zerop (length public-id))
(write-rod #" SYSTEM '" sink) (%write-rod #" SYSTEM '" sink)
(write-rod system-id sink) (%write-rod system-id sink)
(write-rune #/' sink)) (%write-rune #/' sink))
((zerop (length system-id)) ((zerop (length system-id))
(write-rod #" PUBLIC '" sink) (%write-rod #" PUBLIC '" sink)
(write-rod public-id sink) (%write-rod public-id sink)
(write-rune #/' sink)) (%write-rune #/' sink))
(t (t
(write-rod #" PUBLIC '" sink) (%write-rod #" PUBLIC '" sink)
(write-rod public-id sink) (%write-rod public-id sink)
(write-rod #"' '" sink) (%write-rod #"' '" sink)
(write-rod system-id sink) (%write-rod system-id sink)
(write-rune #/' sink))) (%write-rune #/' sink)))
(write-rod #" NDATA " sink) (%write-rod #" NDATA " sink)
(write-rod notation-name sink) (%write-rod notation-name sink)
(write-rune #/> sink) (%write-rune #/> sink)
(write-rune #/U+000A sink))) (%write-rune #/U+000A sink)))
(defmethod sax:external-entity-declaration (defmethod sax:external-entity-declaration
((sink sink) kind name public-id system-id) ((sink sink) kind name public-id system-id)
(when (canonical sink) (when (canonical sink)
(error "cannot serialize parsed entities in canonical mode")) (error "cannot serialize parsed entities in canonical mode"))
(write-rod #"<!ENTITY " sink) (%write-rod #"<!ENTITY " sink)
(when (eq kind :parameter) (when (eq kind :parameter)
(write-rod #" % " sink)) (%write-rod #" % " sink))
(write-rod name sink) (%write-rod name sink)
(cond (cond
((zerop (length public-id)) ((zerop (length public-id))
(write-rod #" SYSTEM '" sink) (%write-rod #" SYSTEM '" sink)
(write-rod system-id sink) (%write-rod system-id sink)
(write-rune #/' sink)) (%write-rune #/' sink))
((zerop (length system-id)) ((zerop (length system-id))
(write-rod #" PUBLIC '" sink) (%write-rod #" PUBLIC '" sink)
(write-rod public-id sink) (%write-rod public-id sink)
(write-rune #/' sink)) (%write-rune #/' sink))
(t (t
(write-rod #" PUBLIC '" sink) (%write-rod #" PUBLIC '" sink)
(write-rod public-id sink) (%write-rod public-id sink)
(write-rod #"' '" sink) (%write-rod #"' '" sink)
(write-rod system-id sink) (%write-rod system-id sink)
(write-rune #/' sink))) (%write-rune #/' sink)))
(write-rune #/> sink) (%write-rune #/> sink)
(write-rune #/U+000A sink)) (%write-rune #/U+000A sink))
(defmethod sax:internal-entity-declaration ((sink sink) kind name value) (defmethod sax:internal-entity-declaration ((sink sink) kind name value)
(when (canonical sink) (when (canonical sink)
(error "cannot serialize parsed entities in canonical mode")) (error "cannot serialize parsed entities in canonical mode"))
(write-rod #"<!ENTITY " sink) (%write-rod #"<!ENTITY " sink)
(when (eq kind :parameter) (when (eq kind :parameter)
(write-rod #" % " sink)) (%write-rod #" % " sink))
(write-rod name sink) (%write-rod name sink)
(write-rune #/U+0020 sink) (%write-rune #/U+0020 sink)
(write-rune #/\" sink) (%write-rune #/\" sink)
(unparse-string value sink) (unparse-string value sink)
(write-rune #/\" sink) (%write-rune #/\" sink)
(write-rune #/> sink) (%write-rune #/> sink)
(write-rune #/U+000A sink)) (%write-rune #/U+000A sink))
(defmethod sax:element-declaration ((sink sink) name model) (defmethod sax:element-declaration ((sink sink) name model)
(when (canonical sink) (when (canonical sink)
(error "cannot serialize element type declarations in canonical mode")) (error "cannot serialize element type declarations in canonical mode"))
(write-rod #"<!ELEMENT " sink) (%write-rod #"<!ELEMENT " sink)
(write-rod name sink) (%write-rod name sink)
(write-rune #/U+0020 sink) (%write-rune #/U+0020 sink)
(labels ((walk (m) (labels ((walk (m)
(cond (cond
((eq m :EMPTY) ((eq m :EMPTY)
(write-rod "EMPTY" sink)) (%write-rod "EMPTY" sink))
((eq m :PCDATA) ((eq m :PCDATA)
(write-rod "#PCDATA" sink)) (%write-rod "#PCDATA" sink))
((atom m) ((atom m)
(unparse-string m sink)) (unparse-string m sink))
(t (t
(ecase (car m) (ecase (car m)
(and (and
(write-rune #/\( sink) (%write-rune #/\( sink)
(loop for (n . rest) on (cdr m) do (loop for (n . rest) on (cdr m) do
(walk n) (walk n)
(when rest (when rest
(write-rune #\, sink))) (%write-rune #\, sink)))
(write-rune #/\) sink)) (%write-rune #/\) sink))
(or (or
(write-rune #/\( sink) (%write-rune #/\( sink)
(loop for (n . rest) on (cdr m) do (loop for (n . rest) on (cdr m) do
(walk n) (walk n)
(when rest (when rest
(write-rune #\| sink))) (%write-rune #\| sink)))
(write-rune #/\) sink)) (%write-rune #/\) sink))
(* (*
(walk (second m)) (walk (second m))
(write-rod #/* sink)) (%write-rod #/* sink))
(+ (+
(walk (second m)) (walk (second m))
(write-rod #/+ sink)) (%write-rod #/+ sink))
(? (?
(walk (second m)) (walk (second m))
(write-rod #/? sink))))))) (%write-rod #/? sink)))))))
(walk model)) (walk model))
(write-rune #/> sink) (%write-rune #/> sink)
(write-rune #/U+000A sink)) (%write-rune #/U+000A sink))
(defmethod sax:attribute-declaration ((sink sink) ename aname type default) (defmethod sax:attribute-declaration ((sink sink) ename aname type default)
(when (canonical sink) (when (canonical sink)
(error "cannot serialize attribute type declarations in canonical mode")) (error "cannot serialize attribute type declarations in canonical mode"))
(write-rod #"<!ATTLIST " sink) (%write-rod #"<!ATTLIST " sink)
(write-rod ename sink) (%write-rod ename sink)
(write-rune #/U+0020 sink) (%write-rune #/U+0020 sink)
(write-rod aname sink) (%write-rod aname sink)
(write-rune #/U+0020 sink) (%write-rune #/U+0020 sink)
(cond (cond
((atom type) ((atom type)
(write-rod (rod (string-upcase (symbol-name type))) sink)) (%write-rod (rod (string-upcase (symbol-name type))) sink))
(t (t
(when (eq :NOTATION (car type)) (when (eq :NOTATION (car type))
(write-rod #"NOTATION " sink)) (%write-rod #"NOTATION " sink))
(write-rune #/\( sink) (%write-rune #/\( sink)
(loop for (n . rest) on (cdr type) do (loop for (n . rest) on (cdr type) do
(write-rod n sink) (%write-rod n sink)
(when rest (when rest
(write-rune #\| sink))) (%write-rune #\| sink)))
(write-rune #/\) sink))) (%write-rune #/\) sink)))
(cond (cond
((atom default) ((atom default)
(write-rune #/# sink) (%write-rune #/# sink)
(write-rod (rod (string-upcase (symbol-name default))) sink)) (%write-rod (rod (string-upcase (symbol-name default))) sink))
(t (t
(when (eq :FIXED (car default)) (when (eq :FIXED (car default))
(write-rod #"#FIXED " sink)) (%write-rod #"#FIXED " sink))
(write-rune #/\" sink) (%write-rune #/\" sink)
(unparse-string (second default) sink) (unparse-string (second default) sink)
(write-rune #/\" sink))) (%write-rune #/\" sink)))
(write-rune #/> sink) (%write-rune #/> sink)
(write-rune #/U+000A sink)) (%write-rune #/U+000A sink))
(defmethod sax:end-dtd ((sink sink)) (defmethod sax:end-dtd ((sink sink))
(when (have-doctype sink) (when (have-doctype sink)
(write-rod #">" sink) (%write-rod #">" sink)
(write-rune #/U+000A sink))) (%write-rune #/U+000A sink)))
;;;; elements ;;;; elements
@ -375,15 +346,15 @@
(have-gt nil)) (have-gt nil))
(defun sink-fresh-line (sink) (defun sink-fresh-line (sink)
(unless (zerop (column sink)) (unless (zerop (ystream-column (sink-ystream sink)))
(write-rune-0 10 sink) (%write-rune 10 sink)
(indent sink))) (indent sink)))
(defun maybe-close-tag (sink) (defun maybe-close-tag (sink)
(let ((tag (car (stack sink)))) (let ((tag (car (stack sink))))
(when (and (tag-p tag) (not (tag-have-gt tag))) (when (and (tag-p tag) (not (tag-have-gt tag)))
(setf (tag-have-gt tag) t) (setf (tag-have-gt tag) t)
(write-rune #/> sink)))) (%write-rune #/> sink))))
(defmethod sax:start-element (defmethod sax:start-element
((sink sink) namespace-uri local-name qname attributes) ((sink sink) namespace-uri local-name qname attributes)
@ -395,16 +366,16 @@
(when (indentation sink) (when (indentation sink)
(sink-fresh-line sink) (sink-fresh-line sink)
(start-indentation-block sink)) (start-indentation-block sink))
(write-rune #/< sink) (%write-rune #/< sink)
(write-rod qname sink) (%write-rod qname sink)
(let ((atts (sort (copy-list attributes) #'rod< :key #'sax:attribute-qname))) (let ((atts (sort (copy-list attributes) #'rod< :key #'sax:attribute-qname)))
(dolist (a atts) (dolist (a atts)
(write-rune #/space sink) (%write-rune #/space sink)
(write-rod (sax:attribute-qname a) sink) (%write-rod (sax:attribute-qname a) sink)
(write-rune #/= sink) (%write-rune #/= sink)
(write-rune #/\" sink) (%write-rune #/\" sink)
(map nil (lambda (c) (unparse-datachar c sink)) (sax:attribute-value a)) (unparse-string (sax:attribute-value a) sink)
(write-rune #/\" sink))) (%write-rune #/\" sink)))
(when (canonical sink) (when (canonical sink)
(maybe-close-tag sink))) (maybe-close-tag sink)))
@ -423,21 +394,21 @@
(sink-fresh-line sink))) (sink-fresh-line sink)))
(cond (cond
((tag-have-gt tag) ((tag-have-gt tag)
(write-rod '#.(string-rod "</") sink) (%write-rod '#.(string-rod "</") sink)
(write-rod qname sink) (%write-rod qname sink)
(write-rod '#.(string-rod ">") sink)) (%write-rod '#.(string-rod ">") sink))
(t (t
(write-rod #"/>" sink))))) (%write-rod #"/>" sink)))))
(defmethod sax:processing-instruction ((sink sink) target data) (defmethod sax:processing-instruction ((sink sink) target data)
(maybe-close-tag sink) (maybe-close-tag sink)
(unless (rod-equal target '#.(string-rod "xml")) (unless (rod-equal target '#.(string-rod "xml"))
(write-rod '#.(string-rod "<?") sink) (%write-rod '#.(string-rod "<?") sink)
(write-rod target sink) (%write-rod target sink)
(when data (when data
(write-rune #/space sink) (%write-rune #/space sink)
(write-rod data sink)) (%write-rod data sink))
(write-rod '#.(string-rod "?>") sink))) (%write-rod '#.(string-rod "?>") sink)))
(defmethod sax:start-cdata ((sink sink)) (defmethod sax:start-cdata ((sink sink))
(maybe-close-tag sink) (maybe-close-tag sink)
@ -451,17 +422,17 @@
(not (search #"]]" data))) (not (search #"]]" data)))
(when (indentation sink) (when (indentation sink)
(sink-fresh-line sink)) (sink-fresh-line sink))
(write-rod #"<![CDATA[" sink) (%write-rod #"<![CDATA[" sink)
;; XXX signal error if body is unprintable? ;; XXX signal error if body is unprintable?
(map nil (lambda (c) (write-rune c sink)) data) (map nil (lambda (c) (%write-rune c sink)) data)
(write-rod #"]]>" sink)) (%write-rod #"]]>" sink))
(t (t
(if (indentation sink) (if (indentation sink)
(unparse-indented-text data sink) (unparse-indented-text data sink)
(map nil (if (canonical sink) (let ((y (sink-ystream sink)))
(lambda (c) (unparse-datachar c sink)) (if (canonical sink)
(lambda (c) (unparse-datachar-readable c sink))) (loop for c across data do (unparse-datachar c y))
data))))) (loop for c across data do (unparse-datachar-readable c y))))))))
(defmethod sax:end-cdata ((sink sink)) (defmethod sax:end-cdata ((sink sink))
(unless (eq (pop (stack sink)) :cdata) (unless (eq (pop (stack sink)) :cdata)
@ -469,7 +440,7 @@
(defun indent (sink) (defun indent (sink)
(dotimes (x (current-indentation sink)) (dotimes (x (current-indentation sink))
(write-rune-0 32 sink))) (%write-rune 32 sink)))
(defun start-indentation-block (sink) (defun start-indentation-block (sink)
(incf (current-indentation sink) (indentation sink))) (incf (current-indentation sink) (indentation sink)))
@ -491,89 +462,47 @@
(let* ((w (or (position-if #'whitespacep data :start (1+ pos)) n)) (let* ((w (or (position-if #'whitespacep data :start (1+ pos)) n))
(next (or (position-if-not #'whitespacep data :start w) n))) (next (or (position-if-not #'whitespacep data :start w) n)))
(when need-whitespace-p (when need-whitespace-p
(if (< (+ (column sink) w (- pos)) (width sink)) (if (< (+ (ystream-column (sink-ystream sink)) w (- pos))
(write-rune-0 32 sink) (width sink))
(%write-rune 32 sink)
(sink-fresh-line sink))) (sink-fresh-line sink)))
(loop (loop
with y = (sink-ystream sink)
for i from pos below w do for i from pos below w do
(unparse-datachar-readable (elt data i) sink)) (unparse-datachar-readable (elt data i) y))
(setf need-whitespace-p (< w n)) (setf need-whitespace-p (< w n))
(setf pos next)))) (setf pos next))))
(t (t
(write-rune-0 32 sink)))))) (%write-rune 32 sink))))))
(defun unparse-string (str sink) (defun unparse-string (str sink)
(map nil (lambda (c) (unparse-datachar c sink)) str)) (let ((y (sink-ystream sink)))
(loop for rune across str do (unparse-datachar rune y))))
(defun unparse-datachar (c sink) (defun unparse-datachar (c ystream)
(cond ((rune= c #/&) (write-rod '#.(string-rod "&amp;") sink)) (cond ((rune= c #/&) (write-rod '#.(string-rod "&amp;") ystream))
((rune= c #/<) (write-rod '#.(string-rod "&lt;") sink)) ((rune= c #/<) (write-rod '#.(string-rod "&lt;") ystream))
((rune= c #/>) (write-rod '#.(string-rod "&gt;") sink)) ((rune= c #/>) (write-rod '#.(string-rod "&gt;") ystream))
((rune= c #/\") (write-rod '#.(string-rod "&quot;") sink)) ((rune= c #/\") (write-rod '#.(string-rod "&quot;") ystream))
((rune= c #/U+0009) (write-rod '#.(string-rod "&#9;") sink)) ((rune= c #/U+0009) (write-rod '#.(string-rod "&#9;") ystream))
((rune= c #/U+000A) (write-rod '#.(string-rod "&#10;") sink)) ((rune= c #/U+000A) (write-rod '#.(string-rod "&#10;") ystream))
((rune= c #/U+000D) (write-rod '#.(string-rod "&#13;") sink)) ((rune= c #/U+000D) (write-rod '#.(string-rod "&#13;") ystream))
(t (t
(write-rune c sink)))) (write-rune c ystream))))
(defun unparse-datachar-readable (c sink) (defun unparse-datachar-readable (c ystream)
(cond ((rune= c #/&) (write-rod '#.(string-rod "&amp;") sink)) (cond ((rune= c #/&) (write-rod '#.(string-rod "&amp;") ystream))
((rune= c #/<) (write-rod '#.(string-rod "&lt;") sink)) ((rune= c #/<) (write-rod '#.(string-rod "&lt;") ystream))
((rune= c #/>) (write-rod '#.(string-rod "&gt;") sink)) ((rune= c #/>) (write-rod '#.(string-rod "&gt;") ystream))
((rune= c #/\") (write-rod '#.(string-rod "&quot;") sink)) ((rune= c #/\") (write-rod '#.(string-rod "&quot;") ystream))
(t (t
(write-rune c sink)))) (write-rune c ystream))))
(defun %write-rune (c sink)
(write-rune c (sink-ystream sink)))
;;;; UTF-8 output for SINKs (defun %write-rod (r sink)
(write-rod r (sink-ystream sink)))
(defun write-rod (rod sink)
(map nil (lambda (c) (write-rune c sink)) rod))
(defun write-rune (rune sink)
(let ((code (rune-code rune)))
(with-slots (high-surrogate) sink
(cond
((<= #xD800 code #xDBFF)
(setf high-surrogate code))
((<= #xDC00 code #xDFFF)
(let ((q (logior (ash (- high-surrogate #xD7C0) 10)
(- code #xDC00))))
(write-rune-0 q sink))
(setf high-surrogate nil))
(t
(write-rune-0 code sink))))))
(defun write-rune-0 (code sink)
(labels ((wr (x)
(write-octet x sink)))
(cond ((<= #x00000000 code #x0000007F)
(wr code))
((<= #x00000080 code #x000007FF)
(wr (logior #b11000000 (ldb (byte 5 6) code)))
(wr (logior #b10000000 (ldb (byte 6 0) code))))
((<= #x00000800 code #x0000FFFF)
(wr (logior #b11100000 (ldb (byte 4 12) code)))
(wr (logior #b10000000 (ldb (byte 6 6) code)))
(wr (logior #b10000000 (ldb (byte 6 0) code))))
((<= #x00010000 code #x001FFFFF)
(wr (logior #b11110000 (ldb (byte 3 18) code)))
(wr (logior #b10000000 (ldb (byte 6 12) code)))
(wr (logior #b10000000 (ldb (byte 6 6) code)))
(wr (logior #b10000000 (ldb (byte 6 0) code))))
((<= #x00200000 code #x03FFFFFF)
(wr (logior #b11111000 (ldb (byte 2 24) code)))
(wr (logior #b10000000 (ldb (byte 6 18) code)))
(wr (logior #b10000000 (ldb (byte 6 12) code)))
(wr (logior #b10000000 (ldb (byte 6 6) code)))
(wr (logior #b10000000 (ldb (byte 6 0) code))))
((<= #x04000000 code #x7FFFFFFF)
(wr (logior #b11111100 (ldb (byte 1 30) code)))
(wr (logior #b10000000 (ldb (byte 6 24) code)))
(wr (logior #b10000000 (ldb (byte 6 18) code)))
(wr (logior #b10000000 (ldb (byte 6 12) code)))
(wr (logior #b10000000 (ldb (byte 6 6) code)))
(wr (logior #b10000000 (ldb (byte 6 0) code)))))))
;;;; convenience functions for DOMless XML serialization ;;;; convenience functions for DOMless XML serialization
@ -632,8 +561,9 @@
data) data)
(defun rod-to-utf8-string (rod) (defun rod-to-utf8-string (rod)
(with-output-to-string (s) (let ((out (make-buffer :element-type 'character)))
(write-rod rod (cxml:make-character-stream-sink s)))) (runes-to-utf8/adjustable-string out rod (length rod))
out))
(defun utf8-string-to-rod (str) (defun utf8-string-to-rod (str)
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)) (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))