whitespace normalizer
This commit is contained in:
@ -23,7 +23,7 @@
|
|||||||
<li>
|
<li>
|
||||||
<ul class="hack">
|
<ul class="hack">
|
||||||
<li>
|
<li>
|
||||||
<a href="doc/using.html#quickstart"><b>Quick-Start Example</b></a>
|
<a href="doc/quickstart.html"><b>Quick-Start Example</b></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</li>
|
</li>
|
||||||
@ -112,6 +112,7 @@
|
|||||||
<li>UTF-8 string support in DOM on Lisps without Unicode characters.</li>
|
<li>UTF-8 string support in DOM on Lisps without Unicode characters.</li>
|
||||||
<li>Sink API has been changed.</li>
|
<li>Sink API has been changed.</li>
|
||||||
<li>Support internal subset serialization.</li>
|
<li>Support internal subset serialization.</li>
|
||||||
|
<li>Whitespace normalizer.</li>
|
||||||
<li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>
|
<li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>
|
||||||
<li>Use trivial-gray-streams.</li>
|
<li>Use trivial-gray-streams.</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
1
cxml.asd
1
cxml.asd
@ -71,6 +71,7 @@
|
|||||||
(:file "xmls-compat" :depends-on ("xml-parse"))
|
(:file "xmls-compat" :depends-on ("xml-parse"))
|
||||||
(:file "recoder" :depends-on ("xml-parse"))
|
(:file "recoder" :depends-on ("xml-parse"))
|
||||||
(:file "xmlns-normalizer" :depends-on ("xml-parse"))
|
(:file "xmlns-normalizer" :depends-on ("xml-parse"))
|
||||||
|
(:file "space-normalizer" :depends-on ("xml-parse"))
|
||||||
(:file "catalog" :depends-on ("xml-parse"))
|
(:file "catalog" :depends-on ("xml-parse"))
|
||||||
(:file "sax-proxy" :depends-on ("xml-parse")))
|
(:file "sax-proxy" :depends-on ("xml-parse")))
|
||||||
:depends-on (:cxml-runes :puri :trivial-gray-streams))
|
:depends-on (:cxml-runes :puri :trivial-gray-streams))
|
||||||
|
|||||||
@ -23,7 +23,7 @@
|
|||||||
<li>
|
<li>
|
||||||
<ul class="hack">
|
<ul class="hack">
|
||||||
<li>
|
<li>
|
||||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
<a href="quickstart.html"><b>Quick-Start Example</b></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</li>
|
</li>
|
||||||
|
|||||||
@ -23,7 +23,7 @@
|
|||||||
<li>
|
<li>
|
||||||
<ul class="hack">
|
<ul class="hack">
|
||||||
<li>
|
<li>
|
||||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
<a href="quickstart.html"><b>Quick-Start Example</b></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</li>
|
</li>
|
||||||
@ -116,7 +116,7 @@ $ cvs co cxml</pre>
|
|||||||
<pre>* (asdf:operate 'asdf:load-op :cxml)</pre>
|
<pre>* (asdf:operate 'asdf:load-op :cxml)</pre>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
You can then try the <a href="using.html#quickstart">quick-start example</a>.
|
You can then try the <a href="quickstart.html">quick-start example</a>.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<a name="tests"/>
|
<a name="tests"/>
|
||||||
|
|||||||
@ -23,7 +23,7 @@
|
|||||||
<li>
|
<li>
|
||||||
<ul class="hack">
|
<ul class="hack">
|
||||||
<li>
|
<li>
|
||||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
<a href="quickstart.html"><b>Quick-Start Example</b></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</li>
|
</li>
|
||||||
|
|||||||
@ -23,7 +23,7 @@
|
|||||||
<li>
|
<li>
|
||||||
<ul class="hack">
|
<ul class="hack">
|
||||||
<li>
|
<li>
|
||||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
<a href="quickstart.html"><b>Quick-Start Example</b></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</li>
|
</li>
|
||||||
@ -388,6 +388,32 @@
|
|||||||
<tt>start-element</tt> events before passing them on the next
|
<tt>start-element</tt> events before passing them on the next
|
||||||
handler.
|
handler.
|
||||||
</p>
|
</p>
|
||||||
|
<p>
|
||||||
|
<div class="def">Function CXML:MAKE-WHITESPACE-NORMALIZER (chained-handler &optional dtd)</div>
|
||||||
|
Return a SAX handler which removes whitespace from elements that
|
||||||
|
have <em>element content</em> and have not been declared to
|
||||||
|
preserve space using an xml:space attribute.
|
||||||
|
</p>
|
||||||
|
<p>Example:</p>
|
||||||
|
<pre>(cxml:parse-file "example.xml"
|
||||||
|
(cxml:make-whitespace-normalizer (cxml-dom:make-dom-builder))
|
||||||
|
:validate t)</pre>
|
||||||
|
<p>Example input:</p>
|
||||||
|
<pre><!DOCTYPE test [
|
||||||
|
<!ELEMENT test (foo,bar*)>
|
||||||
|
<!ATTLIST test a CDATA #IMPLIED>
|
||||||
|
<!ELEMENT foo #PCDATA>
|
||||||
|
<!ELEMENT bar (foo?)>
|
||||||
|
<!ATTLIST bar xml:space (default|preserve) "default">
|
||||||
|
]>
|
||||||
|
<test a='b'>
|
||||||
|
<foo> </foo>
|
||||||
|
<bar> </bar>
|
||||||
|
<bar xml:space="preserve"> </bar>
|
||||||
|
</test>
|
||||||
|
</pre>
|
||||||
|
<p>Example result:</p>
|
||||||
|
<pre><test a="b"><foo> </foo><bar></bar><bar xml:space="preserve"> </bar></test></pre>
|
||||||
|
|
||||||
<a name="rods"/>
|
<a name="rods"/>
|
||||||
<h3>Recoders</h3>
|
<h3>Recoders</h3>
|
||||||
@ -572,6 +598,9 @@ NIL</pre>
|
|||||||
<div class="def">Accessor SAX:ATTRIBUTE-QNAME (attribute)</div>
|
<div class="def">Accessor SAX:ATTRIBUTE-QNAME (attribute)</div>
|
||||||
<div class="def">Accessor SAX:ATTRIBUTE-SPECIFIED-P (attribute)</div>
|
<div class="def">Accessor SAX:ATTRIBUTE-SPECIFIED-P (attribute)</div>
|
||||||
<div class="def">Accessor SAX:ATTRIBUTE-VALUE (attribute)</div>
|
<div class="def">Accessor SAX:ATTRIBUTE-VALUE (attribute)</div>
|
||||||
|
<br/>
|
||||||
|
<div class="def">Function SAX:FIND-ATTRIBUTE (qname attributes)</div>
|
||||||
|
<div class="def">Function SAX:FIND-ATTRIBUTE-NS (uri lname attributes)</div>
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
The entity declaration methods are similar to Java SAX
|
The entity declaration methods are similar to Java SAX
|
||||||
|
|||||||
@ -23,7 +23,7 @@
|
|||||||
<li>
|
<li>
|
||||||
<ul class="hack">
|
<ul class="hack">
|
||||||
<li>
|
<li>
|
||||||
<a href="using.html#quickstart"><b>Quick-Start Example</b></a>
|
<a href="quickstart.html"><b>Quick-Start Example</b></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</li>
|
</li>
|
||||||
|
|||||||
@ -223,6 +223,7 @@
|
|||||||
#+rune-is-integer
|
#+rune-is-integer
|
||||||
(progn
|
(progn
|
||||||
(defstruct (character-stream-ystream/utf8
|
(defstruct (character-stream-ystream/utf8
|
||||||
|
(:constructor make-character-stream-ystream/utf8 (os-stream))
|
||||||
(:include %stream-ystream)
|
(:include %stream-ystream)
|
||||||
(:conc-name "YSTREAM-")))
|
(:conc-name "YSTREAM-")))
|
||||||
|
|
||||||
|
|||||||
@ -77,5 +77,6 @@
|
|||||||
#:sax-proxy
|
#:sax-proxy
|
||||||
#:proxy-chained-handler
|
#:proxy-chained-handler
|
||||||
#:make-namespace-normalizer
|
#:make-namespace-normalizer
|
||||||
|
#:make-whitespace-normalizer
|
||||||
#:rod-to-utf8-string
|
#:rod-to-utf8-string
|
||||||
#:utf8-string-to-rod))
|
#:utf8-string-to-rod))
|
||||||
|
|||||||
@ -118,4 +118,8 @@
|
|||||||
|
|
||||||
(defwrapper sax:entity-resolver
|
(defwrapper sax:entity-resolver
|
||||||
(resolver)
|
(resolver)
|
||||||
resolver))
|
resolver)
|
||||||
|
|
||||||
|
(defwrapper sax::dtd
|
||||||
|
(dtd)
|
||||||
|
dtd))
|
||||||
|
|||||||
@ -53,6 +53,8 @@
|
|||||||
#:*use-xmlns-namespace*
|
#:*use-xmlns-namespace*
|
||||||
|
|
||||||
#:make-attribute
|
#:make-attribute
|
||||||
|
#:find-attribute
|
||||||
|
#:find-attribute-ns
|
||||||
#:attribute-namespace-uri
|
#:attribute-namespace-uri
|
||||||
#:attribute-local-name
|
#:attribute-local-name
|
||||||
#:attribute-qname
|
#:attribute-qname
|
||||||
@ -137,6 +139,23 @@ Setting this variable has no effect unless both
|
|||||||
value
|
value
|
||||||
specified-p)
|
specified-p)
|
||||||
|
|
||||||
|
(defun %rod= (x y)
|
||||||
|
;; allow rods *and* strings *and* null
|
||||||
|
(cond
|
||||||
|
((zerop (length x)) (zerop (length y)))
|
||||||
|
((zerop (length y)) nil)
|
||||||
|
((stringp x) (string= x y))
|
||||||
|
(t (runes:rod= x y))))
|
||||||
|
|
||||||
|
(defun find-attribute (qname attrs)
|
||||||
|
(find qname attrs :key #'attribute-qname :test #'%rod=))
|
||||||
|
|
||||||
|
(defun find-attribute-ns (uri lname attrs)
|
||||||
|
(find-if (lambda (attr)
|
||||||
|
(and (%rod= uri (sax:attribute-namespace-uri attr))
|
||||||
|
(%rod= lname (sax:attribute-local-name attr))))
|
||||||
|
attrs))
|
||||||
|
|
||||||
(defgeneric start-document (handler)
|
(defgeneric start-document (handler)
|
||||||
(:documentation "Called at the beginning of the parsing process,
|
(:documentation "Called at the beginning of the parsing process,
|
||||||
before any element, processing instruction or comment is reported.
|
before any element, processing instruction or comment is reported.
|
||||||
@ -325,7 +344,11 @@ finished, if present.")
|
|||||||
(:documentation
|
(:documentation
|
||||||
"Called between sax:end-dtd and sax:end-document to register an entity
|
"Called between sax:end-dtd and sax:end-document to register an entity
|
||||||
resolver, a function of two arguments: An entity name and SAX handler.
|
resolver, a function of two arguments: An entity name and SAX handler.
|
||||||
When called, the resolver function will parse the named entities data.")
|
When called, the resolver function will parse the named entity's data.")
|
||||||
(:method ((handler t) resolver)
|
(:method ((handler t) resolver)
|
||||||
(declare (ignore resolver))
|
(declare (ignore resolver))
|
||||||
nil))
|
nil))
|
||||||
|
|
||||||
|
;; internal for now
|
||||||
|
(defgeneric dtd (handler dtd)
|
||||||
|
(:method ((handler t) dtd) (declare (ignore dtd)) nil))
|
||||||
|
|||||||
@ -37,4 +37,5 @@
|
|||||||
(define-proxy-method sax:notation-declaration (name public-id system-id))
|
(define-proxy-method sax:notation-declaration (name public-id system-id))
|
||||||
(define-proxy-method sax:element-declaration (name model))
|
(define-proxy-method sax:element-declaration (name model))
|
||||||
(define-proxy-method sax:attribute-declaration (elt attr type default))
|
(define-proxy-method sax:attribute-declaration (elt attr type default))
|
||||||
(define-proxy-method sax:entity-resolver (resolver)))
|
(define-proxy-method sax:entity-resolver (resolver))
|
||||||
|
(define-proxy-method sax::dtd (dtd)))
|
||||||
|
|||||||
62
xml/space-normalizer.lisp
Normal file
62
xml/space-normalizer.lisp
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
;;;; space-normalizer.lisp -- whitespace removal
|
||||||
|
;;;;
|
||||||
|
;;;; This file is part of the CXML parser, released under Lisp-LGPL.
|
||||||
|
;;;; See file COPYING for details.
|
||||||
|
;;;;
|
||||||
|
;;;; Copyright (c) 2005 David Lichteblau
|
||||||
|
|
||||||
|
(in-package :cxml)
|
||||||
|
|
||||||
|
(defclass whitespace-normalizer (sax-proxy)
|
||||||
|
((attributes :initform '(t) :accessor xml-space-attributes)
|
||||||
|
(models :initform nil :accessor xml-space-models)
|
||||||
|
(dtd :initarg :dtd :accessor xml-space-dtd)))
|
||||||
|
|
||||||
|
(defun make-whitespace-normalizer (chained-handler &optional dtd)
|
||||||
|
(make-instance 'whitespace-normalizer
|
||||||
|
:dtd dtd
|
||||||
|
:chained-handler chained-handler))
|
||||||
|
|
||||||
|
(defmethod sax::dtd ((handler whitespace-normalizer) dtd)
|
||||||
|
(unless (xml-space-dtd handler)
|
||||||
|
(setf (xml-space-dtd handler) dtd)))
|
||||||
|
|
||||||
|
(defmethod sax:start-element
|
||||||
|
((handler whitespace-normalizer) uri lname qname attrs)
|
||||||
|
(declare (ignore uri lname))
|
||||||
|
(let ((dtd (xml-space-dtd handler)))
|
||||||
|
(when dtd
|
||||||
|
(let ((xml-space
|
||||||
|
(sax:find-attribute (if (stringp qname) "xml:space" #"xml:space")
|
||||||
|
attrs)))
|
||||||
|
(push (print(if xml-space
|
||||||
|
(rod= (rod (sax:attribute-value xml-space)) #"default")
|
||||||
|
(car (xml-space-attributes handler))))
|
||||||
|
(xml-space-attributes handler)))
|
||||||
|
(let* ((e (cxml::find-element (rod qname) dtd))
|
||||||
|
(cspec (when e (cxml::elmdef-content e))))
|
||||||
|
(push (and (consp cspec)
|
||||||
|
(not (and (eq (car cspec) '*)
|
||||||
|
(let ((subspec (second cspec)))
|
||||||
|
(and (eq (car subspec) 'or)
|
||||||
|
(eq (cadr subspec) :PCDATA))))))
|
||||||
|
(xml-space-models handler)))))
|
||||||
|
(call-next-method))
|
||||||
|
|
||||||
|
(defmethod sax:characters ((handler whitespace-normalizer) data)
|
||||||
|
(cond
|
||||||
|
((and (xml-space-dtd handler)
|
||||||
|
(car (xml-space-attributes handler))
|
||||||
|
(car (xml-space-models handler)))
|
||||||
|
(unless (every #'white-space-rune-p (rod data))
|
||||||
|
(warn "non-whitespace character data in element content")
|
||||||
|
(call-next-method)))
|
||||||
|
(t
|
||||||
|
(call-next-method))))
|
||||||
|
|
||||||
|
(defmethod sax:end-element ((handler whitespace-normalizer) uri lname qname)
|
||||||
|
(declare (ignore uri lname qname))
|
||||||
|
(when (xml-space-dtd handler)
|
||||||
|
(pop (xml-space-attributes handler))
|
||||||
|
(pop (xml-space-models handler)))
|
||||||
|
(call-next-method))
|
||||||
@ -2513,7 +2513,8 @@
|
|||||||
(let ((dtd (dtd *ctx*)))
|
(let ((dtd (dtd *ctx*)))
|
||||||
(sax:entity-resolver
|
(sax:entity-resolver
|
||||||
(handler *ctx*)
|
(handler *ctx*)
|
||||||
(lambda (name handler) (resolve-entity name handler dtd))))
|
(lambda (name handler) (resolve-entity name handler dtd)))
|
||||||
|
(sax::dtd (handler *ctx*) dtd))
|
||||||
(list :DOCTYPE name extid))))
|
(list :DOCTYPE name extid))))
|
||||||
|
|
||||||
(defun report-cached-dtd (dtd)
|
(defun report-cached-dtd (dtd)
|
||||||
|
|||||||
Reference in New Issue
Block a user