This commit is contained in:
dlichteblau
2007-03-04 21:04:11 +00:00
parent 21aa3df3bd
commit e0e54c172f
9 changed files with 252 additions and 17 deletions

View File

@ -60,6 +60,7 @@
<li><a href="klacks.html#sources">Parsing incrementally</a></li>
<li><a href="klacks.html#convenience">Convenience functions</a></li>
<li><a href="klacks.html#klacksax">Bridging Klacks and SAX</a></li>
<li><a href="klacks.html#locator">Location information</a></li>
<li><a href="klacks.html#klacksax">Examples</a></li>
</ul>
</li>

View File

@ -52,6 +52,9 @@
<h2>Recent Changes</h2>
<p class="nomargin"><tt>rel-2007-xx-yy</tt></p>
<ul class="nomargin">
<li>xml:base support (SAX and Klacks only, not yet used in DOM).
See documentation <a href="sax.html#saxparser">here</a> and <a
href="klacks.html#locator">here</a>.</li>
<li>Fixed attributes to carry an lname even without when occurring
without a namespace.</li>
<li>Klacks improvements: Incompatibly changed

View File

@ -282,6 +282,29 @@
events to the SAX <tt>handler</tt>.
</p>
<a name="locator"/>
<h3>Location information</h3>
<p>
<div class="def">Function KLACKS:CURRENT-LINE-NUMBER (source)</div>
Return an approximation of the current line number, or NIL.
</p>
<p>
<div class="def">Function KLACKS:CURRENT-COLUMN-NUMBER (source)</div>
Return an approximation of the current column number, or NIL.
</p>
<p>
<div class="def">Function KLACKS:CURRENT-SYSTEM-ID (source)</div>
Return the URI of the document being parsed. This is either the
main document, or the entity's system ID while contents of a parsed
general external entity are being processed.
</p>
<p>
<div class="def">Function KLACKS:CURRENT-XML-BASE (source)</div>
Return the [Base URI] of the current element. This URI can differ from
the value returned by <tt>current-system-id</tt> if xml:base
attributes are present.
</p>
<a name="examples"/>
<h3>Examples</h3>
<p>

View File

@ -476,6 +476,48 @@
<em>fixme:</em> thread-safety
</p>
<a name="saxparser"/>
<h3>Location information</h3>
<p>
<div class="def">Class SAX:SAX-PARSER ()</div>
A class providing location information through an
implementation-specific subclass. Parsers will use
<tt>sax:register-sax-parser</tt> to pass their parser instance to
the handler. The easiest way to receive sax parsers instances is
to inherit from sax-parser-mixin when defining a sax handler.
</p>
<p>
<div class="def">Class SAX:SAX-PARSER-MIXIN ()</div>
A mixin for sax handler classes that records the sax handler
object for use with the following functions. Trampoline methods
are provided that allow those functions to be called directly on
the sax-parser-mixin.
</p>
<p>
<div class="def">Function SAX:SAX-HANDLER (sax-handler-mixin) => sax-handler</div>
Return the sax-parser instance recorded by this handler, or NIL.
</p>
<p>
<div class="def">Function SAX:LINE-NUMBER (sax-parser)</div>
Return an approximation of the current line number, or NIL.
</p>
<p>
<div class="def">Function SAX:COLUMN-NUMBER (sax-parser)</div>
Return an approximation of the current column number, or NIL.
</p>
<p>
<div class="def">Function SAX:SYSTEM-ID (sax-parser)</div>
Return the URI of the document being parsed. This is either the
main document, or the entity's system ID while contents of a parsed
general external entity are being processed.
</p>
<p>
<div class="def">Function SAX:XML-BASE (sax-parser)</div>
Return the [Base URI] of the current element. This URI can differ from
the value returned by <tt>sax:system-id</tt> if xml:base
attributes are present.
</p>
<a name="catalogs"/>
<h3>XML Catalogs</h3>
<p>

View File

@ -356,11 +356,12 @@
(defun klacks/entity-reference (source zstream name cont)
(assert (not (zstream-token-category zstream)))
(with-source (source temporary-streams)
(with-source (source temporary-streams context)
(let ((new-xstream (entity->xstream zstream name :general nil)))
(push new-xstream temporary-streams)
(push :stop (zstream-input-stack zstream))
(zstream-push new-xstream zstream)
(push (stream-name-uri (xstream-name new-xstream)) (base-stack context))
(let ((next
(lambda ()
(klacks/entity-reference-2 source zstream new-xstream cont))))
@ -371,12 +372,13 @@
(klacks/ext-parsed-ent source zstream next)))))))
(defun klacks/entity-reference-2 (source zstream new-xstream cont)
(with-source (source temporary-streams)
(with-source (source temporary-streams context)
(unless (eq (peek-token zstream) :eof)
(wf-error zstream "Trailing garbage. - ~S" (peek-token zstream)))
(assert (eq (peek-token zstream) :eof))
(assert (eq (pop (zstream-input-stack zstream)) new-xstream))
(assert (eq (pop (zstream-input-stack zstream)) :stop))
(pop (base-stack context))
(setf (zstream-token-category zstream) nil)
(setf temporary-streams (remove new-xstream temporary-streams))
(close-xstream new-xstream)
@ -441,6 +443,39 @@
element-name attribute-name type default))
;;;; locator
(defun source-xstream (source)
(car (zstream-input-stack (main-zstream (slot-value source 'context)))))
(defun source-stream-name (source)
(let ((xstream (source-xstream source)))
(if xstream
(xstream-name xstream)
nil)))
(defmethod current-line-number ((source cxml-source))
(let ((x (source-xstream source)))
(if x
(xstream-line-number x)
nil)))
(defmethod current-column-number ((source cxml-source))
(let ((x (source-xstream source)))
(if x
(xstream-column-number x)
nil)))
(defmethod current-system-id ((source cxml-source))
(let ((name (source-stream-name source)))
(if name
(stream-name-uri name)
nil)))
(defmethod current-xml-base ((source cxml-source))
(car (base-stack (slot-value source 'context))))
;;;; debugging
#+(or)

View File

@ -40,6 +40,11 @@
;;;(defgeneric klacks:current-characters (source))
(defgeneric klacks:current-cdata-section-p (source))
(defgeneric current-line-number (source))
(defgeneric current-column-number (source))
(defgeneric current-system-id (source))
(defgeneric current-xml-base (source))
(defmacro klacks:with-open-source ((var source) &body body)
`(let ((,var ,source))
(unwind-protect

View File

@ -258,17 +258,17 @@
((result :initform (make-entry-file) :accessor result)
(next :initform '() :accessor next)
(prefer-stack :initform (list *prefer*) :accessor prefer-stack)
(base-stack :accessor base-stack)))
(catalog-base-stack :accessor catalog-base-stack)))
(defmethod initialize-instance :after
((instance catalog-parser) &key uri)
(setf (base-stack instance) (list uri)))
(setf (catalog-base-stack instance) (list uri)))
(defmethod prefer ((handler catalog-parser))
(car (prefer-stack handler)))
(defmethod base ((handler catalog-parser))
(car (base-stack handler)))
(car (catalog-base-stack handler)))
(defun get-attribute/lname (name attributes)
(let ((a (find name attributes
@ -283,6 +283,7 @@
(setf lname (or lname qname))
;; we can dispatch on lnames only because we validate against the DTD,
;; which disallows other namespaces.
;; FIXME: we don't, because we can't.
(push (let ((new (get-attribute/lname "prefer" attrs)))
(cond
((equal new "public") :public)
@ -290,7 +291,7 @@
((null new) (prefer handler))))
(prefer-stack handler))
(push (string-or (get-attribute/lname "base" attrs) (base handler))
(base-stack handler))
(catalog-base-stack handler))
(flet ((geturi (lname)
(puri:merge-uris
(safe-parse-uri (get-attribute/lname lname attrs))
@ -341,7 +342,7 @@
(defmethod sax:end-element ((handler catalog-parser) uri lname qname)
(declare (ignore uri lname qname))
(pop (base-stack handler))
(pop (catalog-base-stack handler))
(pop (prefer-stack handler)))
(defmethod sax:end-document ((handler catalog-parser))

View File

@ -39,8 +39,6 @@
;; don't really see why.
;; o Missing stuff from Java SAX2:
;; * ignorable-whitespace
;; * document-locator/(setf document-locator)
;; (probably implies a handler class with an appropriate slot)
;; * skipped-entity
;; * The whole ErrorHandler class, this is better handled using
;; conditions (but isn't yet)
@ -82,10 +80,64 @@
#:notation-declaration
#:element-declaration
#:attribute-declaration
#:entity-resolver))
#:entity-resolver
#:sax-parser
#:sax-parser-mixin
#:register-sax-parser
#:line-number
#:column-number
#:system-id
#:xml-base))
(in-package :sax)
;;;; SAX-PARSER interface
(defclass sax-parser () ())
(defclass sax-parser-mixin ()
((sax-parser :initform nil :reader sax-parser)))
(defgeneric line-number (sax-parser)
(:documentation
"Return an approximation of the current line number, or NIL.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(line-number (sax-parser handler))
nil)))
(defgeneric column-number (sax-parser)
(:documentation
"Return an approximation of the current column number, or NIL.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(column-number (sax-parser handler))
nil)))
(defgeneric system-id (sax-parser)
(:documentation
"Return the URI of the document being parsed. This is either the
main document, or the entity's system ID while contents of a parsed
general external entity are being processed.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(system-id (sax-parser handler))
nil)))
(defgeneric xml-base (sax-parser)
(:documentation
"Return the [Base URI] of the current element. This URI can differ from
the value returned by SAX:SYSTEM-ID if xml:base attributes are present.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(xml-base (sax-parser handler))
nil)))
;;;; Configuration variables
;; The http://xml.org/sax/features/namespaces property
(defvar *namespace-processing* t
"If non-nil (the default), namespace processing is enabled.
@ -349,6 +401,16 @@ finished, if present.")
(declare (ignore resolver))
nil))
(defgeneric register-sax-parser
(handler sax-parser)
(:documentation
"Set the SAX-PARSER instance of this handler.")
(:method ((handler t) sax-parser)
(declare (ignore sax-parser))
nil)
(:method ((handler sax-parser-mixin) sax-parser)
(setf (slot-value handler 'sax-parser) sax-parser)))
;; internal for now
(defgeneric dtd (handler dtd)
(:method ((handler t) dtd) (declare (ignore dtd)) nil))

View File

@ -183,6 +183,8 @@
handler
(dtd nil)
model-stack
;; xml:base machen wir fuer klacks mal gleich als expliziten stack:
base-stack
(referenced-notations '())
(id-table (%make-rod-hash-table))
;; FIXME: Wofuer ist name-hashtable da? Will man das wissen?
@ -659,6 +661,38 @@
stream
(format nil "End of file~@[: ~?~]" x args)))
(defclass cxml-parser (sax:sax-parser) ((ctx :initarg :ctx)))
(defun parser-xstream (parser)
(car (zstream-input-stack (main-zstream (slot-value parser 'ctx)))))
(defun parser-stream-name (parser)
(let ((xstream (parser-xstream parser)))
(if xstream
(xstream-name xstream)
nil)))
(defmethod sax:line-number ((parser cxml-parser))
(let ((x (parser-xstream parser)))
(if x
(xstream-line-number x)
nil)))
(defmethod sax:column-number ((parser cxml-parser))
(let ((x (parser-xstream parser)))
(if x
(xstream-column-number x)
nil)))
(defmethod sax:system-id ((parser cxml-parser))
(let ((name (parser-stream-name parser)))
(if name
(stream-name-uri name)
nil)))
(defmethod sax:xml-base ((parser cxml-parser))
(car (base-stack (slot-value parser 'ctx))))
(defvar *validate* t)
(defvar *external-subset-p* nil)
@ -966,8 +1000,10 @@
(defun call-with-entity-expansion-as-stream (zstream cont name kind internalp)
;; `zstream' is for error messages
(let ((in (entity->xstream zstream name kind internalp)))
(push (stream-name-uri (xstream-name in)) (base-stack *ctx*))
(unwind-protect
(funcall cont in)
(pop (base-stack *ctx*))
(close-xstream in))))
(defun ensure-dtd ()
@ -2570,13 +2606,18 @@
#+rune-is-integer
(when recode
(setf handler (make-recoder handler #'rod-to-utf8-string)))
(let ((*ctx*
(let* ((xstream (car (zstream-input-stack input)))
(name (xstream-name xstream))
(base (when name (stream-name-uri name)))
(*ctx*
(make-context :handler handler
:main-zstream input
:entity-resolver entity-resolver
:base-stack (list (or base ""))
:disallow-internal-subset disallow-internal-subset))
(*validate* validate)
(*namespace-bindings* *initial-namespace-bindings*))
(sax:register-sax-parser handler (make-instance 'cxml-parser :ctx *ctx*))
(sax:start-document handler)
;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
;; Misc ::= Comment | PI | S
@ -2658,6 +2699,7 @@
(p/etag input qname))
(sax:end-element (handler *ctx*) uri lname qname)
(undeclare-namespaces new-b)
(pop (base-stack *ctx*))
(validate-end-element *ctx* qname)))
(defun p/sztag (input)
@ -2675,6 +2717,7 @@
(when sax:*namespace-processing*
(setf new-namespaces (declare-namespaces attrs))
(mapc #'set-attribute-namespace attrs))
(push (compute-base attrs) (base-stack *ctx*))
(multiple-value-bind (uri prefix local-name)
(if sax:*namespace-processing*
(decode-qname name)
@ -2701,6 +2744,23 @@
(when (cdr sem2)
(wf-error input "no attributes allowed in end tag"))))
;; copy&paste from cxml-rng
(defun escape-uri (string)
(with-output-to-string (out)
(loop for c across (cxml::rod-to-utf8-string string) do
(let ((code (char-code c)))
;; http://www.w3.org/TR/xlink/#link-locators
(if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
(format out "%~2,'0X" code)
(write-char c out))))))
(defun compute-base (attrs)
(let ((new (sax:find-attribute "xml:base" attrs))
(current (car (base-stack *ctx*))))
(if new
(puri:merge-uris (escape-uri (sax:attribute-value new)) current)
current)))
(defun process-characters (input sem)
(consume-token input)
(when (search #"]]>" sem)
@ -3317,6 +3377,7 @@
(return))))
res))))
;; used only by read-att-value-2
(defun internal-entity-expansion (name)
(let ((def (get-entity-definition name :general (dtd *ctx*))))
(unless def
@ -3326,6 +3387,7 @@
(or (entdef-expansion def)
(setf (entdef-expansion def) (find-internal-entity-expansion name)))))
;; used only by read-att-value-2
(defun find-internal-entity-expansion (name)
(let ((zinput (make-zstream)))
(with-rune-collector-3 (collect)
@ -3366,6 +3428,7 @@
(lambda (zinput)
(muffle (car (zstream-input-stack zinput))))) ))))
;; callback for DOM
(defun resolve-entity (name handler dtd)
(let ((*validate* nil))
(if (get-entity-definition name :general dtd)