This commit is contained in:
dlichteblau
2007-03-04 21:04:11 +00:00
parent 21aa3df3bd
commit e0e54c172f
9 changed files with 252 additions and 17 deletions

View File

@ -60,6 +60,7 @@
<li><a href="klacks.html#sources">Parsing incrementally</a></li> <li><a href="klacks.html#sources">Parsing incrementally</a></li>
<li><a href="klacks.html#convenience">Convenience functions</a></li> <li><a href="klacks.html#convenience">Convenience functions</a></li>
<li><a href="klacks.html#klacksax">Bridging Klacks and SAX</a></li> <li><a href="klacks.html#klacksax">Bridging Klacks and SAX</a></li>
<li><a href="klacks.html#locator">Location information</a></li>
<li><a href="klacks.html#klacksax">Examples</a></li> <li><a href="klacks.html#klacksax">Examples</a></li>
</ul> </ul>
</li> </li>

View File

@ -52,6 +52,9 @@
<h2>Recent Changes</h2> <h2>Recent Changes</h2>
<p class="nomargin"><tt>rel-2007-xx-yy</tt></p> <p class="nomargin"><tt>rel-2007-xx-yy</tt></p>
<ul class="nomargin"> <ul class="nomargin">
<li>xml:base support (SAX and Klacks only, not yet used in DOM).
See documentation <a href="sax.html#saxparser">here</a> and <a
href="klacks.html#locator">here</a>.</li>
<li>Fixed attributes to carry an lname even without when occurring <li>Fixed attributes to carry an lname even without when occurring
without a namespace.</li> without a namespace.</li>
<li>Klacks improvements: Incompatibly changed <li>Klacks improvements: Incompatibly changed

View File

@ -282,6 +282,29 @@
events to the SAX <tt>handler</tt>. events to the SAX <tt>handler</tt>.
</p> </p>
<a name="locator"/>
<h3>Location information</h3>
<p>
<div class="def">Function KLACKS:CURRENT-LINE-NUMBER (source)</div>
Return an approximation of the current line number, or NIL.
</p>
<p>
<div class="def">Function KLACKS:CURRENT-COLUMN-NUMBER (source)</div>
Return an approximation of the current column number, or NIL.
</p>
<p>
<div class="def">Function KLACKS:CURRENT-SYSTEM-ID (source)</div>
Return the URI of the document being parsed. This is either the
main document, or the entity's system ID while contents of a parsed
general external entity are being processed.
</p>
<p>
<div class="def">Function KLACKS:CURRENT-XML-BASE (source)</div>
Return the [Base URI] of the current element. This URI can differ from
the value returned by <tt>current-system-id</tt> if xml:base
attributes are present.
</p>
<a name="examples"/> <a name="examples"/>
<h3>Examples</h3> <h3>Examples</h3>
<p> <p>

View File

@ -476,6 +476,48 @@
<em>fixme:</em> thread-safety <em>fixme:</em> thread-safety
</p> </p>
<a name="saxparser"/>
<h3>Location information</h3>
<p>
<div class="def">Class SAX:SAX-PARSER ()</div>
A class providing location information through an
implementation-specific subclass. Parsers will use
<tt>sax:register-sax-parser</tt> to pass their parser instance to
the handler. The easiest way to receive sax parsers instances is
to inherit from sax-parser-mixin when defining a sax handler.
</p>
<p>
<div class="def">Class SAX:SAX-PARSER-MIXIN ()</div>
A mixin for sax handler classes that records the sax handler
object for use with the following functions. Trampoline methods
are provided that allow those functions to be called directly on
the sax-parser-mixin.
</p>
<p>
<div class="def">Function SAX:SAX-HANDLER (sax-handler-mixin) => sax-handler</div>
Return the sax-parser instance recorded by this handler, or NIL.
</p>
<p>
<div class="def">Function SAX:LINE-NUMBER (sax-parser)</div>
Return an approximation of the current line number, or NIL.
</p>
<p>
<div class="def">Function SAX:COLUMN-NUMBER (sax-parser)</div>
Return an approximation of the current column number, or NIL.
</p>
<p>
<div class="def">Function SAX:SYSTEM-ID (sax-parser)</div>
Return the URI of the document being parsed. This is either the
main document, or the entity's system ID while contents of a parsed
general external entity are being processed.
</p>
<p>
<div class="def">Function SAX:XML-BASE (sax-parser)</div>
Return the [Base URI] of the current element. This URI can differ from
the value returned by <tt>sax:system-id</tt> if xml:base
attributes are present.
</p>
<a name="catalogs"/> <a name="catalogs"/>
<h3>XML Catalogs</h3> <h3>XML Catalogs</h3>
<p> <p>

View File

@ -356,11 +356,12 @@
(defun klacks/entity-reference (source zstream name cont) (defun klacks/entity-reference (source zstream name cont)
(assert (not (zstream-token-category zstream))) (assert (not (zstream-token-category zstream)))
(with-source (source temporary-streams) (with-source (source temporary-streams context)
(let ((new-xstream (entity->xstream zstream name :general nil))) (let ((new-xstream (entity->xstream zstream name :general nil)))
(push new-xstream temporary-streams) (push new-xstream temporary-streams)
(push :stop (zstream-input-stack zstream)) (push :stop (zstream-input-stack zstream))
(zstream-push new-xstream zstream) (zstream-push new-xstream zstream)
(push (stream-name-uri (xstream-name new-xstream)) (base-stack context))
(let ((next (let ((next
(lambda () (lambda ()
(klacks/entity-reference-2 source zstream new-xstream cont)))) (klacks/entity-reference-2 source zstream new-xstream cont))))
@ -371,12 +372,13 @@
(klacks/ext-parsed-ent source zstream next))))))) (klacks/ext-parsed-ent source zstream next)))))))
(defun klacks/entity-reference-2 (source zstream new-xstream cont) (defun klacks/entity-reference-2 (source zstream new-xstream cont)
(with-source (source temporary-streams) (with-source (source temporary-streams context)
(unless (eq (peek-token zstream) :eof) (unless (eq (peek-token zstream) :eof)
(wf-error zstream "Trailing garbage. - ~S" (peek-token zstream))) (wf-error zstream "Trailing garbage. - ~S" (peek-token zstream)))
(assert (eq (peek-token zstream) :eof)) (assert (eq (peek-token zstream) :eof))
(assert (eq (pop (zstream-input-stack zstream)) new-xstream)) (assert (eq (pop (zstream-input-stack zstream)) new-xstream))
(assert (eq (pop (zstream-input-stack zstream)) :stop)) (assert (eq (pop (zstream-input-stack zstream)) :stop))
(pop (base-stack context))
(setf (zstream-token-category zstream) nil) (setf (zstream-token-category zstream) nil)
(setf temporary-streams (remove new-xstream temporary-streams)) (setf temporary-streams (remove new-xstream temporary-streams))
(close-xstream new-xstream) (close-xstream new-xstream)
@ -441,6 +443,39 @@
element-name attribute-name type default)) element-name attribute-name type default))
;;;; locator
(defun source-xstream (source)
(car (zstream-input-stack (main-zstream (slot-value source 'context)))))
(defun source-stream-name (source)
(let ((xstream (source-xstream source)))
(if xstream
(xstream-name xstream)
nil)))
(defmethod current-line-number ((source cxml-source))
(let ((x (source-xstream source)))
(if x
(xstream-line-number x)
nil)))
(defmethod current-column-number ((source cxml-source))
(let ((x (source-xstream source)))
(if x
(xstream-column-number x)
nil)))
(defmethod current-system-id ((source cxml-source))
(let ((name (source-stream-name source)))
(if name
(stream-name-uri name)
nil)))
(defmethod current-xml-base ((source cxml-source))
(car (base-stack (slot-value source 'context))))
;;;; debugging ;;;; debugging
#+(or) #+(or)

View File

@ -40,6 +40,11 @@
;;;(defgeneric klacks:current-characters (source)) ;;;(defgeneric klacks:current-characters (source))
(defgeneric klacks:current-cdata-section-p (source)) (defgeneric klacks:current-cdata-section-p (source))
(defgeneric current-line-number (source))
(defgeneric current-column-number (source))
(defgeneric current-system-id (source))
(defgeneric current-xml-base (source))
(defmacro klacks:with-open-source ((var source) &body body) (defmacro klacks:with-open-source ((var source) &body body)
`(let ((,var ,source)) `(let ((,var ,source))
(unwind-protect (unwind-protect

View File

@ -258,17 +258,17 @@
((result :initform (make-entry-file) :accessor result) ((result :initform (make-entry-file) :accessor result)
(next :initform '() :accessor next) (next :initform '() :accessor next)
(prefer-stack :initform (list *prefer*) :accessor prefer-stack) (prefer-stack :initform (list *prefer*) :accessor prefer-stack)
(base-stack :accessor base-stack))) (catalog-base-stack :accessor catalog-base-stack)))
(defmethod initialize-instance :after (defmethod initialize-instance :after
((instance catalog-parser) &key uri) ((instance catalog-parser) &key uri)
(setf (base-stack instance) (list uri))) (setf (catalog-base-stack instance) (list uri)))
(defmethod prefer ((handler catalog-parser)) (defmethod prefer ((handler catalog-parser))
(car (prefer-stack handler))) (car (prefer-stack handler)))
(defmethod base ((handler catalog-parser)) (defmethod base ((handler catalog-parser))
(car (base-stack handler))) (car (catalog-base-stack handler)))
(defun get-attribute/lname (name attributes) (defun get-attribute/lname (name attributes)
(let ((a (find name attributes (let ((a (find name attributes
@ -283,6 +283,7 @@
(setf lname (or lname qname)) (setf lname (or lname qname))
;; we can dispatch on lnames only because we validate against the DTD, ;; we can dispatch on lnames only because we validate against the DTD,
;; which disallows other namespaces. ;; which disallows other namespaces.
;; FIXME: we don't, because we can't.
(push (let ((new (get-attribute/lname "prefer" attrs))) (push (let ((new (get-attribute/lname "prefer" attrs)))
(cond (cond
((equal new "public") :public) ((equal new "public") :public)
@ -290,7 +291,7 @@
((null new) (prefer handler)))) ((null new) (prefer handler))))
(prefer-stack handler)) (prefer-stack handler))
(push (string-or (get-attribute/lname "base" attrs) (base handler)) (push (string-or (get-attribute/lname "base" attrs) (base handler))
(base-stack handler)) (catalog-base-stack handler))
(flet ((geturi (lname) (flet ((geturi (lname)
(puri:merge-uris (puri:merge-uris
(safe-parse-uri (get-attribute/lname lname attrs)) (safe-parse-uri (get-attribute/lname lname attrs))
@ -341,7 +342,7 @@
(defmethod sax:end-element ((handler catalog-parser) uri lname qname) (defmethod sax:end-element ((handler catalog-parser) uri lname qname)
(declare (ignore uri lname qname)) (declare (ignore uri lname qname))
(pop (base-stack handler)) (pop (catalog-base-stack handler))
(pop (prefer-stack handler))) (pop (prefer-stack handler)))
(defmethod sax:end-document ((handler catalog-parser)) (defmethod sax:end-document ((handler catalog-parser))

View File

@ -39,8 +39,6 @@
;; don't really see why. ;; don't really see why.
;; o Missing stuff from Java SAX2: ;; o Missing stuff from Java SAX2:
;; * ignorable-whitespace ;; * ignorable-whitespace
;; * document-locator/(setf document-locator)
;; (probably implies a handler class with an appropriate slot)
;; * skipped-entity ;; * skipped-entity
;; * The whole ErrorHandler class, this is better handled using ;; * The whole ErrorHandler class, this is better handled using
;; conditions (but isn't yet) ;; conditions (but isn't yet)
@ -82,10 +80,64 @@
#:notation-declaration #:notation-declaration
#:element-declaration #:element-declaration
#:attribute-declaration #:attribute-declaration
#:entity-resolver)) #:entity-resolver
#:sax-parser
#:sax-parser-mixin
#:register-sax-parser
#:line-number
#:column-number
#:system-id
#:xml-base))
(in-package :sax) (in-package :sax)
;;;; SAX-PARSER interface
(defclass sax-parser () ())
(defclass sax-parser-mixin ()
((sax-parser :initform nil :reader sax-parser)))
(defgeneric line-number (sax-parser)
(:documentation
"Return an approximation of the current line number, or NIL.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(line-number (sax-parser handler))
nil)))
(defgeneric column-number (sax-parser)
(:documentation
"Return an approximation of the current column number, or NIL.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(column-number (sax-parser handler))
nil)))
(defgeneric system-id (sax-parser)
(:documentation
"Return the URI of the document being parsed. This is either the
main document, or the entity's system ID while contents of a parsed
general external entity are being processed.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(system-id (sax-parser handler))
nil)))
(defgeneric xml-base (sax-parser)
(:documentation
"Return the [Base URI] of the current element. This URI can differ from
the value returned by SAX:SYSTEM-ID if xml:base attributes are present.")
(:method ((handler sax-parser-mixin))
(if (sax-parser handler)
(xml-base (sax-parser handler))
nil)))
;;;; Configuration variables
;; The http://xml.org/sax/features/namespaces property ;; The http://xml.org/sax/features/namespaces property
(defvar *namespace-processing* t (defvar *namespace-processing* t
"If non-nil (the default), namespace processing is enabled. "If non-nil (the default), namespace processing is enabled.
@ -349,6 +401,16 @@ finished, if present.")
(declare (ignore resolver)) (declare (ignore resolver))
nil)) nil))
(defgeneric register-sax-parser
(handler sax-parser)
(:documentation
"Set the SAX-PARSER instance of this handler.")
(:method ((handler t) sax-parser)
(declare (ignore sax-parser))
nil)
(:method ((handler sax-parser-mixin) sax-parser)
(setf (slot-value handler 'sax-parser) sax-parser)))
;; internal for now ;; internal for now
(defgeneric dtd (handler dtd) (defgeneric dtd (handler dtd)
(:method ((handler t) dtd) (declare (ignore dtd)) nil)) (:method ((handler t) dtd) (declare (ignore dtd)) nil))

View File

@ -183,6 +183,8 @@
handler handler
(dtd nil) (dtd nil)
model-stack model-stack
;; xml:base machen wir fuer klacks mal gleich als expliziten stack:
base-stack
(referenced-notations '()) (referenced-notations '())
(id-table (%make-rod-hash-table)) (id-table (%make-rod-hash-table))
;; FIXME: Wofuer ist name-hashtable da? Will man das wissen? ;; FIXME: Wofuer ist name-hashtable da? Will man das wissen?
@ -659,6 +661,38 @@
stream stream
(format nil "End of file~@[: ~?~]" x args))) (format nil "End of file~@[: ~?~]" x args)))
(defclass cxml-parser (sax:sax-parser) ((ctx :initarg :ctx)))
(defun parser-xstream (parser)
(car (zstream-input-stack (main-zstream (slot-value parser 'ctx)))))
(defun parser-stream-name (parser)
(let ((xstream (parser-xstream parser)))
(if xstream
(xstream-name xstream)
nil)))
(defmethod sax:line-number ((parser cxml-parser))
(let ((x (parser-xstream parser)))
(if x
(xstream-line-number x)
nil)))
(defmethod sax:column-number ((parser cxml-parser))
(let ((x (parser-xstream parser)))
(if x
(xstream-column-number x)
nil)))
(defmethod sax:system-id ((parser cxml-parser))
(let ((name (parser-stream-name parser)))
(if name
(stream-name-uri name)
nil)))
(defmethod sax:xml-base ((parser cxml-parser))
(car (base-stack (slot-value parser 'ctx))))
(defvar *validate* t) (defvar *validate* t)
(defvar *external-subset-p* nil) (defvar *external-subset-p* nil)
@ -966,8 +1000,10 @@
(defun call-with-entity-expansion-as-stream (zstream cont name kind internalp) (defun call-with-entity-expansion-as-stream (zstream cont name kind internalp)
;; `zstream' is for error messages ;; `zstream' is for error messages
(let ((in (entity->xstream zstream name kind internalp))) (let ((in (entity->xstream zstream name kind internalp)))
(push (stream-name-uri (xstream-name in)) (base-stack *ctx*))
(unwind-protect (unwind-protect
(funcall cont in) (funcall cont in)
(pop (base-stack *ctx*))
(close-xstream in)))) (close-xstream in))))
(defun ensure-dtd () (defun ensure-dtd ()
@ -2570,13 +2606,18 @@
#+rune-is-integer #+rune-is-integer
(when recode (when recode
(setf handler (make-recoder handler #'rod-to-utf8-string))) (setf handler (make-recoder handler #'rod-to-utf8-string)))
(let ((*ctx* (let* ((xstream (car (zstream-input-stack input)))
(name (xstream-name xstream))
(base (when name (stream-name-uri name)))
(*ctx*
(make-context :handler handler (make-context :handler handler
:main-zstream input :main-zstream input
:entity-resolver entity-resolver :entity-resolver entity-resolver
:base-stack (list (or base ""))
:disallow-internal-subset disallow-internal-subset)) :disallow-internal-subset disallow-internal-subset))
(*validate* validate) (*validate* validate)
(*namespace-bindings* *initial-namespace-bindings*)) (*namespace-bindings* *initial-namespace-bindings*))
(sax:register-sax-parser handler (make-instance 'cxml-parser :ctx *ctx*))
(sax:start-document handler) (sax:start-document handler)
;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc* ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
;; Misc ::= Comment | PI | S ;; Misc ::= Comment | PI | S
@ -2658,6 +2699,7 @@
(p/etag input qname)) (p/etag input qname))
(sax:end-element (handler *ctx*) uri lname qname) (sax:end-element (handler *ctx*) uri lname qname)
(undeclare-namespaces new-b) (undeclare-namespaces new-b)
(pop (base-stack *ctx*))
(validate-end-element *ctx* qname))) (validate-end-element *ctx* qname)))
(defun p/sztag (input) (defun p/sztag (input)
@ -2675,6 +2717,7 @@
(when sax:*namespace-processing* (when sax:*namespace-processing*
(setf new-namespaces (declare-namespaces attrs)) (setf new-namespaces (declare-namespaces attrs))
(mapc #'set-attribute-namespace attrs)) (mapc #'set-attribute-namespace attrs))
(push (compute-base attrs) (base-stack *ctx*))
(multiple-value-bind (uri prefix local-name) (multiple-value-bind (uri prefix local-name)
(if sax:*namespace-processing* (if sax:*namespace-processing*
(decode-qname name) (decode-qname name)
@ -2701,6 +2744,23 @@
(when (cdr sem2) (when (cdr sem2)
(wf-error input "no attributes allowed in end tag")))) (wf-error input "no attributes allowed in end tag"))))
;; copy&paste from cxml-rng
(defun escape-uri (string)
(with-output-to-string (out)
(loop for c across (cxml::rod-to-utf8-string string) do
(let ((code (char-code c)))
;; http://www.w3.org/TR/xlink/#link-locators
(if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
(format out "%~2,'0X" code)
(write-char c out))))))
(defun compute-base (attrs)
(let ((new (sax:find-attribute "xml:base" attrs))
(current (car (base-stack *ctx*))))
(if new
(puri:merge-uris (escape-uri (sax:attribute-value new)) current)
current)))
(defun process-characters (input sem) (defun process-characters (input sem)
(consume-token input) (consume-token input)
(when (search #"]]>" sem) (when (search #"]]>" sem)
@ -3317,6 +3377,7 @@
(return)))) (return))))
res)))) res))))
;; used only by read-att-value-2
(defun internal-entity-expansion (name) (defun internal-entity-expansion (name)
(let ((def (get-entity-definition name :general (dtd *ctx*)))) (let ((def (get-entity-definition name :general (dtd *ctx*))))
(unless def (unless def
@ -3326,6 +3387,7 @@
(or (entdef-expansion def) (or (entdef-expansion def)
(setf (entdef-expansion def) (find-internal-entity-expansion name))))) (setf (entdef-expansion def) (find-internal-entity-expansion name)))))
;; used only by read-att-value-2
(defun find-internal-entity-expansion (name) (defun find-internal-entity-expansion (name)
(let ((zinput (make-zstream))) (let ((zinput (make-zstream)))
(with-rune-collector-3 (collect) (with-rune-collector-3 (collect)
@ -3366,6 +3428,7 @@
(lambda (zinput) (lambda (zinput)
(muffle (car (zstream-input-stack zinput))))) )))) (muffle (car (zstream-input-stack zinput))))) ))))
;; callback for DOM
(defun resolve-entity (name handler dtd) (defun resolve-entity (name handler dtd)
(let ((*validate* nil)) (let ((*validate* nil))
(if (get-entity-definition name :general dtd) (if (get-entity-definition name :general dtd)