klacks parser
This commit is contained in:
16
cxml.asd
16
cxml.asd
@ -109,6 +109,18 @@
|
|||||||
(:file "dom-sax" :depends-on ("package")))
|
(:file "dom-sax" :depends-on ("package")))
|
||||||
:depends-on (:cxml-xml))
|
:depends-on (:cxml-xml))
|
||||||
|
|
||||||
|
(asdf:defsystem :cxml-klacks
|
||||||
|
:default-component-class closure-source-file
|
||||||
|
:pathname (merge-pathnames
|
||||||
|
"klacks/"
|
||||||
|
(make-pathname :name nil :type nil :defaults *load-truename*))
|
||||||
|
:serial t
|
||||||
|
:components
|
||||||
|
((:file "package")
|
||||||
|
(:file "klacks")
|
||||||
|
(:file "klacks-impl"))
|
||||||
|
:depends-on (:cxml-xml))
|
||||||
|
|
||||||
(asdf:defsystem :cxml-test
|
(asdf:defsystem :cxml-test
|
||||||
:default-component-class closure-source-file
|
:default-component-class closure-source-file
|
||||||
:pathname (merge-pathnames
|
:pathname (merge-pathnames
|
||||||
@ -117,4 +129,6 @@
|
|||||||
:components ((:file "domtest") (:file "xmlconf"))
|
:components ((:file "domtest") (:file "xmlconf"))
|
||||||
:depends-on (:cxml-xml :cxml-dom))
|
:depends-on (:cxml-xml :cxml-dom))
|
||||||
|
|
||||||
(asdf:defsystem :cxml :components () :depends-on (:cxml-dom :cxml-test))
|
(asdf:defsystem :cxml
|
||||||
|
:components ()
|
||||||
|
:depends-on (:cxml-dom :cxml-klacks :cxml-test))
|
||||||
|
|||||||
@ -38,7 +38,9 @@
|
|||||||
(push document (element-stack handler))))
|
(push document (element-stack handler))))
|
||||||
|
|
||||||
(defmethod sax:end-document ((handler dom-builder))
|
(defmethod sax:end-document ((handler dom-builder))
|
||||||
(setf (slot-value (document handler) 'dtd) (cxml::dtd cxml::*ctx*))
|
(setf (slot-value (document handler) 'dtd)
|
||||||
|
;; FIXME!
|
||||||
|
(and cxml::*ctx* (cxml::dtd cxml::*ctx*)))
|
||||||
(let ((doctype (dom:doctype (document handler))))
|
(let ((doctype (dom:doctype (document handler))))
|
||||||
(when doctype
|
(when doctype
|
||||||
(setf (slot-value (dom:entities doctype) 'read-only-p) t)
|
(setf (slot-value (dom:entities doctype) 'read-only-p) t)
|
||||||
|
|||||||
391
klacks/klacks-impl.lisp
Normal file
391
klacks/klacks-impl.lisp
Normal file
@ -0,0 +1,391 @@
|
|||||||
|
;;; -*- Mode: Lisp; readtable: runes; -*-
|
||||||
|
;;; (c) copyright 2007 David Lichteblau
|
||||||
|
|
||||||
|
;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;; modify it under the terms of the GNU Library General Public
|
||||||
|
;;; License as published by the Free Software Foundation; either
|
||||||
|
;;; version 2 of the License, or (at your option) any later version.
|
||||||
|
;;;
|
||||||
|
;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;; Library General Public License for more details.
|
||||||
|
;;;
|
||||||
|
;;; You should have received a copy of the GNU Library General Public
|
||||||
|
;;; License along with this library; if not, write to the
|
||||||
|
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
;;; Boston, MA 02111-1307 USA.
|
||||||
|
|
||||||
|
(in-package :cxml)
|
||||||
|
|
||||||
|
(defclass cxml-source (klacks:source)
|
||||||
|
(;; args to make-source
|
||||||
|
(context :initarg :context)
|
||||||
|
(validate :initarg :validate)
|
||||||
|
(root :initarg :root)
|
||||||
|
(dtd :initarg :dtd)
|
||||||
|
(error-culprit :initarg :error-culprit)
|
||||||
|
;; current state
|
||||||
|
(continuation)
|
||||||
|
(current-key :initform nil)
|
||||||
|
(current-values)
|
||||||
|
(current-attributes)
|
||||||
|
(cdata-section-p :reader klacks:current-cdata-section-p)
|
||||||
|
;; extra with-source magic
|
||||||
|
(data-behaviour :initform :DTD)
|
||||||
|
(namespace-stack :initform (list *initial-namespace-bindings*))
|
||||||
|
(temporary-streams :initform nil)
|
||||||
|
(scratch-pad :initarg :scratch-pad)
|
||||||
|
(scratch-pad-2 :initarg :scratch-pad-2)
|
||||||
|
(scratch-pad-3 :initarg :scratch-pad-3)
|
||||||
|
(scratch-pad-4 :initarg :scratch-pad-4)))
|
||||||
|
|
||||||
|
(defmethod klacks:close-source ((source cxml-source))
|
||||||
|
(dolist (xstream (slot-value source 'temporary-streams))
|
||||||
|
;; fixme: error handling?
|
||||||
|
(close-xstream xstream)))
|
||||||
|
|
||||||
|
(defmacro with-source ((source &rest slots) &body body)
|
||||||
|
(let ((s (gensym)))
|
||||||
|
`(let* ((,s ,source)
|
||||||
|
(*ctx* (slot-value ,s 'context))
|
||||||
|
(*validate* (slot-value ,s 'validate))
|
||||||
|
(*data-behaviour* (slot-value source 'data-behaviour))
|
||||||
|
(*namespace-bindings* (car (slot-value source 'namespace-stack)))
|
||||||
|
(*scratch-pad* (slot-value source 'scratch-pad))
|
||||||
|
(*scratch-pad-2* (slot-value source 'scratch-pad-2))
|
||||||
|
(*scratch-pad-3* (slot-value source 'scratch-pad-3))
|
||||||
|
(*scratch-pad-4* (slot-value source 'scratch-pad-4)))
|
||||||
|
(handler-case
|
||||||
|
(with-slots (,@slots) ,s
|
||||||
|
,@body)
|
||||||
|
(runes-encoding:encoding-error (c)
|
||||||
|
(wf-error (slot-value ,s 'error-culprit) "~A" c))))))
|
||||||
|
|
||||||
|
(defun fill-source (source)
|
||||||
|
(with-slots (current-key current-values continuation) source
|
||||||
|
(unless current-key
|
||||||
|
(setf current-key :bogus)
|
||||||
|
(setf continuation (funcall continuation))
|
||||||
|
(assert (not (eq current-key :bogus))))))
|
||||||
|
|
||||||
|
(defmethod klacks:peek ((source cxml-source))
|
||||||
|
(with-source (source current-key current-values)
|
||||||
|
(fill-source source)
|
||||||
|
(apply #'values current-key current-values)))
|
||||||
|
|
||||||
|
(defmethod klacks:peek-value ((source cxml-source))
|
||||||
|
(with-source (source current-key current-values)
|
||||||
|
(fill-source source)
|
||||||
|
(apply #'values current-values)))
|
||||||
|
|
||||||
|
(defmethod klacks:consume ((source cxml-source))
|
||||||
|
(with-source (source current-key current-values)
|
||||||
|
(fill-source source)
|
||||||
|
(multiple-value-prog1
|
||||||
|
(apply #'values current-key current-values)
|
||||||
|
(setf current-key nil))))
|
||||||
|
|
||||||
|
(defmethod klacks:map-attributes (fn (source cxml-source))
|
||||||
|
(dolist (a (slot-value source 'current-attributes))
|
||||||
|
(funcall fn
|
||||||
|
(sax:attribute-namespace-uri a)
|
||||||
|
(sax:attribute-local-name a)
|
||||||
|
(sax:attribute-qname a)
|
||||||
|
(sax:attribute-value a)
|
||||||
|
(sax:attribute-specified-p a))))
|
||||||
|
|
||||||
|
(defmethod klacks:list-attributes ((source cxml-source))
|
||||||
|
(slot-value source 'current-attributes))
|
||||||
|
|
||||||
|
(defun make-source
|
||||||
|
(input &rest args
|
||||||
|
&key validate dtd root entity-resolver disallow-internal-subset
|
||||||
|
pathname)
|
||||||
|
(declare (ignore validate dtd root entity-resolver disallow-internal-subset))
|
||||||
|
(etypecase input
|
||||||
|
(xstream
|
||||||
|
(let ((*ctx* nil))
|
||||||
|
(let ((zstream (make-zstream :input-stack (list input))))
|
||||||
|
(peek-rune input)
|
||||||
|
(with-scratch-pads ()
|
||||||
|
(apply #'%make-source
|
||||||
|
zstream
|
||||||
|
(loop
|
||||||
|
for (name value) on args by #'cddr
|
||||||
|
unless (eq name :pathname)
|
||||||
|
append (list name value)))))))
|
||||||
|
(stream
|
||||||
|
(let ((xstream (make-xstream input)))
|
||||||
|
(setf (xstream-name xstream)
|
||||||
|
(make-stream-name
|
||||||
|
:entity-name "main document"
|
||||||
|
:entity-kind :main
|
||||||
|
:uri (pathname-to-uri
|
||||||
|
(merge-pathnames (or pathname (pathname input))))))
|
||||||
|
(apply #'make-source xstream args)))
|
||||||
|
(pathname
|
||||||
|
(let* ((xstream
|
||||||
|
(make-xstream (open input :element-type '(unsigned-byte 8))))
|
||||||
|
(source (apply #'make-source
|
||||||
|
xstream
|
||||||
|
:pathname input
|
||||||
|
args)))
|
||||||
|
(push xstream (slot-value source 'temporary-streams))
|
||||||
|
source))
|
||||||
|
(rod
|
||||||
|
(let ((xstream (string->xstream input)))
|
||||||
|
(setf (xstream-name xstream)
|
||||||
|
(make-stream-name
|
||||||
|
:entity-name "main document"
|
||||||
|
:entity-kind :main
|
||||||
|
:uri nil))
|
||||||
|
(apply #'make-source xstream args)))))
|
||||||
|
|
||||||
|
(defun %make-source
|
||||||
|
(input &key validate dtd root entity-resolver disallow-internal-subset
|
||||||
|
error-culprit)
|
||||||
|
;; check types of user-supplied arguments for better error messages:
|
||||||
|
(check-type validate boolean)
|
||||||
|
(check-type dtd (or null extid))
|
||||||
|
(check-type root (or null rod))
|
||||||
|
(check-type entity-resolver (or null function symbol))
|
||||||
|
(check-type disallow-internal-subset boolean)
|
||||||
|
(let* ((context
|
||||||
|
(make-context :handler nil
|
||||||
|
:main-zstream input
|
||||||
|
:entity-resolver entity-resolver
|
||||||
|
:disallow-internal-subset disallow-internal-subset))
|
||||||
|
(source
|
||||||
|
(make-instance 'cxml-source
|
||||||
|
:context context
|
||||||
|
:validate validate
|
||||||
|
:dtd dtd
|
||||||
|
:root root
|
||||||
|
:error-culprit error-culprit
|
||||||
|
:scratch-pad *scratch-pad*
|
||||||
|
:scratch-pad-2 *scratch-pad-2*
|
||||||
|
:scratch-pad-3 *scratch-pad-3*
|
||||||
|
:scratch-pad-4 *scratch-pad-4*)))
|
||||||
|
(setf (slot-value source 'continuation)
|
||||||
|
(lambda () (klacks/xmldecl source input)))
|
||||||
|
source))
|
||||||
|
|
||||||
|
(defun klacks/xmldecl (source input)
|
||||||
|
(with-source (source current-key current-values)
|
||||||
|
(let ((hd (p/xmldecl input)))
|
||||||
|
(setf current-key :start-document)
|
||||||
|
(setf current-values
|
||||||
|
(when hd
|
||||||
|
(list (xml-header-version hd)
|
||||||
|
(xml-header-encoding hd)
|
||||||
|
(xml-header-standalone-p hd))))
|
||||||
|
(lambda ()
|
||||||
|
(klacks/misc*-2 source input
|
||||||
|
(lambda ()
|
||||||
|
(klacks/doctype source input)))))))
|
||||||
|
|
||||||
|
(defun klacks/misc*-2 (source input successor)
|
||||||
|
(with-source (source current-key current-values)
|
||||||
|
(multiple-value-bind (cat sem) (peek-token input)
|
||||||
|
(case cat
|
||||||
|
(:COMMENT
|
||||||
|
(setf current-key :comment)
|
||||||
|
(setf current-values (list sem))
|
||||||
|
(consume-token input)
|
||||||
|
(lambda () (klacks/misc*-2 source input successor)))
|
||||||
|
(:PI
|
||||||
|
(setf current-key :processing-instruction)
|
||||||
|
(setf current-values (list (car sem) (cdr sem)))
|
||||||
|
(consume-token input)
|
||||||
|
(lambda () (klacks/misc*-2 source input successor)))
|
||||||
|
(:S
|
||||||
|
(consume-token input)
|
||||||
|
(klacks/misc*-2 source input successor))
|
||||||
|
(t
|
||||||
|
(funcall successor))))))
|
||||||
|
|
||||||
|
(defun klacks/doctype (source input)
|
||||||
|
(with-source (source current-key current-values validate dtd)
|
||||||
|
(let ((cont (lambda () (klacks/finish-doctype source input)))
|
||||||
|
ignoreme name extid)
|
||||||
|
(prog1
|
||||||
|
(cond
|
||||||
|
((eq (peek-token input) :<!DOCTYPE)
|
||||||
|
(setf (values ignoreme name extid)
|
||||||
|
(p/doctype-decl input dtd))
|
||||||
|
(lambda () (klacks/misc*-2 source input cont)))
|
||||||
|
(dtd
|
||||||
|
(setf (values ignoreme name extid)
|
||||||
|
(synthesize-doctype dtd input))
|
||||||
|
cont)
|
||||||
|
((and validate (not dtd))
|
||||||
|
(validity-error "invalid document: no doctype"))
|
||||||
|
(t
|
||||||
|
(return-from klacks/doctype
|
||||||
|
(funcall cont))))
|
||||||
|
(setf current-key :dtd)
|
||||||
|
(setf current-values
|
||||||
|
(list name (extid-public extid) (extid-system extid)))))))
|
||||||
|
|
||||||
|
(defun klacks/finish-doctype (source input)
|
||||||
|
(with-source (source current-key current-values root data-behaviour)
|
||||||
|
(ensure-dtd)
|
||||||
|
(when root
|
||||||
|
(setf (model-stack *ctx*) (list (make-root-model root))))
|
||||||
|
(setf data-behaviour :DOC)
|
||||||
|
(setf *data-behaviour* :DOC)
|
||||||
|
(fix-seen-< input)
|
||||||
|
(let* ((final
|
||||||
|
(lambda ()
|
||||||
|
(klacks/eof source input)))
|
||||||
|
(next
|
||||||
|
(lambda ()
|
||||||
|
(setf data-behaviour :DTD)
|
||||||
|
(setf *data-behaviour* :DTD)
|
||||||
|
(klacks/misc*-2 source input final))))
|
||||||
|
(klacks/element source input next))))
|
||||||
|
|
||||||
|
(defun klacks/eof (source input)
|
||||||
|
(with-source (source current-key current-values)
|
||||||
|
(p/eof input)
|
||||||
|
(setf current-key :end-document)
|
||||||
|
(setf current-values nil)
|
||||||
|
(lambda () (klacks/nil source))))
|
||||||
|
|
||||||
|
(defun klacks/nil (source)
|
||||||
|
(with-source (source current-key current-values)
|
||||||
|
(setf current-key nil)
|
||||||
|
(setf current-values nil)
|
||||||
|
(labels ((klacks/done () #'klacks/done))
|
||||||
|
#'klacks/done)))
|
||||||
|
|
||||||
|
(defun klacks/element (source input cont)
|
||||||
|
(with-source (source current-key current-values current-attributes)
|
||||||
|
(multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input)
|
||||||
|
(declare (ignore new-b))
|
||||||
|
(setf current-key :start-element)
|
||||||
|
(setf current-values (list uri lname qname))
|
||||||
|
(setf current-attributes attrs)
|
||||||
|
(if (eq cat :stag)
|
||||||
|
(lambda ()
|
||||||
|
(klacks/element-2 source input n-b cont))
|
||||||
|
(lambda ()
|
||||||
|
(klacks/ztag source cont))))))
|
||||||
|
|
||||||
|
(defun klacks/ztag (source cont)
|
||||||
|
(with-source (source current-key current-values current-attributes)
|
||||||
|
(setf current-key :end-element)
|
||||||
|
(setf current-attributes nil)
|
||||||
|
;; fixme: (undeclare-namespaces new-b)
|
||||||
|
(validate-end-element *ctx* (third current-values))
|
||||||
|
cont))
|
||||||
|
|
||||||
|
(defun klacks/element-2 (source input n-b cont)
|
||||||
|
(with-source (source
|
||||||
|
current-key current-values current-attributes namespace-stack)
|
||||||
|
(let ((values* current-values))
|
||||||
|
(setf current-attributes nil)
|
||||||
|
(push n-b namespace-stack)
|
||||||
|
(let ((finish
|
||||||
|
(lambda ()
|
||||||
|
(pop namespace-stack)
|
||||||
|
(klacks/element-3 source input values* cont))))
|
||||||
|
(klacks/content source input finish)))))
|
||||||
|
|
||||||
|
(defun klacks/element-3 (source input tag-values cont)
|
||||||
|
(with-source (source current-key current-values current-attributes)
|
||||||
|
(setf current-key :end-element)
|
||||||
|
(setf current-values tag-values)
|
||||||
|
(let ((qname (third tag-values)))
|
||||||
|
(p/etag input qname)
|
||||||
|
;; fixme: (undeclare-namespaces new-b)
|
||||||
|
(validate-end-element *ctx* qname))
|
||||||
|
cont))
|
||||||
|
|
||||||
|
(defun klacks/content (source input cont)
|
||||||
|
(with-source (source current-key current-values cdata-section-p)
|
||||||
|
(let ((recurse (lambda () (klacks/content source input cont))))
|
||||||
|
(multiple-value-bind (cat sem) (peek-token input)
|
||||||
|
(case cat
|
||||||
|
((:stag :ztag)
|
||||||
|
(klacks/element source input recurse))
|
||||||
|
((:CDATA)
|
||||||
|
(process-characters input sem)
|
||||||
|
(setf current-key :characters)
|
||||||
|
(setf current-values (list sem))
|
||||||
|
(setf cdata-section-p nil)
|
||||||
|
recurse)
|
||||||
|
((:ENTITY-REF)
|
||||||
|
(let ((name sem))
|
||||||
|
(consume-token input)
|
||||||
|
(klacks/entity-reference source input name recurse)))
|
||||||
|
((:<!\[)
|
||||||
|
(setf current-key :characters)
|
||||||
|
(setf current-values (list (process-cdata-section input sem)))
|
||||||
|
(setf cdata-section-p t)
|
||||||
|
recurse)
|
||||||
|
((:PI)
|
||||||
|
(setf current-key :processing-instruction)
|
||||||
|
(setf current-values (list (car sem) (cdr sem)))
|
||||||
|
(consume-token input)
|
||||||
|
recurse)
|
||||||
|
((:COMMENT)
|
||||||
|
(setf current-key :comment)
|
||||||
|
(setf current-values (list sem))
|
||||||
|
(consume-token input)
|
||||||
|
recurse)
|
||||||
|
(otherwise
|
||||||
|
(funcall cont)))))))
|
||||||
|
|
||||||
|
(defun klacks/entity-reference (source zstream name cont)
|
||||||
|
(assert (not (zstream-token-category zstream)))
|
||||||
|
(with-source (source temporary-streams)
|
||||||
|
(let ((new-xstream (entity->xstream zstream name :general nil)))
|
||||||
|
(push new-xstream temporary-streams)
|
||||||
|
(push :stop (zstream-input-stack zstream))
|
||||||
|
(zstream-push new-xstream zstream)
|
||||||
|
(let ((next
|
||||||
|
(lambda ()
|
||||||
|
(klacks/entity-reference-2 source zstream new-xstream cont))))
|
||||||
|
(etypecase (checked-get-entdef name :general)
|
||||||
|
(internal-entdef
|
||||||
|
(klacks/content source zstream next))
|
||||||
|
(external-entdef
|
||||||
|
(klacks/ext-parsed-ent source zstream next)))))))
|
||||||
|
|
||||||
|
(defun klacks/entity-reference-2 (source zstream new-xstream cont)
|
||||||
|
(with-source (source temporary-streams)
|
||||||
|
(unless (eq (peek-token zstream) :eof)
|
||||||
|
(wf-error zstream "Trailing garbage. - ~S" (peek-token zstream)))
|
||||||
|
(assert (eq (peek-token zstream) :eof))
|
||||||
|
(assert (eq (pop (zstream-input-stack zstream)) new-xstream))
|
||||||
|
(assert (eq (pop (zstream-input-stack zstream)) :stop))
|
||||||
|
(setf (zstream-token-category zstream) nil)
|
||||||
|
(setf temporary-streams (remove new-xstream temporary-streams))
|
||||||
|
(close-xstream new-xstream)
|
||||||
|
(funcall cont)))
|
||||||
|
|
||||||
|
(defun klacks/ext-parsed-ent (source input cont)
|
||||||
|
(with-source (source)
|
||||||
|
(when (eq (peek-token input) :xml-decl)
|
||||||
|
(let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
|
||||||
|
(setup-encoding input hd))
|
||||||
|
(consume-token input))
|
||||||
|
(set-full-speed input)
|
||||||
|
(klacks/content source input cont)))
|
||||||
|
|
||||||
|
#+(or)
|
||||||
|
(trace CXML::KLACKS/DOCTYPE
|
||||||
|
CXML::KLACKS/EXT-PARSED-ENT
|
||||||
|
CXML::KLACKS/MISC*-2
|
||||||
|
CXML::KLACKS/ENTITY-REFERENCE
|
||||||
|
CXML::KLACKS/ENTITY-REFERENCE-2
|
||||||
|
CXML::KLACKS/ELEMENT
|
||||||
|
CXML::KLACKS/ZTAG
|
||||||
|
CXML::KLACKS/XMLDECL
|
||||||
|
CXML::KLACKS/FINISH-DOCTYPE
|
||||||
|
CXML::KLACKS/ELEMENT-3
|
||||||
|
CXML::KLACKS/EOF
|
||||||
|
CXML::KLACKS/ELEMENT-2
|
||||||
|
CXML::KLACKS/CONTENT )
|
||||||
95
klacks/klacks.lisp
Normal file
95
klacks/klacks.lisp
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
;;; -*- Mode: Lisp; readtable: runes; -*-
|
||||||
|
;;; (c) copyright 2007 David Lichteblau
|
||||||
|
|
||||||
|
;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;; modify it under the terms of the GNU Library General Public
|
||||||
|
;;; License as published by the Free Software Foundation; either
|
||||||
|
;;; version 2 of the License, or (at your option) any later version.
|
||||||
|
;;;
|
||||||
|
;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;; Library General Public License for more details.
|
||||||
|
;;;
|
||||||
|
;;; You should have received a copy of the GNU Library General Public
|
||||||
|
;;; License along with this library; if not, write to the
|
||||||
|
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
;;; Boston, MA 02111-1307 USA.
|
||||||
|
|
||||||
|
(in-package :cxml)
|
||||||
|
|
||||||
|
(defclass klacks:source () ())
|
||||||
|
|
||||||
|
(defgeneric klacks:close-source (source))
|
||||||
|
|
||||||
|
(defgeneric klacks:peek (source))
|
||||||
|
(defgeneric klacks:peek-value (source))
|
||||||
|
(defgeneric klacks:consume (source))
|
||||||
|
|
||||||
|
(defgeneric klacks:map-attributes (fn source))
|
||||||
|
(defgeneric klacks:list-attributes (source))
|
||||||
|
;;;(defgeneric klacks:current-uri (source))
|
||||||
|
;;;(defgeneric klacks:current-lname (source))
|
||||||
|
;;;(defgeneric klacks:current-qname (source))
|
||||||
|
;;;(defgeneric klacks:current-characters (source))
|
||||||
|
(defgeneric klacks:current-cdata-section-p (source))
|
||||||
|
|
||||||
|
(defmacro klacks:with-open-source ((var source) &body body)
|
||||||
|
`(let ((,var ,source))
|
||||||
|
(unwind-protect
|
||||||
|
(progn ,@body)
|
||||||
|
(klacks:close-source ,var))))
|
||||||
|
|
||||||
|
(defun klacks:current-uri (source)
|
||||||
|
(multiple-value-bind (key uri lname qname) (klacks:peek source)
|
||||||
|
(declare (ignore lname qname))
|
||||||
|
(check-type key (member :start-element :end-element))
|
||||||
|
uri))
|
||||||
|
|
||||||
|
(defun klacks:current-lname (source)
|
||||||
|
(multiple-value-bind (key uri lname qname) (klacks:peek source)
|
||||||
|
(declare (ignore uri qname))
|
||||||
|
(check-type key (member :start-element :end-element))
|
||||||
|
lname))
|
||||||
|
|
||||||
|
(defun klacks:current-qname (source)
|
||||||
|
(multiple-value-bind (key uri lname qname) (klacks:peek source)
|
||||||
|
(declare (ignore uri lname))
|
||||||
|
(check-type key (member :start-element :end-element))
|
||||||
|
qname))
|
||||||
|
|
||||||
|
(defun klacks:current-characters (source)
|
||||||
|
(multiple-value-bind (key characters) (klacks:peek source)
|
||||||
|
(check-type key (member :characters))
|
||||||
|
characters))
|
||||||
|
|
||||||
|
(defun klacks:serialize-source (source handler)
|
||||||
|
(loop
|
||||||
|
(multiple-value-bind (key a b c) (klacks:peek source)
|
||||||
|
(case key
|
||||||
|
(:start-document
|
||||||
|
(sax:start-document handler))
|
||||||
|
(:characters
|
||||||
|
(cond
|
||||||
|
((klacks:current-cdata-section-p source)
|
||||||
|
(sax:start-cdata source)
|
||||||
|
(sax:characters handler a)
|
||||||
|
(sax:end-cdata source))
|
||||||
|
(T
|
||||||
|
(sax:characters handler a))))
|
||||||
|
(:processing-instruction
|
||||||
|
(sax:processing-instruction handler a b))
|
||||||
|
(:comment
|
||||||
|
(sax:comment handler a))
|
||||||
|
(:dtd
|
||||||
|
(sax:start-dtd handler a b c)
|
||||||
|
(sax:end-dtd handler))
|
||||||
|
(:start-element
|
||||||
|
(sax:start-element handler a b c (klacks:list-attributes source)))
|
||||||
|
(:end-element
|
||||||
|
(sax:end-element handler a b c))
|
||||||
|
(:end-document
|
||||||
|
(return (sax:end-document handler)))
|
||||||
|
(t
|
||||||
|
(error "unexpected klacks key: ~A" key)))
|
||||||
|
(klacks:consume source))))
|
||||||
38
klacks/package.lisp
Normal file
38
klacks/package.lisp
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
;;; -*- Mode: Lisp; readtable: runes; -*-
|
||||||
|
;;; (c) copyright 2007 David Lichteblau
|
||||||
|
|
||||||
|
;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;; modify it under the terms of the GNU Library General Public
|
||||||
|
;;; License as published by the Free Software Foundation; either
|
||||||
|
;;; version 2 of the License, or (at your option) any later version.
|
||||||
|
;;;
|
||||||
|
;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;; Library General Public License for more details.
|
||||||
|
;;;
|
||||||
|
;;; You should have received a copy of the GNU Library General Public
|
||||||
|
;;; License along with this library; if not, write to the
|
||||||
|
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
;;; Boston, MA 02111-1307 USA.
|
||||||
|
|
||||||
|
(defpackage klacks
|
||||||
|
(:use)
|
||||||
|
(:export #:source
|
||||||
|
#:close-source
|
||||||
|
#:with-open-source
|
||||||
|
|
||||||
|
#:peek
|
||||||
|
#:peek-value
|
||||||
|
|
||||||
|
#:map-attributes
|
||||||
|
#:list-attributes
|
||||||
|
#:current-uri
|
||||||
|
#:current-lname
|
||||||
|
#:current-qname
|
||||||
|
#:current-characters
|
||||||
|
#:current-cdata-section-p
|
||||||
|
|
||||||
|
#:consume
|
||||||
|
|
||||||
|
#:serialize-source))
|
||||||
@ -83,4 +83,6 @@
|
|||||||
#:make-namespace-normalizer
|
#:make-namespace-normalizer
|
||||||
#:make-whitespace-normalizer
|
#:make-whitespace-normalizer
|
||||||
#:rod-to-utf8-string
|
#:rod-to-utf8-string
|
||||||
#:utf8-string-to-rod))
|
#:utf8-string-to-rod
|
||||||
|
|
||||||
|
#:make-source))
|
||||||
|
|||||||
@ -68,11 +68,11 @@
|
|||||||
;; :stag (<name> . <atts>) ;start tag
|
;; :stag (<name> . <atts>) ;start tag
|
||||||
;; :etag (<name> . <atts>) ;end tag
|
;; :etag (<name> . <atts>) ;end tag
|
||||||
;; :ztag (<name> . <atts>) ;empty tag
|
;; :ztag (<name> . <atts>) ;empty tag
|
||||||
;; :<!element
|
;; :<!ELEMENT
|
||||||
;; :<!entity
|
;; :<!ENTITY
|
||||||
;; :<!attlist
|
;; :<!ATTLIST
|
||||||
;; :<!notation
|
;; :<!NOTATION
|
||||||
;; :<!doctype
|
;; :<!DOCTYPE
|
||||||
;; :<![
|
;; :<![
|
||||||
;; :comment <content>
|
;; :comment <content>
|
||||||
|
|
||||||
@ -194,11 +194,13 @@
|
|||||||
|
|
||||||
(defvar *expand-pe-p* nil)
|
(defvar *expand-pe-p* nil)
|
||||||
|
|
||||||
(defparameter *namespace-bindings*
|
(defparameter *initial-namespace-bindings*
|
||||||
'((#"" . nil)
|
'((#"" . nil)
|
||||||
(#"xmlns" . #"http://www.w3.org/2000/xmlns/")
|
(#"xmlns" . #"http://www.w3.org/2000/xmlns/")
|
||||||
(#"xml" . #"http://www.w3.org/XML/1998/namespace")))
|
(#"xml" . #"http://www.w3.org/XML/1998/namespace")))
|
||||||
|
|
||||||
|
(defparameter *namespace-bindings* *initial-namespace-bindings*)
|
||||||
|
|
||||||
;;;; ---------------------------------------------------------------------------
|
;;;; ---------------------------------------------------------------------------
|
||||||
;;;; xstreams
|
;;;; xstreams
|
||||||
;;;;
|
;;;;
|
||||||
@ -2571,22 +2573,16 @@
|
|||||||
:main-zstream input
|
:main-zstream input
|
||||||
:entity-resolver entity-resolver
|
:entity-resolver entity-resolver
|
||||||
:disallow-internal-subset disallow-internal-subset))
|
:disallow-internal-subset disallow-internal-subset))
|
||||||
(*validate* validate))
|
(*validate* validate)
|
||||||
|
(*namespace-bindings* *initial-namespace-bindings*))
|
||||||
(sax:start-document handler)
|
(sax:start-document handler)
|
||||||
;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
|
;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
|
||||||
;; Misc ::= Comment | PI | S
|
;; Misc ::= Comment | PI | S
|
||||||
;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
||||||
;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
|
;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
|
||||||
;;
|
|
||||||
;; we will use the attribute-value parser for the xml decl.
|
|
||||||
(let ((*data-behaviour* :DTD))
|
(let ((*data-behaviour* :DTD))
|
||||||
;; optional XMLDecl?
|
;; optional XMLDecl?
|
||||||
(cond ((eq (peek-token input) :xml-decl)
|
(p/xmldecl input)
|
||||||
(let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input))))))
|
|
||||||
(setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes))
|
|
||||||
(setup-encoding input hd))
|
|
||||||
(read-token input)))
|
|
||||||
(set-full-speed input)
|
|
||||||
;; Misc*
|
;; Misc*
|
||||||
(p/misc*-2 input)
|
(p/misc*-2 input)
|
||||||
;; (doctypedecl Misc*)?
|
;; (doctypedecl Misc*)?
|
||||||
@ -2595,13 +2591,7 @@
|
|||||||
(p/doctype-decl input dtd)
|
(p/doctype-decl input dtd)
|
||||||
(p/misc*-2 input))
|
(p/misc*-2 input))
|
||||||
(dtd
|
(dtd
|
||||||
(let ((dummy (string->xstream "<!DOCTYPE dummy>")))
|
(synthesize-doctype dtd input))
|
||||||
(setf (xstream-name dummy)
|
|
||||||
(make-stream-name
|
|
||||||
:entity-name "dummy doctype"
|
|
||||||
:entity-kind :main
|
|
||||||
:uri (zstream-base-sysid input)))
|
|
||||||
(p/doctype-decl (make-zstream :input-stack (list dummy)) dtd)))
|
|
||||||
((and validate (not dtd))
|
((and validate (not dtd))
|
||||||
(validity-error "invalid document: no doctype")))
|
(validity-error "invalid document: no doctype")))
|
||||||
(ensure-dtd)
|
(ensure-dtd)
|
||||||
@ -2610,28 +2600,65 @@
|
|||||||
(setf (model-stack *ctx*) (list (make-root-model root))))
|
(setf (model-stack *ctx*) (list (make-root-model root))))
|
||||||
;; element
|
;; element
|
||||||
(let ((*data-behaviour* :DOC))
|
(let ((*data-behaviour* :DOC))
|
||||||
(when (eq (peek-token input) :seen-<)
|
(fix-seen-< input)
|
||||||
(multiple-value-bind (c s)
|
|
||||||
(read-token-after-|<| input (car (zstream-input-stack input)))
|
|
||||||
(setf (zstream-token-category input) c
|
|
||||||
(zstream-token-semantic input) s)))
|
|
||||||
(p/element input))
|
(p/element input))
|
||||||
;; optional Misc*
|
;; optional Misc*
|
||||||
(p/misc*-2 input)
|
(p/misc*-2 input)
|
||||||
(unless (eq (peek-token input) :eof)
|
(p/eof input)
|
||||||
(wf-error input "Garbage at end of document."))
|
|
||||||
(when *validate*
|
|
||||||
(maphash (lambda (k v)
|
|
||||||
(unless v
|
|
||||||
(validity-error "(11) IDREF: ~S not defined" (rod-string k))))
|
|
||||||
(id-table *ctx*))
|
|
||||||
|
|
||||||
(dolist (name (referenced-notations *ctx*))
|
|
||||||
(unless (find-notation name (dtd *ctx*))
|
|
||||||
(validity-error "(23) Notation Declared: ~S" (rod-string name)))))
|
|
||||||
(sax:end-document handler))))
|
(sax:end-document handler))))
|
||||||
|
|
||||||
|
(defun synthesize-doctype (dtd input)
|
||||||
|
(let ((dummy (string->xstream "<!DOCTYPE dummy>")))
|
||||||
|
(setf (xstream-name dummy)
|
||||||
|
(make-stream-name
|
||||||
|
:entity-name "dummy doctype"
|
||||||
|
:entity-kind :main
|
||||||
|
:uri (zstream-base-sysid input)))
|
||||||
|
(p/doctype-decl (make-zstream :input-stack (list dummy)) dtd)))
|
||||||
|
|
||||||
|
(defun fix-seen-< (input)
|
||||||
|
(when (eq (peek-token input) :seen-<)
|
||||||
|
(multiple-value-bind (c s)
|
||||||
|
(read-token-after-|<| input (car (zstream-input-stack input)))
|
||||||
|
(setf (zstream-token-category input) c
|
||||||
|
(zstream-token-semantic input) s))))
|
||||||
|
|
||||||
|
(defun p/xmldecl (input)
|
||||||
|
;; we will use the attribute-value parser for the xml decl.
|
||||||
|
(prog1
|
||||||
|
(when (eq (peek-token input) :xml-decl)
|
||||||
|
(let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input))))))
|
||||||
|
(setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes))
|
||||||
|
(setup-encoding input hd)
|
||||||
|
(read-token input)
|
||||||
|
hd))
|
||||||
|
(set-full-speed input)))
|
||||||
|
|
||||||
|
(defun p/eof (input)
|
||||||
|
(unless (eq (peek-token input) :eof)
|
||||||
|
(wf-error input "Garbage at end of document."))
|
||||||
|
(when *validate*
|
||||||
|
(maphash (lambda (k v)
|
||||||
|
(unless v
|
||||||
|
(validity-error "(11) IDREF: ~S not defined" (rod-string k))))
|
||||||
|
(id-table *ctx*))
|
||||||
|
|
||||||
|
(dolist (name (referenced-notations *ctx*))
|
||||||
|
(unless (find-notation name (dtd *ctx*))
|
||||||
|
(validity-error "(23) Notation Declared: ~S" (rod-string name))))))
|
||||||
|
|
||||||
(defun p/element (input)
|
(defun p/element (input)
|
||||||
|
(multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input)
|
||||||
|
(sax:start-element (handler *ctx*) uri lname qname attrs)
|
||||||
|
(when (eq cat :stag)
|
||||||
|
(let ((*namespace-bindings* n-b))
|
||||||
|
(p/content input))
|
||||||
|
(p/etag input qname))
|
||||||
|
(sax:end-element (handler *ctx*) uri lname qname)
|
||||||
|
(undeclare-namespaces new-b)
|
||||||
|
(validate-end-element *ctx* qname)))
|
||||||
|
|
||||||
|
(defun p/sztag (input)
|
||||||
(multiple-value-bind (cat sem) (read-token input)
|
(multiple-value-bind (cat sem) (read-token input)
|
||||||
(case cat
|
(case cat
|
||||||
((:stag :ztag))
|
((:stag :ztag))
|
||||||
@ -2657,28 +2684,39 @@
|
|||||||
(setf attrs
|
(setf attrs
|
||||||
(remove-if (compose #'xmlns-attr-p #'sax:attribute-qname)
|
(remove-if (compose #'xmlns-attr-p #'sax:attribute-qname)
|
||||||
attrs)))
|
attrs)))
|
||||||
(cond
|
(values cat
|
||||||
((eq cat :ztag)
|
*namespace-bindings*
|
||||||
(sax:start-element (handler *ctx*) uri local-name name attrs)
|
new-namespaces
|
||||||
(sax:end-element (handler *ctx*) uri local-name name))
|
uri local-name name attrs))))))
|
||||||
|
|
||||||
((eq cat :stag)
|
(defun p/etag (input qname)
|
||||||
(sax:start-element (handler *ctx*) uri local-name name attrs)
|
(multiple-value-bind (cat2 sem2) (read-token input)
|
||||||
(p/content input)
|
(unless (and (eq cat2 :etag)
|
||||||
(multiple-value-bind (cat2 sem2) (read-token input)
|
(eq (car sem2) qname))
|
||||||
(unless (and (eq cat2 :etag)
|
(wf-error input "Bad nesting. ~S / ~S"
|
||||||
(eq (car sem2) name))
|
(mu qname)
|
||||||
(wf-error input "Bad nesting. ~S / ~S"
|
(mu (cons cat2 sem2))))
|
||||||
(mu name)
|
(when (cdr sem2)
|
||||||
(mu (cons cat2 sem2))))
|
(wf-error input "no attributes allowed in end tag"))))
|
||||||
(when (cdr sem2)
|
|
||||||
(wf-error input "no attributes allowed in end tag")))
|
(defun process-characters (input sem)
|
||||||
(sax:end-element (handler *ctx*) uri local-name name))
|
(consume-token input)
|
||||||
|
(when (search #"]]>" sem)
|
||||||
(t
|
(wf-error input "']]>' not allowed in CharData"))
|
||||||
(wf-error input "Expecting element, got ~S." cat))))
|
(validate-characters *ctx* sem))
|
||||||
(undeclare-namespaces new-namespaces))
|
|
||||||
(validate-end-element *ctx* name))))
|
(defun process-cdata-section (input)
|
||||||
|
(consume-token input)
|
||||||
|
(let ((input (car (zstream-input-stack input))))
|
||||||
|
(unless (and (rune= #/C (read-rune input))
|
||||||
|
(rune= #/D (read-rune input))
|
||||||
|
(rune= #/A (read-rune input))
|
||||||
|
(rune= #/T (read-rune input))
|
||||||
|
(rune= #/A (read-rune input))
|
||||||
|
(rune= #/\[ (read-rune input)))
|
||||||
|
(wf-error input "After '<![', 'CDATA[' is expected."))
|
||||||
|
(validate-characters *ctx* #"hack") ;anything other than whitespace
|
||||||
|
(read-cdata-sect input)))
|
||||||
|
|
||||||
(defun p/content (input)
|
(defun p/content (input)
|
||||||
;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
|
;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
|
||||||
@ -2688,10 +2726,7 @@
|
|||||||
(p/element input)
|
(p/element input)
|
||||||
(p/content input))
|
(p/content input))
|
||||||
((:CDATA)
|
((:CDATA)
|
||||||
(consume-token input)
|
(process-characters input sem)
|
||||||
(when (search #"]]>" sem)
|
|
||||||
(wf-error input "']]>' not allowed in CharData"))
|
|
||||||
(validate-characters *ctx* sem)
|
|
||||||
(sax:characters (handler *ctx*) sem)
|
(sax:characters (handler *ctx*) sem)
|
||||||
(p/content input))
|
(p/content input))
|
||||||
((:ENTITY-REF)
|
((:ENTITY-REF)
|
||||||
@ -2709,21 +2744,11 @@
|
|||||||
(peek-token input))))))
|
(peek-token input))))))
|
||||||
(p/content input))))
|
(p/content input))))
|
||||||
((:<!\[)
|
((:<!\[)
|
||||||
(consume-token input)
|
(let ((data (process-cdata-section input)))
|
||||||
(cons
|
(sax:start-cdata (handler *ctx*))
|
||||||
(let ((input (car (zstream-input-stack input))))
|
(sax:characters (handler *ctx*) data)
|
||||||
(unless (and (rune= #/C (read-rune input))
|
(sax:end-cdata (handler *ctx*)))
|
||||||
(rune= #/D (read-rune input))
|
(p/content input))
|
||||||
(rune= #/A (read-rune input))
|
|
||||||
(rune= #/T (read-rune input))
|
|
||||||
(rune= #/A (read-rune input))
|
|
||||||
(rune= #/\[ (read-rune input)))
|
|
||||||
(wf-error input "After '<![', 'CDATA[' is expected."))
|
|
||||||
(validate-characters *ctx* #"hack") ;anything other than whitespace
|
|
||||||
(sax:start-cdata (handler *ctx*))
|
|
||||||
(sax:characters (handler *ctx*) (read-cdata-sect input))
|
|
||||||
(sax:end-cdata (handler *ctx*)))
|
|
||||||
(p/content input)))
|
|
||||||
((:PI)
|
((:PI)
|
||||||
(consume-token input)
|
(consume-token input)
|
||||||
(sax:processing-instruction (handler *ctx*) (car sem) (cdr sem))
|
(sax:processing-instruction (handler *ctx*) (car sem) (cdr sem))
|
||||||
|
|||||||
Reference in New Issue
Block a user