diff --git a/cxml.asd b/cxml.asd index 03845dc..bfcd2b8 100644 --- a/cxml.asd +++ b/cxml.asd @@ -109,6 +109,18 @@ (:file "dom-sax" :depends-on ("package"))) :depends-on (:cxml-xml)) +(asdf:defsystem :cxml-klacks + :default-component-class closure-source-file + :pathname (merge-pathnames + "klacks/" + (make-pathname :name nil :type nil :defaults *load-truename*)) + :serial t + :components + ((:file "package") + (:file "klacks") + (:file "klacks-impl")) + :depends-on (:cxml-xml)) + (asdf:defsystem :cxml-test :default-component-class closure-source-file :pathname (merge-pathnames @@ -117,4 +129,6 @@ :components ((:file "domtest") (:file "xmlconf")) :depends-on (:cxml-xml :cxml-dom)) -(asdf:defsystem :cxml :components () :depends-on (:cxml-dom :cxml-test)) +(asdf:defsystem :cxml + :components () + :depends-on (:cxml-dom :cxml-klacks :cxml-test)) diff --git a/dom/dom-builder.lisp b/dom/dom-builder.lisp index 30cec4f..eed2048 100644 --- a/dom/dom-builder.lisp +++ b/dom/dom-builder.lisp @@ -38,7 +38,9 @@ (push document (element-stack handler)))) (defmethod sax:end-document ((handler dom-builder)) - (setf (slot-value (document handler) 'dtd) (cxml::dtd cxml::*ctx*)) + (setf (slot-value (document handler) 'dtd) + ;; FIXME! + (and cxml::*ctx* (cxml::dtd cxml::*ctx*))) (let ((doctype (dom:doctype (document handler)))) (when doctype (setf (slot-value (dom:entities doctype) 'read-only-p) t) diff --git a/klacks/klacks-impl.lisp b/klacks/klacks-impl.lisp new file mode 100644 index 0000000..125ca3b --- /dev/null +++ b/klacks/klacks-impl.lisp @@ -0,0 +1,391 @@ +;;; -*- Mode: Lisp; readtable: runes; -*- +;;; (c) copyright 2007 David Lichteblau + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :cxml) + +(defclass cxml-source (klacks:source) + (;; args to make-source + (context :initarg :context) + (validate :initarg :validate) + (root :initarg :root) + (dtd :initarg :dtd) + (error-culprit :initarg :error-culprit) + ;; current state + (continuation) + (current-key :initform nil) + (current-values) + (current-attributes) + (cdata-section-p :reader klacks:current-cdata-section-p) + ;; extra with-source magic + (data-behaviour :initform :DTD) + (namespace-stack :initform (list *initial-namespace-bindings*)) + (temporary-streams :initform nil) + (scratch-pad :initarg :scratch-pad) + (scratch-pad-2 :initarg :scratch-pad-2) + (scratch-pad-3 :initarg :scratch-pad-3) + (scratch-pad-4 :initarg :scratch-pad-4))) + +(defmethod klacks:close-source ((source cxml-source)) + (dolist (xstream (slot-value source 'temporary-streams)) + ;; fixme: error handling? + (close-xstream xstream))) + +(defmacro with-source ((source &rest slots) &body body) + (let ((s (gensym))) + `(let* ((,s ,source) + (*ctx* (slot-value ,s 'context)) + (*validate* (slot-value ,s 'validate)) + (*data-behaviour* (slot-value source 'data-behaviour)) + (*namespace-bindings* (car (slot-value source 'namespace-stack))) + (*scratch-pad* (slot-value source 'scratch-pad)) + (*scratch-pad-2* (slot-value source 'scratch-pad-2)) + (*scratch-pad-3* (slot-value source 'scratch-pad-3)) + (*scratch-pad-4* (slot-value source 'scratch-pad-4))) + (handler-case + (with-slots (,@slots) ,s + ,@body) + (runes-encoding:encoding-error (c) + (wf-error (slot-value ,s 'error-culprit) "~A" c)))))) + +(defun fill-source (source) + (with-slots (current-key current-values continuation) source + (unless current-key + (setf current-key :bogus) + (setf continuation (funcall continuation)) + (assert (not (eq current-key :bogus)))))) + +(defmethod klacks:peek ((source cxml-source)) + (with-source (source current-key current-values) + (fill-source source) + (apply #'values current-key current-values))) + +(defmethod klacks:peek-value ((source cxml-source)) + (with-source (source current-key current-values) + (fill-source source) + (apply #'values current-values))) + +(defmethod klacks:consume ((source cxml-source)) + (with-source (source current-key current-values) + (fill-source source) + (multiple-value-prog1 + (apply #'values current-key current-values) + (setf current-key nil)))) + +(defmethod klacks:map-attributes (fn (source cxml-source)) + (dolist (a (slot-value source 'current-attributes)) + (funcall fn + (sax:attribute-namespace-uri a) + (sax:attribute-local-name a) + (sax:attribute-qname a) + (sax:attribute-value a) + (sax:attribute-specified-p a)))) + +(defmethod klacks:list-attributes ((source cxml-source)) + (slot-value source 'current-attributes)) + +(defun make-source + (input &rest args + &key validate dtd root entity-resolver disallow-internal-subset + pathname) + (declare (ignore validate dtd root entity-resolver disallow-internal-subset)) + (etypecase input + (xstream + (let ((*ctx* nil)) + (let ((zstream (make-zstream :input-stack (list input)))) + (peek-rune input) + (with-scratch-pads () + (apply #'%make-source + zstream + (loop + for (name value) on args by #'cddr + unless (eq name :pathname) + append (list name value))))))) + (stream + (let ((xstream (make-xstream input))) + (setf (xstream-name xstream) + (make-stream-name + :entity-name "main document" + :entity-kind :main + :uri (pathname-to-uri + (merge-pathnames (or pathname (pathname input)))))) + (apply #'make-source xstream args))) + (pathname + (let* ((xstream + (make-xstream (open input :element-type '(unsigned-byte 8)))) + (source (apply #'make-source + xstream + :pathname input + args))) + (push xstream (slot-value source 'temporary-streams)) + source)) + (rod + (let ((xstream (string->xstream input))) + (setf (xstream-name xstream) + (make-stream-name + :entity-name "main document" + :entity-kind :main + :uri nil)) + (apply #'make-source xstream args))))) + +(defun %make-source + (input &key validate dtd root entity-resolver disallow-internal-subset + error-culprit) + ;; check types of user-supplied arguments for better error messages: + (check-type validate boolean) + (check-type dtd (or null extid)) + (check-type root (or null rod)) + (check-type entity-resolver (or null function symbol)) + (check-type disallow-internal-subset boolean) + (let* ((context + (make-context :handler nil + :main-zstream input + :entity-resolver entity-resolver + :disallow-internal-subset disallow-internal-subset)) + (source + (make-instance 'cxml-source + :context context + :validate validate + :dtd dtd + :root root + :error-culprit error-culprit + :scratch-pad *scratch-pad* + :scratch-pad-2 *scratch-pad-2* + :scratch-pad-3 *scratch-pad-3* + :scratch-pad-4 *scratch-pad-4*))) + (setf (slot-value source 'continuation) + (lambda () (klacks/xmldecl source input))) + source)) + +(defun klacks/xmldecl (source input) + (with-source (source current-key current-values) + (let ((hd (p/xmldecl input))) + (setf current-key :start-document) + (setf current-values + (when hd + (list (xml-header-version hd) + (xml-header-encoding hd) + (xml-header-standalone-p hd)))) + (lambda () + (klacks/misc*-2 source input + (lambda () + (klacks/doctype source input))))))) + +(defun klacks/misc*-2 (source input successor) + (with-source (source current-key current-values) + (multiple-value-bind (cat sem) (peek-token input) + (case cat + (:COMMENT + (setf current-key :comment) + (setf current-values (list sem)) + (consume-token input) + (lambda () (klacks/misc*-2 source input successor))) + (:PI + (setf current-key :processing-instruction) + (setf current-values (list (car sem) (cdr sem))) + (consume-token input) + (lambda () (klacks/misc*-2 source input successor))) + (:S + (consume-token input) + (klacks/misc*-2 source input successor)) + (t + (funcall successor)))))) + +(defun klacks/doctype (source input) + (with-source (source current-key current-values validate dtd) + (let ((cont (lambda () (klacks/finish-doctype source input))) + ignoreme name extid) + (prog1 + (cond + ((eq (peek-token input) :xstream zstream name :general nil))) + (push new-xstream temporary-streams) + (push :stop (zstream-input-stack zstream)) + (zstream-push new-xstream zstream) + (let ((next + (lambda () + (klacks/entity-reference-2 source zstream new-xstream cont)))) + (etypecase (checked-get-entdef name :general) + (internal-entdef + (klacks/content source zstream next)) + (external-entdef + (klacks/ext-parsed-ent source zstream next))))))) + +(defun klacks/entity-reference-2 (source zstream new-xstream cont) + (with-source (source temporary-streams) + (unless (eq (peek-token zstream) :eof) + (wf-error zstream "Trailing garbage. - ~S" (peek-token zstream))) + (assert (eq (peek-token zstream) :eof)) + (assert (eq (pop (zstream-input-stack zstream)) new-xstream)) + (assert (eq (pop (zstream-input-stack zstream)) :stop)) + (setf (zstream-token-category zstream) nil) + (setf temporary-streams (remove new-xstream temporary-streams)) + (close-xstream new-xstream) + (funcall cont))) + +(defun klacks/ext-parsed-ent (source input cont) + (with-source (source) + (when (eq (peek-token input) :xml-decl) + (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input)))))) + (setup-encoding input hd)) + (consume-token input)) + (set-full-speed input) + (klacks/content source input cont))) + +#+(or) +(trace CXML::KLACKS/DOCTYPE + CXML::KLACKS/EXT-PARSED-ENT + CXML::KLACKS/MISC*-2 + CXML::KLACKS/ENTITY-REFERENCE + CXML::KLACKS/ENTITY-REFERENCE-2 + CXML::KLACKS/ELEMENT + CXML::KLACKS/ZTAG + CXML::KLACKS/XMLDECL + CXML::KLACKS/FINISH-DOCTYPE + CXML::KLACKS/ELEMENT-3 + CXML::KLACKS/EOF + CXML::KLACKS/ELEMENT-2 + CXML::KLACKS/CONTENT ) diff --git a/klacks/klacks.lisp b/klacks/klacks.lisp new file mode 100644 index 0000000..88c78f5 --- /dev/null +++ b/klacks/klacks.lisp @@ -0,0 +1,95 @@ +;;; -*- Mode: Lisp; readtable: runes; -*- +;;; (c) copyright 2007 David Lichteblau + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :cxml) + +(defclass klacks:source () ()) + +(defgeneric klacks:close-source (source)) + +(defgeneric klacks:peek (source)) +(defgeneric klacks:peek-value (source)) +(defgeneric klacks:consume (source)) + +(defgeneric klacks:map-attributes (fn source)) +(defgeneric klacks:list-attributes (source)) +;;;(defgeneric klacks:current-uri (source)) +;;;(defgeneric klacks:current-lname (source)) +;;;(defgeneric klacks:current-qname (source)) +;;;(defgeneric klacks:current-characters (source)) +(defgeneric klacks:current-cdata-section-p (source)) + +(defmacro klacks:with-open-source ((var source) &body body) + `(let ((,var ,source)) + (unwind-protect + (progn ,@body) + (klacks:close-source ,var)))) + +(defun klacks:current-uri (source) + (multiple-value-bind (key uri lname qname) (klacks:peek source) + (declare (ignore lname qname)) + (check-type key (member :start-element :end-element)) + uri)) + +(defun klacks:current-lname (source) + (multiple-value-bind (key uri lname qname) (klacks:peek source) + (declare (ignore uri qname)) + (check-type key (member :start-element :end-element)) + lname)) + +(defun klacks:current-qname (source) + (multiple-value-bind (key uri lname qname) (klacks:peek source) + (declare (ignore uri lname)) + (check-type key (member :start-element :end-element)) + qname)) + +(defun klacks:current-characters (source) + (multiple-value-bind (key characters) (klacks:peek source) + (check-type key (member :characters)) + characters)) + +(defun klacks:serialize-source (source handler) + (loop + (multiple-value-bind (key a b c) (klacks:peek source) + (case key + (:start-document + (sax:start-document handler)) + (:characters + (cond + ((klacks:current-cdata-section-p source) + (sax:start-cdata source) + (sax:characters handler a) + (sax:end-cdata source)) + (T + (sax:characters handler a)))) + (:processing-instruction + (sax:processing-instruction handler a b)) + (:comment + (sax:comment handler a)) + (:dtd + (sax:start-dtd handler a b c) + (sax:end-dtd handler)) + (:start-element + (sax:start-element handler a b c (klacks:list-attributes source))) + (:end-element + (sax:end-element handler a b c)) + (:end-document + (return (sax:end-document handler))) + (t + (error "unexpected klacks key: ~A" key))) + (klacks:consume source)))) diff --git a/klacks/package.lisp b/klacks/package.lisp new file mode 100644 index 0000000..124071e --- /dev/null +++ b/klacks/package.lisp @@ -0,0 +1,38 @@ +;;; -*- Mode: Lisp; readtable: runes; -*- +;;; (c) copyright 2007 David Lichteblau + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(defpackage klacks + (:use) + (:export #:source + #:close-source + #:with-open-source + + #:peek + #:peek-value + + #:map-attributes + #:list-attributes + #:current-uri + #:current-lname + #:current-qname + #:current-characters + #:current-cdata-section-p + + #:consume + + #:serialize-source)) diff --git a/xml/package.lisp b/xml/package.lisp index 82cdb83..97d0c5a 100644 --- a/xml/package.lisp +++ b/xml/package.lisp @@ -83,4 +83,6 @@ #:make-namespace-normalizer #:make-whitespace-normalizer #:rod-to-utf8-string - #:utf8-string-to-rod)) + #:utf8-string-to-rod + + #:make-source)) diff --git a/xml/xml-parse.lisp b/xml/xml-parse.lisp index c2e8960..dc87174 100644 --- a/xml/xml-parse.lisp +++ b/xml/xml-parse.lisp @@ -68,11 +68,11 @@ ;; :stag ( . ) ;start tag ;; :etag ( . ) ;end tag ;; :ztag ( . ) ;empty tag -;; : @@ -194,11 +194,13 @@ (defvar *expand-pe-p* nil) -(defparameter *namespace-bindings* +(defparameter *initial-namespace-bindings* '((#"" . nil) (#"xmlns" . #"http://www.w3.org/2000/xmlns/") (#"xml" . #"http://www.w3.org/XML/1998/namespace"))) +(defparameter *namespace-bindings* *initial-namespace-bindings*) + ;;;; --------------------------------------------------------------------------- ;;;; xstreams ;;;; @@ -2571,22 +2573,16 @@ :main-zstream input :entity-resolver entity-resolver :disallow-internal-subset disallow-internal-subset)) - (*validate* validate)) + (*validate* validate) + (*namespace-bindings* *initial-namespace-bindings*)) (sax:start-document handler) ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc* ;; Misc ::= Comment | PI | S ;; xmldecl::='' ;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) - ;; - ;; we will use the attribute-value parser for the xml decl. (let ((*data-behaviour* :DTD)) ;; optional XMLDecl? - (cond ((eq (peek-token input) :xml-decl) - (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input)))))) - (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes)) - (setup-encoding input hd)) - (read-token input))) - (set-full-speed input) + (p/xmldecl input) ;; Misc* (p/misc*-2 input) ;; (doctypedecl Misc*)? @@ -2595,13 +2591,7 @@ (p/doctype-decl input dtd) (p/misc*-2 input)) (dtd - (let ((dummy (string->xstream ""))) - (setf (xstream-name dummy) - (make-stream-name - :entity-name "dummy doctype" - :entity-kind :main - :uri (zstream-base-sysid input))) - (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd))) + (synthesize-doctype dtd input)) ((and validate (not dtd)) (validity-error "invalid document: no doctype"))) (ensure-dtd) @@ -2610,28 +2600,65 @@ (setf (model-stack *ctx*) (list (make-root-model root)))) ;; element (let ((*data-behaviour* :DOC)) - (when (eq (peek-token input) :seen-<) - (multiple-value-bind (c s) - (read-token-after-|<| input (car (zstream-input-stack input))) - (setf (zstream-token-category input) c - (zstream-token-semantic input) s))) + (fix-seen-< input) (p/element input)) ;; optional Misc* (p/misc*-2 input) - (unless (eq (peek-token input) :eof) - (wf-error input "Garbage at end of document.")) - (when *validate* - (maphash (lambda (k v) - (unless v - (validity-error "(11) IDREF: ~S not defined" (rod-string k)))) - (id-table *ctx*)) - - (dolist (name (referenced-notations *ctx*)) - (unless (find-notation name (dtd *ctx*)) - (validity-error "(23) Notation Declared: ~S" (rod-string name))))) + (p/eof input) (sax:end-document handler)))) +(defun synthesize-doctype (dtd input) + (let ((dummy (string->xstream ""))) + (setf (xstream-name dummy) + (make-stream-name + :entity-name "dummy doctype" + :entity-kind :main + :uri (zstream-base-sysid input))) + (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd))) + +(defun fix-seen-< (input) + (when (eq (peek-token input) :seen-<) + (multiple-value-bind (c s) + (read-token-after-|<| input (car (zstream-input-stack input))) + (setf (zstream-token-category input) c + (zstream-token-semantic input) s)))) + +(defun p/xmldecl (input) + ;; we will use the attribute-value parser for the xml decl. + (prog1 + (when (eq (peek-token input) :xml-decl) + (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input)))))) + (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes)) + (setup-encoding input hd) + (read-token input) + hd)) + (set-full-speed input))) + +(defun p/eof (input) + (unless (eq (peek-token input) :eof) + (wf-error input "Garbage at end of document.")) + (when *validate* + (maphash (lambda (k v) + (unless v + (validity-error "(11) IDREF: ~S not defined" (rod-string k)))) + (id-table *ctx*)) + + (dolist (name (referenced-notations *ctx*)) + (unless (find-notation name (dtd *ctx*)) + (validity-error "(23) Notation Declared: ~S" (rod-string name)))))) + (defun p/element (input) + (multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input) + (sax:start-element (handler *ctx*) uri lname qname attrs) + (when (eq cat :stag) + (let ((*namespace-bindings* n-b)) + (p/content input)) + (p/etag input qname)) + (sax:end-element (handler *ctx*) uri lname qname) + (undeclare-namespaces new-b) + (validate-end-element *ctx* qname))) + +(defun p/sztag (input) (multiple-value-bind (cat sem) (read-token input) (case cat ((:stag :ztag)) @@ -2657,28 +2684,39 @@ (setf attrs (remove-if (compose #'xmlns-attr-p #'sax:attribute-qname) attrs))) - (cond - ((eq cat :ztag) - (sax:start-element (handler *ctx*) uri local-name name attrs) - (sax:end-element (handler *ctx*) uri local-name name)) - - ((eq cat :stag) - (sax:start-element (handler *ctx*) uri local-name name attrs) - (p/content input) - (multiple-value-bind (cat2 sem2) (read-token input) - (unless (and (eq cat2 :etag) - (eq (car sem2) name)) - (wf-error input "Bad nesting. ~S / ~S" - (mu name) - (mu (cons cat2 sem2)))) - (when (cdr sem2) - (wf-error input "no attributes allowed in end tag"))) - (sax:end-element (handler *ctx*) uri local-name name)) - - (t - (wf-error input "Expecting element, got ~S." cat)))) - (undeclare-namespaces new-namespaces)) - (validate-end-element *ctx* name)))) + (values cat + *namespace-bindings* + new-namespaces + uri local-name name attrs)))))) + +(defun p/etag (input qname) + (multiple-value-bind (cat2 sem2) (read-token input) + (unless (and (eq cat2 :etag) + (eq (car sem2) qname)) + (wf-error input "Bad nesting. ~S / ~S" + (mu qname) + (mu (cons cat2 sem2)))) + (when (cdr sem2) + (wf-error input "no attributes allowed in end tag")))) + +(defun process-characters (input sem) + (consume-token input) + (when (search #"]]>" sem) + (wf-error input "']]>' not allowed in CharData")) + (validate-characters *ctx* sem)) + +(defun process-cdata-section (input) + (consume-token input) + (let ((input (car (zstream-input-stack input)))) + (unless (and (rune= #/C (read-rune input)) + (rune= #/D (read-rune input)) + (rune= #/A (read-rune input)) + (rune= #/T (read-rune input)) + (rune= #/A (read-rune input)) + (rune= #/\[ (read-rune input))) + (wf-error input "After '" sem) - (wf-error input "']]>' not allowed in CharData")) - (validate-characters *ctx* sem) + (process-characters input sem) (sax:characters (handler *ctx*) sem) (p/content input)) ((:ENTITY-REF) @@ -2709,21 +2744,11 @@ (peek-token input)))))) (p/content input)))) ((: