+ <li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>
This commit is contained in:
374
xml/catalog.lisp
374
xml/catalog.lisp
@ -1,6 +1,6 @@
|
||||
;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*-
|
||||
;;;;
|
||||
;;;; This file is part of the CXML parser, released under (L)LGPL.
|
||||
;;;; This file is part of the CXML parser, released under Lisp-LGPL.
|
||||
;;;; See file COPYING for details.
|
||||
;;;;
|
||||
;;;; Developed 2004 for headcraft - http://headcraft.de/
|
||||
@ -15,27 +15,208 @@
|
||||
;;; and will will fail to parse files using other parser's extensions.
|
||||
;;; (Jedenfalls behauptet das die Spec.)
|
||||
;;; A long-term solution might be an XML Schema validator.
|
||||
;;;
|
||||
;;; XXX Das mit dem :dtd geht natuerlich gar nicht. Die Option muss weg.
|
||||
|
||||
(defvar *prefer* nil)
|
||||
(defvar *catalog*
|
||||
(defvar *prefer* :public)
|
||||
(defvar *default-catalog*
|
||||
'(;; libxml standard
|
||||
"/etc/xml/catalog"
|
||||
;; FreeBSD
|
||||
"/usr/local/share/xml/catalog.ports"))
|
||||
|
||||
(defparameter *catalog-dtd* nil)
|
||||
(defstruct (catalog (:constructor %make-catalog ()))
|
||||
main-files
|
||||
(dtd-cache (make-dtd-cache))
|
||||
(file-table (puri:make-uri-space)))
|
||||
|
||||
(defun parse-catalog (files)
|
||||
(defstruct (entry-file (:conc-name ""))
|
||||
(system-entries) ;extid 2
|
||||
(rewrite-system-entries) ; 3
|
||||
(delegate-system-entries) ; 4
|
||||
(public-entries) ; 5
|
||||
(delegate-public-entries) ; 6
|
||||
(uri-entries) ;uri 2
|
||||
(rewrite-uri-entries) ; 3
|
||||
(delegate-uri-entries) ; 4
|
||||
(next-catalog-entries) ; 5/7
|
||||
)
|
||||
|
||||
(defun starts-with-p (string prefix)
|
||||
(let ((mismatch (mismatch string prefix)))
|
||||
(or (null mismatch) (= mismatch (length prefix)))))
|
||||
|
||||
(defun normalize-public (str)
|
||||
(setf str (rod-to-utf8-string (rod str)))
|
||||
(flet ((whitespacep (c)
|
||||
(find c #.(map 'string #'code-char '(#x9 #xa #xd #x20)))))
|
||||
(let ((start (position-if-not #'whitespacep str))
|
||||
(end (position-if-not #'whitespacep str :from-end t))
|
||||
(spacep nil))
|
||||
(with-output-to-string (out)
|
||||
(when start
|
||||
(loop for i from start to end do
|
||||
(let ((c (char str i)))
|
||||
(cond
|
||||
((whitespacep c)
|
||||
(unless spacep
|
||||
(setf spacep t)
|
||||
(write-char #\space out)))
|
||||
(t
|
||||
(setf spacep nil)
|
||||
(write-char c out))))))))))
|
||||
|
||||
(defun normalize-uri (str)
|
||||
(when (typep str 'puri:uri)
|
||||
(setf str (puri:render-uri str nil)))
|
||||
(setf str (rod-to-utf8-string (rod str)))
|
||||
(with-output-to-string (out)
|
||||
(loop for ch across str do
|
||||
(let ((c (char-code ch)))
|
||||
(if (< c 15)
|
||||
(write-string (string-upcase (format nil "%~2,'0X" c)) out)
|
||||
(write-char ch out))))))
|
||||
|
||||
(defun unwrap-publicid (str)
|
||||
(normalize-public
|
||||
(with-output-to-string (out)
|
||||
(let ((i (length "urn:publicid:"))
|
||||
(n (length str)))
|
||||
(while (< i n)
|
||||
(let ((c (char str i)))
|
||||
(case c
|
||||
(#\+ (write-char #\space out))
|
||||
(#\: (write-string "//" out))
|
||||
(#\; (write-string "::" out))
|
||||
(#\%
|
||||
(let ((code
|
||||
(parse-integer str
|
||||
:start (+ i 1)
|
||||
:end (+ i 3)
|
||||
:radix 16)))
|
||||
(write-char (code-char code) out))
|
||||
(incf i 2))
|
||||
(t (write-char c out))))
|
||||
(incf i))))))
|
||||
|
||||
(defun match-exact (key table &optional check-prefer)
|
||||
(dolist (pair table)
|
||||
(destructuring-bind (from to &optional prefer) pair
|
||||
(when (and (equal key from) (or (not check-prefer) (eq prefer :public)))
|
||||
(return to)))))
|
||||
|
||||
(defun match-prefix/rewrite (key table &optional check-prefer)
|
||||
(let ((match nil)
|
||||
(match-length -1))
|
||||
(dolist (pair table)
|
||||
(destructuring-bind (from to &optional prefer) pair
|
||||
(when (and (or (not check-prefer) (eq prefer :public))
|
||||
(starts-with-p key from)
|
||||
(> (length from) match-length))
|
||||
(setf match-length (length from))
|
||||
(setf match to))))
|
||||
(if match
|
||||
(concatenate 'string
|
||||
match
|
||||
(subseq key match-length))
|
||||
nil)))
|
||||
|
||||
(defun match-prefix/sorted (key table &optional check-prefer)
|
||||
(let ((result '()))
|
||||
(loop
|
||||
(dolist (pair table)
|
||||
(destructuring-bind (from to &optional prefer) pair
|
||||
(when (and (or (not check-prefer) (eq prefer :public))
|
||||
(starts-with-p key from))
|
||||
(push (cons (length from) to) result))))
|
||||
(mapcar #'cdr (sort result #'> :key #'car))))
|
||||
|
||||
(defun resolve-extid (public system catalog)
|
||||
(when public (setf public (normalize-public public)))
|
||||
(when system (setf system (normalize-uri system)))
|
||||
(when (and system (starts-with-p system "urn:publicid:"))
|
||||
(let ((new-public (unwrap-publicid system)))
|
||||
(assert (or (null public) (equal public new-public)))
|
||||
(setf public new-public
|
||||
system nil)))
|
||||
(let ((files (catalog-main-files catalog))
|
||||
(seen '()))
|
||||
(while files
|
||||
(let ((file (pop files))
|
||||
(delegates nil))
|
||||
(unless (typep file 'entry-file)
|
||||
(setf file (find-catalog-file file catalog)))
|
||||
(unless (or (null file) (member file seen))
|
||||
(push file seen)
|
||||
(when system
|
||||
(let ((result
|
||||
(or (match-exact system (system-entries file))
|
||||
(match-prefix/rewrite
|
||||
system
|
||||
(rewrite-system-entries file)))))
|
||||
(when result
|
||||
(return result))
|
||||
(setf delegates
|
||||
(match-prefix/sorted
|
||||
system
|
||||
(delegate-system-entries file)))))
|
||||
(when (and public (not delegates))
|
||||
(let* ((check-prefer (and system t))
|
||||
(result
|
||||
(match-exact public
|
||||
(public-entries file)
|
||||
check-prefer)))
|
||||
(when result
|
||||
(return result))
|
||||
(setf delegates
|
||||
(match-prefix/sorted
|
||||
public
|
||||
(delegate-public-entries file)
|
||||
check-prefer))))
|
||||
(if delegates
|
||||
(setf files delegates)
|
||||
(setf files (append (next-catalog-entries file) files))))))))
|
||||
|
||||
(defun resolve-uri (uri catalog)
|
||||
(setf uri (normalize-uri uri))
|
||||
(when (starts-with-p uri "urn:publicid:")
|
||||
(return-from resolve-uri
|
||||
(resolve-extid (unwrap-publicid uri) nil catalog)))
|
||||
(let ((files (catalog-main-files catalog))
|
||||
(seen '()))
|
||||
(while files
|
||||
(let ((file (pop files)))
|
||||
(unless file
|
||||
(return))
|
||||
(multiple-value-bind (entries next) (parse-catalog-file file)
|
||||
(setf result (append result entries))
|
||||
(setf files (append next files)))))
|
||||
(unless (typep file 'entry-file)
|
||||
(setf file (find-catalog-file file catalog)))
|
||||
(unless (or (null file) (member file seen))
|
||||
(push file seen)
|
||||
(let ((result
|
||||
(or (match-exact uri (uri-entries file))
|
||||
(match-prefix/rewrite uri (rewrite-uri-entries file)))))
|
||||
(when result
|
||||
(return result))
|
||||
(let* ((delegate-entries
|
||||
(delegate-uri-entries file))
|
||||
(delegates
|
||||
(match-prefix/sorted uri delegate-entries)))
|
||||
(if delegates
|
||||
(setf files delegates)
|
||||
(setf files (append (next-catalog-entries file) files))))))))))
|
||||
|
||||
(defun find-catalog-file (uri catalog)
|
||||
(setf uri (if (stringp uri) (safe-parse-uri uri) uri))
|
||||
(let* ((*dtd-cache* (catalog-dtd-cache catalog))
|
||||
(*cache-all-dtds* t)
|
||||
(file (parse-catalog-file uri)))
|
||||
(when file
|
||||
(let ((interned (puri:intern-uri uri (catalog-file-table catalog))))
|
||||
(setf (getf (puri:uri-plist interned) 'catalog) file)))
|
||||
file))
|
||||
|
||||
(defun make-catalog (&optional (uris *default-catalog*))
|
||||
(let ((result (%make-catalog)))
|
||||
(setf (catalog-main-files result)
|
||||
(loop
|
||||
for uri in uris
|
||||
for file = (find-catalog-file uri result)
|
||||
when file collect file))
|
||||
result))
|
||||
|
||||
(defun parse-catalog-file (uri)
|
||||
@ -44,25 +225,38 @@
|
||||
(file-error () nil)
|
||||
(parser-error () nil)))
|
||||
|
||||
(defparameter *catalog-dtd*
|
||||
(let* ((cxml
|
||||
(slot-value (asdf:find-system :cxml) 'asdf::relative-pathname))
|
||||
(dtd (merge-pathnames "catalog.dtd" cxml)))
|
||||
(with-open-file (s dtd :element-type '(unsigned-byte 8))
|
||||
(let ((bytes
|
||||
(make-array (file-length s) :element-type '(unsigned-byte 8))))
|
||||
(read-sequence bytes s)
|
||||
bytes))))
|
||||
|
||||
(defun parse-catalog-file/strict (uri)
|
||||
(when (stringp uri)
|
||||
(setf uri (puri:parse-uri uri)))
|
||||
(unless *catalog-dtd*
|
||||
(let ((cxml
|
||||
(slot-value (asdf:find-system :cxml) 'asdf::relative-pathname)))
|
||||
(setf *catalog-dtd*
|
||||
(parse-dtd-file (merge-pathnames "catalog.dtd" cxml)))))
|
||||
(with-open-stream (s (open (uri-to-pathname uri)
|
||||
:element-type '(unsigned-byte 8)
|
||||
:direction :input))
|
||||
(parse-stream s
|
||||
(make-instance 'catalog-parser :uri uri)
|
||||
:validate t
|
||||
;; XXX das geht nicht
|
||||
:dtd *catalog-dtd*)))
|
||||
(let* ((*catalog* nil)
|
||||
(dtd-sysid
|
||||
(puri:parse-uri "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd")))
|
||||
(flet ((entity-resolver (public system)
|
||||
(declare (ignore public))
|
||||
(if (puri:uri= system dtd-sysid)
|
||||
(make-octet-input-stream *catalog-dtd*)
|
||||
nil)))
|
||||
(with-open-stream (s (open (uri-to-pathname uri)
|
||||
:element-type '(unsigned-byte 8)
|
||||
:direction :input))
|
||||
(parse-stream s
|
||||
(make-recoder (make-instance 'catalog-parser :uri uri)
|
||||
#'rod-to-utf8-string)
|
||||
:validate t
|
||||
:dtd (make-extid nil dtd-sysid)
|
||||
:root #"catalog"
|
||||
:entity-resolver #'entity-resolver)))))
|
||||
|
||||
(defclass catalog-parser ()
|
||||
((entries :initform '() :accessor entries)
|
||||
((result :initform (make-entry-file) :accessor result)
|
||||
(next :initform '() :accessor next)
|
||||
(prefer-stack :initform (list *prefer*) :accessor prefer-stack)
|
||||
(base-stack :accessor base-stack)))
|
||||
@ -78,79 +272,69 @@
|
||||
(car (base-stack handler)))
|
||||
|
||||
(defun get-attribute/lname (name attributes)
|
||||
(member name attributes
|
||||
:key (lambda (a)
|
||||
(or (sax:attribute-local-name a)
|
||||
(sax:attribute-qname a)))
|
||||
:test #'rod=))
|
||||
(let ((a (find name attributes
|
||||
:key (lambda (a)
|
||||
(or (sax:attribute-local-name a)
|
||||
(sax:attribute-qname a)))
|
||||
:test #'string=)))
|
||||
(and a (sax:attribute-value a))))
|
||||
|
||||
(defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs)
|
||||
(declare (ignore uri))
|
||||
(setf lname (or lname qname))
|
||||
;; we can dispatch on lnames only because we validate against the DTD,
|
||||
;; which disallows other namespaces.
|
||||
(push (string-or (get-attribute/lname #"prefer" attrs) (prefer handler))
|
||||
(push (string-or (get-attribute/lname "prefer" attrs) (prefer handler))
|
||||
(prefer-stack handler))
|
||||
(push (string-or (get-attribute/lname #"base" attrs) (base handler))
|
||||
(push (string-or (get-attribute/lname "base" attrs) (base handler))
|
||||
(base-stack handler))
|
||||
(cond
|
||||
((rod= lname #"public")
|
||||
(push (list :public
|
||||
(get-attribute/lname #"publicId" attrs)
|
||||
(puri:merge-uris
|
||||
(puri:parse-uri (get-attribute/lname #"uri" attrs))
|
||||
(base handler)))
|
||||
(entries handler)))
|
||||
((rod= lname #"system")
|
||||
(push (list :system
|
||||
(get-attribute/lname #"systemId" attrs)
|
||||
(puri:merge-uris
|
||||
(puri:parse-uri (get-attribute/lname #"uri" attrs))
|
||||
(base handler)))
|
||||
(entries handler)))
|
||||
((rod= lname #"uri")
|
||||
(push (list :uri
|
||||
(get-attribute/lname #"name" attrs)
|
||||
(puri:merge-uris
|
||||
(puri:parse-uri (get-attribute/lname #"uri" attrs))
|
||||
(base handler)))
|
||||
(entries handler)))
|
||||
((rod= lname #"rewriteSystem")
|
||||
(push (list :rewrite-system
|
||||
(get-attribute/lname #"systemIdStartString" attrs)
|
||||
(get-attribute/lname #"rewritePrefix" attrs))
|
||||
(entries handler)))
|
||||
((rod= lname #"rewriteURI")
|
||||
(push (list :rewrite-uri
|
||||
(get-attribute/lname #"uriStartString" attrs)
|
||||
(get-attribute/lname #"rewritePrefix" attrs))
|
||||
(entries handler)))
|
||||
((rod= lname #"delegatePublic")
|
||||
(push (list :delegate-public
|
||||
(get-attribute/lname #"publicIdStartString" attrs)
|
||||
(puri:merge-uris
|
||||
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
|
||||
(base handler)))
|
||||
(entries handler)))
|
||||
((rod= lname #"delegateSystem")
|
||||
(push (list :delegate-system
|
||||
(get-attribute/lname #"systemIdStartString" attrs)
|
||||
(puri:merge-uris
|
||||
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
|
||||
(base handler)))
|
||||
(entries handler)))
|
||||
((rod= lname #"delegateURI")
|
||||
(push (list :delegate-uri
|
||||
(get-attribute/lname #"uriStartString" attrs)
|
||||
(puri:merge-uris
|
||||
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
|
||||
(base handler)))
|
||||
(entries handler)))
|
||||
((rod= lname #"nextCatalog")
|
||||
(push (puri:merge-uris
|
||||
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
|
||||
(base handler))
|
||||
(next handler)))))
|
||||
(flet ((geturi (lname)
|
||||
(puri:merge-uris
|
||||
(safe-parse-uri (get-attribute/lname lname attrs))
|
||||
(base handler))))
|
||||
(cond
|
||||
((string= lname "public")
|
||||
(push (list (normalize-public (get-attribute/lname "publicId" attrs))
|
||||
(geturi "uri")
|
||||
(prefer handler))
|
||||
(public-entries (result handler))))
|
||||
((string= lname "system")
|
||||
(push (list (normalize-uri (get-attribute/lname "systemId" attrs))
|
||||
(geturi "uri"))
|
||||
(system-entries (result handler))))
|
||||
((string= lname "uri")
|
||||
(push (list (normalize-uri (get-attribute/lname "name" attrs))
|
||||
(geturi "uri"))
|
||||
(uri-entries (result handler))))
|
||||
((string= lname "rewriteSystem")
|
||||
(push (list (normalize-uri
|
||||
(get-attribute/lname "systemIdStartString" attrs))
|
||||
(get-attribute/lname "rewritePrefix" attrs))
|
||||
(rewrite-system-entries (result handler))))
|
||||
((string= lname "rewriteURI")
|
||||
(push (list (normalize-uri
|
||||
(get-attribute/lname "uriStartString" attrs))
|
||||
(get-attribute/lname "rewritePrefix" attrs))
|
||||
(rewrite-uri-entries (result handler))))
|
||||
((string= lname "delegatePublic")
|
||||
(push (list (normalize-public
|
||||
(get-attribute/lname "publicIdStartString" attrs))
|
||||
(geturi "catalog")
|
||||
(prefer handler))
|
||||
(delegate-public-entries (result handler))))
|
||||
((string= lname "delegateSystem")
|
||||
(push (list (normalize-uri
|
||||
(get-attribute/lname "systemIdStartString" attrs))
|
||||
(geturi "catalog"))
|
||||
(delegate-system-entries (result handler))))
|
||||
((string= lname "delegateURI")
|
||||
(push (list (normalize-uri
|
||||
(get-attribute/lname "uriStartString" attrs))
|
||||
(geturi "catalog"))
|
||||
(delegate-uri-entries (result handler))))
|
||||
((string= lname "nextCatalog")
|
||||
(push (geturi "catalog")
|
||||
(next-catalog-entries (result handler)))))))
|
||||
|
||||
(defmethod sax:end-element ((handler catalog-parser) uri lname qname)
|
||||
(declare (ignore uri lname qname))
|
||||
@ -158,4 +342,4 @@
|
||||
(pop (prefer-stack handler)))
|
||||
|
||||
(defmethod sax:end-document ((handler catalog-parser))
|
||||
(values (reverse (entries handler)) (reverse (next handler))))
|
||||
(result handler))
|
||||
|
||||
Reference in New Issue
Block a user