+ <li>Gilbert Baumann has clarified the license as Lisp-LGPL.</li>

This commit is contained in:
dlichteblau
2005-11-28 22:33:29 +00:00
parent e688f34235
commit 938dca13b5
23 changed files with 456 additions and 1117 deletions

View File

@ -1,6 +1,6 @@
;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*-
;;;;
;;;; This file is part of the CXML parser, released under (L)LGPL.
;;;; This file is part of the CXML parser, released under Lisp-LGPL.
;;;; See file COPYING for details.
;;;;
;;;; Developed 2004 for headcraft - http://headcraft.de/
@ -15,27 +15,208 @@
;;; and will will fail to parse files using other parser's extensions.
;;; (Jedenfalls behauptet das die Spec.)
;;; A long-term solution might be an XML Schema validator.
;;;
;;; XXX Das mit dem :dtd geht natuerlich gar nicht. Die Option muss weg.
(defvar *prefer* nil)
(defvar *catalog*
(defvar *prefer* :public)
(defvar *default-catalog*
'(;; libxml standard
"/etc/xml/catalog"
;; FreeBSD
"/usr/local/share/xml/catalog.ports"))
(defparameter *catalog-dtd* nil)
(defstruct (catalog (:constructor %make-catalog ()))
main-files
(dtd-cache (make-dtd-cache))
(file-table (puri:make-uri-space)))
(defun parse-catalog (files)
(defstruct (entry-file (:conc-name ""))
(system-entries) ;extid 2
(rewrite-system-entries) ; 3
(delegate-system-entries) ; 4
(public-entries) ; 5
(delegate-public-entries) ; 6
(uri-entries) ;uri 2
(rewrite-uri-entries) ; 3
(delegate-uri-entries) ; 4
(next-catalog-entries) ; 5/7
)
(defun starts-with-p (string prefix)
(let ((mismatch (mismatch string prefix)))
(or (null mismatch) (= mismatch (length prefix)))))
(defun normalize-public (str)
(setf str (rod-to-utf8-string (rod str)))
(flet ((whitespacep (c)
(find c #.(map 'string #'code-char '(#x9 #xa #xd #x20)))))
(let ((start (position-if-not #'whitespacep str))
(end (position-if-not #'whitespacep str :from-end t))
(spacep nil))
(with-output-to-string (out)
(when start
(loop for i from start to end do
(let ((c (char str i)))
(cond
((whitespacep c)
(unless spacep
(setf spacep t)
(write-char #\space out)))
(t
(setf spacep nil)
(write-char c out))))))))))
(defun normalize-uri (str)
(when (typep str 'puri:uri)
(setf str (puri:render-uri str nil)))
(setf str (rod-to-utf8-string (rod str)))
(with-output-to-string (out)
(loop for ch across str do
(let ((c (char-code ch)))
(if (< c 15)
(write-string (string-upcase (format nil "%~2,'0X" c)) out)
(write-char ch out))))))
(defun unwrap-publicid (str)
(normalize-public
(with-output-to-string (out)
(let ((i (length "urn:publicid:"))
(n (length str)))
(while (< i n)
(let ((c (char str i)))
(case c
(#\+ (write-char #\space out))
(#\: (write-string "//" out))
(#\; (write-string "::" out))
(#\%
(let ((code
(parse-integer str
:start (+ i 1)
:end (+ i 3)
:radix 16)))
(write-char (code-char code) out))
(incf i 2))
(t (write-char c out))))
(incf i))))))
(defun match-exact (key table &optional check-prefer)
(dolist (pair table)
(destructuring-bind (from to &optional prefer) pair
(when (and (equal key from) (or (not check-prefer) (eq prefer :public)))
(return to)))))
(defun match-prefix/rewrite (key table &optional check-prefer)
(let ((match nil)
(match-length -1))
(dolist (pair table)
(destructuring-bind (from to &optional prefer) pair
(when (and (or (not check-prefer) (eq prefer :public))
(starts-with-p key from)
(> (length from) match-length))
(setf match-length (length from))
(setf match to))))
(if match
(concatenate 'string
match
(subseq key match-length))
nil)))
(defun match-prefix/sorted (key table &optional check-prefer)
(let ((result '()))
(loop
(dolist (pair table)
(destructuring-bind (from to &optional prefer) pair
(when (and (or (not check-prefer) (eq prefer :public))
(starts-with-p key from))
(push (cons (length from) to) result))))
(mapcar #'cdr (sort result #'> :key #'car))))
(defun resolve-extid (public system catalog)
(when public (setf public (normalize-public public)))
(when system (setf system (normalize-uri system)))
(when (and system (starts-with-p system "urn:publicid:"))
(let ((new-public (unwrap-publicid system)))
(assert (or (null public) (equal public new-public)))
(setf public new-public
system nil)))
(let ((files (catalog-main-files catalog))
(seen '()))
(while files
(let ((file (pop files))
(delegates nil))
(unless (typep file 'entry-file)
(setf file (find-catalog-file file catalog)))
(unless (or (null file) (member file seen))
(push file seen)
(when system
(let ((result
(or (match-exact system (system-entries file))
(match-prefix/rewrite
system
(rewrite-system-entries file)))))
(when result
(return result))
(setf delegates
(match-prefix/sorted
system
(delegate-system-entries file)))))
(when (and public (not delegates))
(let* ((check-prefer (and system t))
(result
(match-exact public
(public-entries file)
check-prefer)))
(when result
(return result))
(setf delegates
(match-prefix/sorted
public
(delegate-public-entries file)
check-prefer))))
(if delegates
(setf files delegates)
(setf files (append (next-catalog-entries file) files))))))))
(defun resolve-uri (uri catalog)
(setf uri (normalize-uri uri))
(when (starts-with-p uri "urn:publicid:")
(return-from resolve-uri
(resolve-extid (unwrap-publicid uri) nil catalog)))
(let ((files (catalog-main-files catalog))
(seen '()))
(while files
(let ((file (pop files)))
(unless file
(return))
(multiple-value-bind (entries next) (parse-catalog-file file)
(setf result (append result entries))
(setf files (append next files)))))
(unless (typep file 'entry-file)
(setf file (find-catalog-file file catalog)))
(unless (or (null file) (member file seen))
(push file seen)
(let ((result
(or (match-exact uri (uri-entries file))
(match-prefix/rewrite uri (rewrite-uri-entries file)))))
(when result
(return result))
(let* ((delegate-entries
(delegate-uri-entries file))
(delegates
(match-prefix/sorted uri delegate-entries)))
(if delegates
(setf files delegates)
(setf files (append (next-catalog-entries file) files))))))))))
(defun find-catalog-file (uri catalog)
(setf uri (if (stringp uri) (safe-parse-uri uri) uri))
(let* ((*dtd-cache* (catalog-dtd-cache catalog))
(*cache-all-dtds* t)
(file (parse-catalog-file uri)))
(when file
(let ((interned (puri:intern-uri uri (catalog-file-table catalog))))
(setf (getf (puri:uri-plist interned) 'catalog) file)))
file))
(defun make-catalog (&optional (uris *default-catalog*))
(let ((result (%make-catalog)))
(setf (catalog-main-files result)
(loop
for uri in uris
for file = (find-catalog-file uri result)
when file collect file))
result))
(defun parse-catalog-file (uri)
@ -44,25 +225,38 @@
(file-error () nil)
(parser-error () nil)))
(defparameter *catalog-dtd*
(let* ((cxml
(slot-value (asdf:find-system :cxml) 'asdf::relative-pathname))
(dtd (merge-pathnames "catalog.dtd" cxml)))
(with-open-file (s dtd :element-type '(unsigned-byte 8))
(let ((bytes
(make-array (file-length s) :element-type '(unsigned-byte 8))))
(read-sequence bytes s)
bytes))))
(defun parse-catalog-file/strict (uri)
(when (stringp uri)
(setf uri (puri:parse-uri uri)))
(unless *catalog-dtd*
(let ((cxml
(slot-value (asdf:find-system :cxml) 'asdf::relative-pathname)))
(setf *catalog-dtd*
(parse-dtd-file (merge-pathnames "catalog.dtd" cxml)))))
(with-open-stream (s (open (uri-to-pathname uri)
:element-type '(unsigned-byte 8)
:direction :input))
(parse-stream s
(make-instance 'catalog-parser :uri uri)
:validate t
;; XXX das geht nicht
:dtd *catalog-dtd*)))
(let* ((*catalog* nil)
(dtd-sysid
(puri:parse-uri "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd")))
(flet ((entity-resolver (public system)
(declare (ignore public))
(if (puri:uri= system dtd-sysid)
(make-octet-input-stream *catalog-dtd*)
nil)))
(with-open-stream (s (open (uri-to-pathname uri)
:element-type '(unsigned-byte 8)
:direction :input))
(parse-stream s
(make-recoder (make-instance 'catalog-parser :uri uri)
#'rod-to-utf8-string)
:validate t
:dtd (make-extid nil dtd-sysid)
:root #"catalog"
:entity-resolver #'entity-resolver)))))
(defclass catalog-parser ()
((entries :initform '() :accessor entries)
((result :initform (make-entry-file) :accessor result)
(next :initform '() :accessor next)
(prefer-stack :initform (list *prefer*) :accessor prefer-stack)
(base-stack :accessor base-stack)))
@ -78,79 +272,69 @@
(car (base-stack handler)))
(defun get-attribute/lname (name attributes)
(member name attributes
:key (lambda (a)
(or (sax:attribute-local-name a)
(sax:attribute-qname a)))
:test #'rod=))
(let ((a (find name attributes
:key (lambda (a)
(or (sax:attribute-local-name a)
(sax:attribute-qname a)))
:test #'string=)))
(and a (sax:attribute-value a))))
(defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs)
(declare (ignore uri))
(setf lname (or lname qname))
;; we can dispatch on lnames only because we validate against the DTD,
;; which disallows other namespaces.
(push (string-or (get-attribute/lname #"prefer" attrs) (prefer handler))
(push (string-or (get-attribute/lname "prefer" attrs) (prefer handler))
(prefer-stack handler))
(push (string-or (get-attribute/lname #"base" attrs) (base handler))
(push (string-or (get-attribute/lname "base" attrs) (base handler))
(base-stack handler))
(cond
((rod= lname #"public")
(push (list :public
(get-attribute/lname #"publicId" attrs)
(puri:merge-uris
(puri:parse-uri (get-attribute/lname #"uri" attrs))
(base handler)))
(entries handler)))
((rod= lname #"system")
(push (list :system
(get-attribute/lname #"systemId" attrs)
(puri:merge-uris
(puri:parse-uri (get-attribute/lname #"uri" attrs))
(base handler)))
(entries handler)))
((rod= lname #"uri")
(push (list :uri
(get-attribute/lname #"name" attrs)
(puri:merge-uris
(puri:parse-uri (get-attribute/lname #"uri" attrs))
(base handler)))
(entries handler)))
((rod= lname #"rewriteSystem")
(push (list :rewrite-system
(get-attribute/lname #"systemIdStartString" attrs)
(get-attribute/lname #"rewritePrefix" attrs))
(entries handler)))
((rod= lname #"rewriteURI")
(push (list :rewrite-uri
(get-attribute/lname #"uriStartString" attrs)
(get-attribute/lname #"rewritePrefix" attrs))
(entries handler)))
((rod= lname #"delegatePublic")
(push (list :delegate-public
(get-attribute/lname #"publicIdStartString" attrs)
(puri:merge-uris
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
(base handler)))
(entries handler)))
((rod= lname #"delegateSystem")
(push (list :delegate-system
(get-attribute/lname #"systemIdStartString" attrs)
(puri:merge-uris
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
(base handler)))
(entries handler)))
((rod= lname #"delegateURI")
(push (list :delegate-uri
(get-attribute/lname #"uriStartString" attrs)
(puri:merge-uris
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
(base handler)))
(entries handler)))
((rod= lname #"nextCatalog")
(push (puri:merge-uris
(puri:parse-uri (get-attribute/lname #"catalog" attrs))
(base handler))
(next handler)))))
(flet ((geturi (lname)
(puri:merge-uris
(safe-parse-uri (get-attribute/lname lname attrs))
(base handler))))
(cond
((string= lname "public")
(push (list (normalize-public (get-attribute/lname "publicId" attrs))
(geturi "uri")
(prefer handler))
(public-entries (result handler))))
((string= lname "system")
(push (list (normalize-uri (get-attribute/lname "systemId" attrs))
(geturi "uri"))
(system-entries (result handler))))
((string= lname "uri")
(push (list (normalize-uri (get-attribute/lname "name" attrs))
(geturi "uri"))
(uri-entries (result handler))))
((string= lname "rewriteSystem")
(push (list (normalize-uri
(get-attribute/lname "systemIdStartString" attrs))
(get-attribute/lname "rewritePrefix" attrs))
(rewrite-system-entries (result handler))))
((string= lname "rewriteURI")
(push (list (normalize-uri
(get-attribute/lname "uriStartString" attrs))
(get-attribute/lname "rewritePrefix" attrs))
(rewrite-uri-entries (result handler))))
((string= lname "delegatePublic")
(push (list (normalize-public
(get-attribute/lname "publicIdStartString" attrs))
(geturi "catalog")
(prefer handler))
(delegate-public-entries (result handler))))
((string= lname "delegateSystem")
(push (list (normalize-uri
(get-attribute/lname "systemIdStartString" attrs))
(geturi "catalog"))
(delegate-system-entries (result handler))))
((string= lname "delegateURI")
(push (list (normalize-uri
(get-attribute/lname "uriStartString" attrs))
(geturi "catalog"))
(delegate-uri-entries (result handler))))
((string= lname "nextCatalog")
(push (geturi "catalog")
(next-catalog-entries (result handler)))))))
(defmethod sax:end-element ((handler catalog-parser) uri lname qname)
(declare (ignore uri lname qname))
@ -158,4 +342,4 @@
(pop (prefer-stack handler)))
(defmethod sax:end-document ((handler catalog-parser))
(values (reverse (entries handler)) (reverse (next handler))))
(result handler))