SCL support (thanks to Douglas Crosher). Includes support for

implementations where URIs are valid namestrings, and a mode
        where normal streams are used instead of xstreams and ystreams
        (albeit both SCL-specific at this point).
This commit is contained in:
dlichteblau
2007-06-16 11:27:18 +00:00
parent ee394c591d
commit fb9a2fa002
9 changed files with 523 additions and 216 deletions

View File

@ -4,6 +4,9 @@
(defclass closure-source-file (cl-source-file) ()) (defclass closure-source-file (cl-source-file) ())
#+scl
(pushnew 'uri-is-namestring *features*)
#+sbcl #+sbcl
(defmethod perform :around ((o compile-op) (s closure-source-file)) (defmethod perform :around ((o compile-op) (s closure-source-file))
;; shut up already. Correctness first. ;; shut up already. Correctness first.
@ -30,7 +33,7 @@
(:file "space-normalizer" :depends-on ("xml-parse")) (:file "space-normalizer" :depends-on ("xml-parse"))
(:file "catalog" :depends-on ("xml-parse")) (:file "catalog" :depends-on ("xml-parse"))
(:file "sax-proxy" :depends-on ("xml-parse"))) (:file "sax-proxy" :depends-on ("xml-parse")))
:depends-on (:runes :puri :trivial-gray-streams)) :depends-on (:runes :puri #-scl :trivial-gray-streams))
(defclass utf8dom-file (closure-source-file) ((of))) (defclass utf8dom-file (closure-source-file) ((of)))

View File

@ -68,6 +68,12 @@
only. The old behaviour using pairs of prefix and local names only. The old behaviour using pairs of prefix and local names
was removed. (Thanks to Douglas Crosher.) was removed. (Thanks to Douglas Crosher.)
</li> </li>
<li>
SCL support (thanks to Douglas Crosher). Includes support for
implementations where URIs are valid namestrings, and a mode
where normal streams are used instead of xstreams and ystreams
(albeit both SCL-specific at this point).
</li>
</ul> </ul>
<p class="nomargin"><tt>rel-2007-05-26</tt></p> <p class="nomargin"><tt>rel-2007-05-26</tt></p>
<ul class="nomargin"> <ul class="nomargin">

View File

@ -52,7 +52,9 @@
#+rune-is-character "characters") #+rune-is-character "characters")
#+rune-is-integer (:file "utf8") #+rune-is-integer (:file "utf8")
(:file "syntax") (:file "syntax")
(:file "encodings") #-x&y-streams-are-stream (:file "encodings")
(:file "encodings-data") #-x&y-streams-are-stream (:file "encodings-data")
(:file "xstream") #-x&y-streams-are-stream (:file "xstream")
(:file "ystream"))) #-x&y-streams-are-stream (:file "ystream")
#+x&y-streams-are-stream (:file #+scl "stream-scl")
))

View File

@ -79,7 +79,11 @@
#:make-string-ystream/utf8 #:make-string-ystream/utf8
;; #+rune-is-integer ;; #+rune-is-integer
#:make-character-stream-ystream/utf8 #:make-character-stream-ystream/utf8
#:runes-to-utf8/adjustable-string)) #:runes-to-utf8/adjustable-string
#:rod-to-utf8-string
#:utf8-string-to-rod
#:make-octet-input-stream))
(defpackage :utf8-runes (defpackage :utf8-runes
(:use :cl) (:use :cl)

253
runes/stream-scl.lisp Normal file
View File

@ -0,0 +1,253 @@
;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
;;; ---------------------------------------------------------------------------
;;; Title: Fast streams
;;; Created: 1999-07-17
;;; Author: Douglas Crosher
;;; License: Lisp-LGPL (See file COPYING for details).
;;; ---------------------------------------------------------------------------
;;; (c) copyright 2007 by Douglas Crosher
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Library General Public License for more details.
;;;
;;; You should have received a copy of the GNU Library General Public
;;; License along with this library; if not, write to the
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;;; Boston, MA 02111-1307 USA.
(in-package :runes)
(eval-when (:compile-toplevel :load-toplevel :execute)
(defparameter *fast* '(optimize (speed 3) (safety 3))))
(deftype runes-encoding:encoding-error ()
'ext:character-conversion-error)
;;; xstream
(eval-when (:compile-toplevel :load-toplevel :execute)
(defclass xstream (ext:character-stream)
((name :initarg :name :initform nil
:accessor xstream-name)
(column :initarg :column :initform 0)
(line :initarg :line :initform 1)
(unread-column :initarg :unread-column :initform 0)))
(defclass eol-conversion-xstream (lisp::eol-conversion-input-stream xstream)
())
) ; eval-when
(defun make-eol-conversion-xstream (source-stream)
"Returns a character stream that conversion CR-LF pairs and lone CR
characters into single linefeed character."
(declare (type stream source-stream))
(let ((stream (ext:make-eol-conversion-stream source-stream
:input t
:close-stream-p t)))
(change-class stream 'eol-conversion-xstream)))
(definline xstream-p (stream)
(typep stream 'xstream))
(defun close-xstream (input)
(close input))
(definline read-rune (input)
(declare (type stream input)
(inline read-char)
#.*fast*)
(let ((char (read-char input nil :eof)))
(cond ((member char '(#\UFFFE #\UFFFF))
;; These characters are illegal within XML documents.
(simple-error 'ext:character-conversion-error
"~@<Illegal XML document character: ~S~:@>" char))
((eql char #\linefeed)
(setf (slot-value input 'unread-column) (slot-value input 'column))
(setf (slot-value input 'column) 0)
(incf (the kernel:index (slot-value input 'line))))
(t
(incf (the kernel:index (slot-value input 'column)))))
char))
(definline peek-rune (input)
(declare (type stream input)
(inline peek-char)
#.*fast*)
(peek-char nil input nil :eof))
(definline consume-rune (input)
(declare (type stream input)
(inline read-rune)
#.*fast*)
(read-rune input)
nil)
(definline unread-rune (rune input)
(declare (type stream input)
(inline unread-char)
#.*fast*)
(unread-char rune input)
(cond ((eql rune #\linefeed)
(setf (slot-value input 'column) (slot-value input 'unread-column))
(setf (slot-value input 'unread-column) 0)
(decf (the kernel:index (slot-value input 'line))))
(t
(decf (the kernel:index (slot-value input 'column)))))
nil)
(defun fread-rune (input)
(read-rune input))
(defun fpeek-rune (input)
(peek-rune input))
(defun xstream-position (input)
(file-position input))
(defun runes-encoding:find-encoding (encoding)
encoding)
(defun make-xstream (os-stream &key name
(speed 8192)
(initial-speed 1)
(initial-encoding :guess))
(declare (ignore speed))
(assert (eql initial-speed 1))
(assert (eq initial-encoding :guess))
(let* ((stream (ext:make-xml-character-conversion-stream os-stream
:input t
:close-stream-p t))
(xstream (make-eol-conversion-xstream stream)))
(setf (xstream-name xstream) name)
xstream))
(defclass xstream-string-input-stream (lisp::string-input-stream xstream)
())
(defun make-rod-xstream (string &key name)
(declare (type string string))
(let ((stream (make-string-input-stream string)))
(change-class stream 'xstream-string-input-stream :name name)))
;;; already at 'full speed' so just return the buffer size.
(defun set-to-full-speed (stream)
(length (ext:stream-in-buffer stream)))
(defun xstream-speed (stream)
(length (ext:stream-in-buffer stream)))
(defun xstream-line-number (stream)
(slot-value stream 'line))
(defun xstream-column-number (stream)
(slot-value stream 'column))
(defun xstream-encoding (stream)
(stream-external-format stream))
;;; the encoding will have already been detected, but it is checked against the
;;; declared encoding here.
(defun (setf xstream-encoding) (declared-encoding stream)
(let* ((initial-encoding (xstream-encoding stream))
(canonical-encoding
(cond ((and (eq initial-encoding :utf-16le)
(member declared-encoding '(:utf-16 :utf16 :utf-16le :utf16le)
:test 'string-equal))
:utf-16le)
((and (eq initial-encoding :utf-16be)
(member declared-encoding '(:utf-16 :utf16 :utf-16be :utf16be)
:test 'string-equal))
:utf-16be)
((and (eq initial-encoding :ucs-4be)
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4be :ucs4be)
:test 'string-equal))
:ucs4-be)
((and (eq initial-encoding :ucs-4le)
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4le :ucs4le)
:test 'string-equal))
:ucs4-le)
(t
declared-encoding))))
(unless (string-equal initial-encoding canonical-encoding)
(warn "Unable to change xstream encoding from ~S to ~S (~S)~%"
initial-encoding declared-encoding canonical-encoding))
declared-encoding))
;;; ystream - a run output stream.
(deftype ystream () 'stream)
(defun ystream-column (stream)
(ext:line-column stream))
(definline write-rune (rune stream)
(declare (inline write-char))
(write-char rune stream))
(defun write-rod (rod stream)
(declare (type rod rod)
(type stream stream))
(write-string rod stream))
(defun make-rod-ystream ()
(make-string-output-stream))
(defun close-ystream (stream)
(etypecase stream
(ext:string-output-stream
(get-output-stream-string stream))
(ext:character-conversion-output-stream
(let ((target (slot-value stream 'stream)))
(close stream)
(if (typep target 'ext:byte-output-stream)
(ext:get-output-stream-bytes target)
stream)))))
;;;; CHARACTER-STREAM-YSTREAM
(defun make-character-stream-ystream (target-stream)
target-stream)
;;;; OCTET-VECTOR-YSTREAM
(defun make-octet-vector-ystream ()
(let ((target (ext:make-byte-output-stream)))
(ext:make-character-conversion-stream target :output t
:external-format :utf-8
:close-stream-p t)))
;;;; OCTET-STREAM-YSTREAM
(defun make-octet-stream-ystream (os-stream)
(ext:make-character-conversion-stream os-stream :output t
:external-format :utf-8
:close-stream-p t))
;;;; helper functions
(defun rod-to-utf8-string (rod)
(ext:make-string-from-bytes (ext:make-bytes-from-string rod :utf8)
:iso-8859-1))
(defun utf8-string-to-rod (str)
(let ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)))
(ext:make-string-from-bytes bytes :utf-8)))
(defun make-octet-input-stream (octets)
(ext:make-byte-input-stream octets))

View File

@ -248,3 +248,50 @@
(defmethod close-ystream ((ystream string-ystream/utf8)) (defmethod close-ystream ((ystream string-ystream/utf8))
(get-output-stream-string (ystream-os-stream ystream)))) (get-output-stream-string (ystream-os-stream ystream))))
;;;; helper functions
(defun rod-to-utf8-string (rod)
(let ((out (make-buffer :element-type 'character)))
(runes-to-utf8/adjustable-string out rod (length rod))
out))
(defun utf8-string-to-rod (str)
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
(buffer (make-array (length bytes) :element-type '(unsigned-byte 16)))
(n (runes-encoding:decode-sequence
:utf-8 bytes 0 (length bytes) buffer 0 0 nil))
(result (make-array n :element-type 'rune)))
(map-into result #'code-rune buffer)
result))
(defclass octet-input-stream
(trivial-gray-stream-mixin fundamental-binary-input-stream)
((octets :initarg :octets)
(pos :initform 0)))
(defmethod close ((stream octet-input-stream) &key abort)
(declare (ignore abort))
(open-stream-p stream))
(defmethod stream-read-byte ((stream octet-input-stream))
(with-slots (octets pos) stream
(if (>= pos (length octets))
:eof
(prog1
(elt octets pos)
(incf pos)))))
(defmethod stream-read-sequence
((stream octet-input-stream) sequence start end &key &allow-other-keys)
(with-slots (octets pos) stream
(let* ((length (min (- end start) (- (length octets) pos)))
(end1 (+ start length))
(end2 (+ pos length)))
(replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
(setf pos end2)
end1)))
(defun make-octet-input-stream (octets)
(make-instance 'octet-input-stream :octets octets))

View File

@ -6,7 +6,7 @@
(in-package :cl-user) (in-package :cl-user)
(defpackage :cxml (defpackage :cxml
(:use :cl :runes :runes-encoding :trivial-gray-streams) (:use :cl :runes :runes-encoding #-scl :trivial-gray-streams)
(:export (:export
;; xstreams ;; xstreams
#:make-xstream #:make-xstream

View File

@ -619,16 +619,3 @@
(maybe-emit-start-tag) (maybe-emit-start-tag)
(sax:characters *sink* (rod data)) (sax:characters *sink* (rod data))
data) data)
(defun rod-to-utf8-string (rod)
(let ((out (make-buffer :element-type 'character)))
(runes-to-utf8/adjustable-string out rod (length rod))
out))
(defun utf8-string-to-rod (str)
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
(buffer (make-array (length bytes) :element-type '(unsigned-byte 16)))
(n (decode-sequence :utf-8 bytes 0 (length bytes) buffer 0 0 nil))
(result (make-array n :element-type 'rune)))
(map-into result #'code-rune buffer)
result))

View File

@ -1175,6 +1175,21 @@
token-semantic token-semantic
input-stack) input-stack)
(defun call-with-zstream (fn zstream)
(unwind-protect
(funcall fn zstream)
(dolist (input (zstream-input-stack zstream))
(cond #-x&y-streams-are-stream
((xstream-p input)
(close-xstream input))
#+x&y-streams-are-stream
((streamp input)
(close input))))))
(defmacro with-zstream ((zstream &rest args) &body body)
`(call-with-zstream (lambda (,zstream) ,@body)
(make-zstream ,@args)))
(defun read-token (input) (defun read-token (input)
(cond ((zstream-token-category input) (cond ((zstream-token-category input)
(multiple-value-prog1 (multiple-value-prog1
@ -2545,15 +2560,15 @@
(setf (dtd *ctx*) cached-dtd) (setf (dtd *ctx*) cached-dtd)
(report-cached-dtd cached-dtd)) (report-cached-dtd cached-dtd))
(t (t
(let* ((xi2 (xstream-open-extid effective-extid)) (let ((xi2 (xstream-open-extid effective-extid)))
(zi2 (make-zstream :input-stack (list xi2)))) (with-zstream (zi2 :input-stack (list xi2))
(ensure-dtd) (ensure-dtd)
(p/ext-subset zi2) (p/ext-subset zi2)
(when (and fresh-dtd-p (when (and fresh-dtd-p
*cache-all-dtds* *cache-all-dtds*
*validate* *validate*
(not (standalone-p *ctx*))) (not (standalone-p *ctx*)))
(setf (getdtd sysid *dtd-cache*) (dtd *ctx*)))))))) (setf (getdtd sysid *dtd-cache*) (dtd *ctx*)))))))))
(sax:end-dtd (handler *ctx*)) (sax:end-dtd (handler *ctx*))
(let ((dtd (dtd *ctx*))) (let ((dtd (dtd *ctx*)))
(sax:entity-resolver (sax:entity-resolver
@ -2657,7 +2672,8 @@
:entity-name "dummy doctype" :entity-name "dummy doctype"
:entity-kind :main :entity-kind :main
:uri (zstream-base-sysid input))) :uri (zstream-base-sysid input)))
(p/doctype-decl (make-zstream :input-stack (list dummy)) dtd))) (with-zstream (zstream :input-stack (list dummy))
(p/doctype-decl zstream dtd))))
(defun fix-seen-< (input) (defun fix-seen-< (input)
(when (eq (peek-token input) :seen-<) (when (eq (peek-token input) :seen-<)
@ -2841,106 +2857,106 @@
(defun parse-xml-decl (content) (defun parse-xml-decl (content)
(let* ((res (make-xml-header)) (let* ((res (make-xml-header))
(i (make-rod-xstream content)) (i (make-rod-xstream content)))
(z (make-zstream :input-stack (list i))) (with-zstream (z :input-stack (list i))
(atts (read-attribute-list z i t))) (let ((atts (read-attribute-list z i t)))
(unless (eq (peek-rune i) :eof) (unless (eq (peek-rune i) :eof)
(wf-error i "Garbage at end of XMLDecl.")) (wf-error i "Garbage at end of XMLDecl."))
;; versioninfo muss da sein ;; versioninfo muss da sein
;; dann ? encodingdecl ;; dann ? encodingdecl
;; dann ? sddecl ;; dann ? sddecl
;; dann ende ;; dann ende
(unless (eq (caar atts) (intern-name '#.(string-rod "version"))) (unless (eq (caar atts) (intern-name '#.(string-rod "version")))
(wf-error i "XMLDecl needs version.")) (wf-error i "XMLDecl needs version."))
(unless (and (>= (length (cdar atts)) 1) (unless (and (>= (length (cdar atts)) 1)
(every (lambda (x) (every (lambda (x)
(or (rune<= #/a x #/z)
(rune<= #/A x #/Z)
(rune<= #/0 x #/9)
(rune= x #/_)
(rune= x #/.)
(rune= x #/:)
(rune= x #/-)))
(cdar atts)))
(wf-error i"Bad XML version number: ~S." (rod-string (cdar atts))))
(setf (xml-header-version res) (rod-string (cdar atts)))
(pop atts)
(when (eq (caar atts) (intern-name '#.(string-rod "encoding")))
(unless (and (>= (length (cdar atts)) 1)
(every (lambda (x)
(or (rune<= #/a x #/z)
(rune<= #/A x #/Z)
(rune<= #/0 x #/9)
(rune= x #/_)
(rune= x #/.)
(rune= x #/-)))
(cdar atts))
((lambda (x)
(or (rune<= #/a x #/z) (or (rune<= #/a x #/z)
(rune<= #/A x #/Z) (rune<= #/A x #/Z)))
(rune<= #/0 x #/9) (aref (cdar atts) 0)))
(rune= x #/_) (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts))))
(rune= x #/.) (setf (xml-header-encoding res) (rod-string (cdar atts)))
(rune= x #/:) (pop atts))
(rune= x #/-))) (when (eq (caar atts) (intern-name '#.(string-rod "standalone")))
(cdar atts))) (unless (or (rod= (cdar atts) '#.(string-rod "yes"))
(wf-error i"Bad XML version number: ~S." (rod-string (cdar atts)))) (rod= (cdar atts) '#.(string-rod "no")))
(setf (xml-header-version res) (rod-string (cdar atts))) (wf-error i "XMLDecl's 'standalone' attribute must be exactly \"yes\" or \"no\" and not ~S."
(pop atts) (rod-string (cdar atts))))
(when (eq (caar atts) (intern-name '#.(string-rod "encoding"))) (setf (xml-header-standalone-p res)
(unless (and (>= (length (cdar atts)) 1) (if (rod-equal '#.(string-rod "yes") (cdar atts))
(every (lambda (x) :yes
(or (rune<= #/a x #/z) :no))
(rune<= #/A x #/Z) (pop atts))
(rune<= #/0 x #/9) (when atts
(rune= x #/_) (wf-error i "Garbage in XMLDecl: ~A" (rod-string content)))
(rune= x #/.) res))))
(rune= x #/-)))
(cdar atts))
((lambda (x)
(or (rune<= #/a x #/z)
(rune<= #/A x #/Z)))
(aref (cdar atts) 0)))
(wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts))))
(setf (xml-header-encoding res) (rod-string (cdar atts)))
(pop atts))
(when (eq (caar atts) (intern-name '#.(string-rod "standalone")))
(unless (or (rod= (cdar atts) '#.(string-rod "yes"))
(rod= (cdar atts) '#.(string-rod "no")))
(wf-error i "XMLDecl's 'standalone' attribute must be exactly \"yes\" or \"no\" and not ~S."
(rod-string (cdar atts))))
(setf (xml-header-standalone-p res)
(if (rod-equal '#.(string-rod "yes") (cdar atts))
:yes
:no))
(pop atts))
(when atts
(wf-error i "Garbage in XMLDecl: ~A" (rod-string content)))
res))
(defun parse-text-decl (content) (defun parse-text-decl (content)
(let* ((res (make-xml-header)) (let* ((res (make-xml-header))
(i (make-rod-xstream content)) (i (make-rod-xstream content)))
(z (make-zstream :input-stack (list i))) (with-zstream (z :input-stack (list i))
(atts (read-attribute-list z i t))) (let ((atts (read-attribute-list z i t)))
(unless (eq (peek-rune i) :eof) (unless (eq (peek-rune i) :eof)
(wf-error i "Garbage at end of TextDecl")) (wf-error i "Garbage at end of TextDecl"))
;; versioninfo optional ;; versioninfo optional
;; encodingdecl muss da sein ;; encodingdecl muss da sein
;; dann ende ;; dann ende
(when (eq (caar atts) (intern-name '#.(string-rod "version"))) (when (eq (caar atts) (intern-name '#.(string-rod "version")))
(unless (and (>= (length (cdar atts)) 1) (unless (and (>= (length (cdar atts)) 1)
(every (lambda (x) (every (lambda (x)
(or (rune<= #/a x #/z) (or (rune<= #/a x #/z)
(rune<= #/A x #/Z) (rune<= #/A x #/Z)
(rune<= #/0 x #/9) (rune<= #/0 x #/9)
(rune= x #/_) (rune= x #/_)
(rune= x #/.) (rune= x #/.)
(rune= x #/:) (rune= x #/:)
(rune= x #/-))) (rune= x #/-)))
(cdar atts))) (cdar atts)))
(wf-error i "Bad XML version number: ~S." (rod-string (cdar atts)))) (wf-error i "Bad XML version number: ~S." (rod-string (cdar atts))))
(setf (xml-header-version res) (rod-string (cdar atts))) (setf (xml-header-version res) (rod-string (cdar atts)))
(pop atts)) (pop atts))
(unless (eq (caar atts) (intern-name '#.(string-rod "encoding"))) (unless (eq (caar atts) (intern-name '#.(string-rod "encoding")))
(wf-error i "TextDecl needs encoding.")) (wf-error i "TextDecl needs encoding."))
(unless (and (>= (length (cdar atts)) 1) (unless (and (>= (length (cdar atts)) 1)
(every (lambda (x) (every (lambda (x)
(or (rune<= #/a x #/z) (or (rune<= #/a x #/z)
(rune<= #/A x #/Z) (rune<= #/A x #/Z)
(rune<= #/0 x #/9) (rune<= #/0 x #/9)
(rune= x #/_) (rune= x #/_)
(rune= x #/.) (rune= x #/.)
(rune= x #/-))) (rune= x #/-)))
(cdar atts)) (cdar atts))
((lambda (x) ((lambda (x)
(or (rune<= #/a x #/z) (or (rune<= #/a x #/z)
(rune<= #/A x #/Z) (rune<= #/A x #/Z)
(rune<= #/0 x #/9))) (rune<= #/0 x #/9)))
(aref (cdar atts) 0))) (aref (cdar atts) 0)))
(wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts)))) (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts))))
(setf (xml-header-encoding res) (rod-string (cdar atts))) (setf (xml-header-encoding res) (rod-string (cdar atts)))
(pop atts) (pop atts)
(when atts (when atts
(wf-error i "Garbage in TextDecl: ~A" (rod-string content))) (wf-error i "Garbage in TextDecl: ~A" (rod-string content)))))
res)) res))
;;;; --------------------------------------------------------------------------- ;;;; ---------------------------------------------------------------------------
@ -2957,6 +2973,7 @@
;;;; --------------------------------------------------------------------------- ;;;; ---------------------------------------------------------------------------
;;;; User interface ;;;; ;;;; User interface ;;;;
#-cxml-system::uri-is-namestring
(defun specific-or (component &optional (alternative nil)) (defun specific-or (component &optional (alternative nil))
(if (eq component :unspecific) (if (eq component :unspecific)
alternative alternative
@ -2967,6 +2984,7 @@
alternative alternative
str)) str))
#-cxml-system::uri-is-namestring
(defun make-uri (&rest initargs &key path query &allow-other-keys) (defun make-uri (&rest initargs &key path query &allow-other-keys)
(apply #'make-instance (apply #'make-instance
'puri:uri 'puri:uri
@ -2974,9 +2992,11 @@
:query (and query (escape-query query)) :query (and query (escape-query query))
initargs)) initargs))
#-cxml-system::uri-is-namestring
(defun escape-path (list) (defun escape-path (list)
(puri::render-parsed-path list t)) (puri::render-parsed-path list t))
#-cxml-system::uri-is-namestring
(defun escape-query (pairs) (defun escape-query (pairs)
(flet ((escape (str) (flet ((escape (str)
(puri::encode-escaped-encoding str puri::*reserved-characters* t))) (puri::encode-escaped-encoding str puri::*reserved-characters* t)))
@ -2990,6 +3010,7 @@
(write-char #\= s) (write-char #\= s)
(write-string (escape (cdr pair)) s)))))) (write-string (escape (cdr pair)) s))))))
#-cxml-system::uri-is-namestring
(defun uri-parsed-query (uri) (defun uri-parsed-query (uri)
(flet ((unescape (str) (flet ((unescape (str)
(puri::decode-escaped-encoding str t puri::*reserved-characters*))) (puri::decode-escaped-encoding str t puri::*reserved-characters*)))
@ -3005,9 +3026,11 @@
(t (t
nil))))) nil)))))
#-cxml-system::uri-is-namestring
(defun query-value (name alist) (defun query-value (name alist)
(cdr (assoc name alist :test #'equal))) (cdr (assoc name alist :test #'equal)))
#-cxml-system::uri-is-namestring
(defun pathname-to-uri (pathname) (defun pathname-to-uri (pathname)
(let ((path (let ((path
(append (pathname-directory pathname) (append (pathname-directory pathname)
@ -3027,6 +3050,11 @@
(specific-or (pathname-device pathname))) (specific-or (pathname-device pathname)))
:path path)))) :path path))))
#+cxml-system::uri-is-namestring
(defun pathname-to-uri (pathname)
(puri:parse-uri (namestring pathname)))
#-cxml-system::uri-is-namestring
(defun parse-name.type (str) (defun parse-name.type (str)
(if str (if str
(let ((i (position #\. str :from-end t))) (let ((i (position #\. str :from-end t)))
@ -3035,6 +3063,7 @@
(values str nil))) (values str nil)))
(values nil nil))) (values nil nil)))
#-cxml-system::uri-is-namestring
(defun uri-to-pathname (uri) (defun uri-to-pathname (uri)
(let ((scheme (puri:uri-scheme uri)) (let ((scheme (puri:uri-scheme uri))
(path (puri:uri-parsed-path uri))) (path (puri:uri-parsed-path uri)))
@ -3058,11 +3087,17 @@
:directory (cons :absolute (butlast (cdr path))) :directory (cons :absolute (butlast (cdr path)))
:name name :name name
:type type)))))) :type type))))))
#+cxml-system::uri-is-namestring
(defun uri-to-pathname (uri)
(let ((pathname (puri:render-uri uri nil)))
(when (equalp (pathname-host pathname) "+")
(setf (slot-value pathname 'lisp::host) "localhost"))
pathname))
(defun parse-xstream (xstream handler &rest args) (defun parse-xstream (xstream handler &rest args)
(let ((*ctx* nil)) (let ((*ctx* nil))
(handler-case (handler-case
(let ((zstream (make-zstream :input-stack (list xstream)))) (with-zstream (zstream :input-stack (list xstream))
(peek-rune xstream) (peek-rune xstream)
(with-scratch-pads () (with-scratch-pads ()
(apply #'p/document zstream handler args))) (apply #'p/document zstream handler args)))
@ -3129,10 +3164,10 @@
(unless (dtd *ctx*) (unless (dtd *ctx*)
(with-scratch-pads () (with-scratch-pads ()
(let ((*data-behaviour* :DTD)) (let ((*data-behaviour* :DTD))
(let* ((xi2 (xstream-open-extid extid)) (let ((xi2 (xstream-open-extid extid)))
(zi2 (make-zstream :input-stack (list xi2)))) (with-zstream (zi2 :input-stack (list xi2))
(ensure-dtd) (ensure-dtd)
(p/ext-subset zi2))))) (p/ext-subset zi2))))))
(sax:end-dtd handler) (sax:end-dtd handler)
(let ((dtd (dtd *ctx*))) (let ((dtd (dtd *ctx*)))
(sax:entity-resolver handler (lambda (n h) (resolve-entity n h dtd))) (sax:entity-resolver handler (lambda (n h) (resolve-entity n h dtd)))
@ -3171,15 +3206,15 @@
:entity-name "dtd" :entity-name "dtd"
:entity-kind :main :entity-kind :main
:uri (safe-stream-sysid stream))) :uri (safe-stream-sysid stream)))
(let ((zstream (make-zstream :input-stack (list input))) (let ((*ctx* (make-context :handler handler))
(*ctx* (make-context :handler handler))
(*validate* t) (*validate* t)
(*data-behaviour* :DTD)) (*data-behaviour* :DTD))
(with-scratch-pads () (with-zstream (zstream :input-stack (list input))
(ensure-dtd) (with-scratch-pads ()
(peek-rune input) (ensure-dtd)
(p/ext-subset zstream) (peek-rune input)
(dtd *ctx*))))) (p/ext-subset zstream)
(dtd *ctx*))))))
(defun parse-rod (string handler &rest args) (defun parse-rod (string handler &rest args)
(let ((xstream (string->xstream string))) (let ((xstream (string->xstream string)))
@ -3193,36 +3228,6 @@
(defun string->xstream (string) (defun string->xstream (string)
(make-rod-xstream (string-rod string))) (make-rod-xstream (string-rod string)))
(defclass octet-input-stream
(trivial-gray-stream-mixin fundamental-binary-input-stream)
((octets :initarg :octets)
(pos :initform 0)))
(defmethod close ((stream octet-input-stream) &key abort)
(declare (ignore abort))
(open-stream-p stream))
(defmethod stream-read-byte ((stream octet-input-stream))
(with-slots (octets pos) stream
(if (>= pos (length octets))
:eof
(prog1
(elt octets pos)
(incf pos)))))
(defmethod stream-read-sequence
((stream octet-input-stream) sequence start end &key &allow-other-keys)
(with-slots (octets pos) stream
(let* ((length (min (- end start) (- (length octets) pos)))
(end1 (+ start length))
(end2 (+ pos length)))
(replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
(setf pos end2)
end1)))
(defun make-octet-input-stream (octets)
(make-instance 'octet-input-stream :octets octets))
(defun parse-octets (octets handler &rest args) (defun parse-octets (octets handler &rest args)
(apply #'parse-stream (make-octet-input-stream octets) handler args)) (apply #'parse-stream (make-octet-input-stream octets) handler args))
@ -3389,63 +3394,63 @@
;; used only by read-att-value-2 ;; used only by read-att-value-2
(defun find-internal-entity-expansion (name) (defun find-internal-entity-expansion (name)
(let ((zinput (make-zstream))) (with-zstream (zinput)
(with-rune-collector-3 (collect) (with-rune-collector-3 (collect)
(labels ((muffle (input) (labels ((muffle (input)
(let (c) (let (c)
(loop (loop
(setf c (read-rune input)) (setf c (read-rune input))
(cond ((eq c :eof) (cond ((eq c :eof)
(return)) (return))
((rune= c #/&) ((rune= c #/&)
(setf c (peek-rune input)) (setf c (peek-rune input))
(cond ((eql c :eof) (cond ((eql c :eof)
(eox input)) (eox input))
((rune= c #/#) ((rune= c #/#)
(let ((c (read-character-reference input))) (let ((c (read-character-reference input)))
(%put-unicode-char c collect))) (%put-unicode-char c collect)))
(t (t
(unless (name-start-rune-p c) (unless (name-start-rune-p c)
(wf-error zinput "Expecting name after &.")) (wf-error zinput "Expecting name after &."))
(let ((name (read-name-token input))) (let ((name (read-name-token input)))
(setf c (read-rune input)) (setf c (read-rune input))
(check-rune input c #/\;) (check-rune input c #/\;)
(recurse-on-entity (recurse-on-entity
zinput name :general zinput name :general
(lambda (zinput) (lambda (zinput)
(muffle (car (zstream-input-stack zinput))))))))) (muffle (car (zstream-input-stack zinput)))))))))
((rune= c #/<) ((rune= c #/<)
(wf-error zinput "unexpected #\/<")) (wf-error zinput "unexpected #\/<"))
((space-rune-p c) ((space-rune-p c)
(collect #/space)) (collect #/space))
((not (data-rune-p c)) ((not (data-rune-p c))
(wf-error zinput "illegal char: ~S." c)) (wf-error zinput "illegal char: ~S." c))
(t (t
(collect c))))))) (collect c)))))))
(declare (dynamic-extent #'muffle)) (declare (dynamic-extent #'muffle))
(recurse-on-entity (recurse-on-entity
zinput name :general zinput name :general
(lambda (zinput) (lambda (zinput)
(muffle (car (zstream-input-stack zinput))))) )))) (muffle (car (zstream-input-stack zinput)))))))))
;; callback for DOM ;; callback for DOM
(defun resolve-entity (name handler dtd) (defun resolve-entity (name handler dtd)
(let ((*validate* nil)) (let ((*validate* nil))
(if (get-entity-definition name :general dtd) (if (get-entity-definition name :general dtd)
(let* ((*ctx* (make-context :handler handler :dtd dtd)) (let* ((*ctx* (make-context :handler handler :dtd dtd))
(input (make-zstream))
(*data-behaviour* :DOC)) (*data-behaviour* :DOC))
(with-scratch-pads () (with-zstream (input)
(recurse-on-entity (with-scratch-pads ()
input name :general (recurse-on-entity
(lambda (input) input name :general
(prog1 (lambda (input)
(etypecase (checked-get-entdef name :general) (prog1
(internal-entdef (p/content input)) (etypecase (checked-get-entdef name :general)
(external-entdef (p/ext-parsed-ent input))) (internal-entdef (p/content input))
(unless (eq (peek-token input) :eof) (external-entdef (p/ext-parsed-ent input)))
(wf-error input "Trailing garbage. - ~S" (unless (eq (peek-token input) :eof)
(peek-token input)))))))) (wf-error input "Trailing garbage. - ~S"
(peek-token input)))))))))
nil))) nil)))
(defun read-att-value-2 (input) (defun read-att-value-2 (input)