SCL support (thanks to Douglas Crosher). Includes support for

implementations where URIs are valid namestrings, and a mode
        where normal streams are used instead of xstreams and ystreams
        (albeit both SCL-specific at this point).
This commit is contained in:
dlichteblau
2007-06-16 11:27:18 +00:00
parent ee394c591d
commit fb9a2fa002
9 changed files with 523 additions and 216 deletions

View File

@ -4,6 +4,9 @@
(defclass closure-source-file (cl-source-file) ())
#+scl
(pushnew 'uri-is-namestring *features*)
#+sbcl
(defmethod perform :around ((o compile-op) (s closure-source-file))
;; shut up already. Correctness first.
@ -30,7 +33,7 @@
(:file "space-normalizer" :depends-on ("xml-parse"))
(:file "catalog" :depends-on ("xml-parse"))
(:file "sax-proxy" :depends-on ("xml-parse")))
:depends-on (:runes :puri :trivial-gray-streams))
:depends-on (:runes :puri #-scl :trivial-gray-streams))
(defclass utf8dom-file (closure-source-file) ((of)))

View File

@ -68,6 +68,12 @@
only. The old behaviour using pairs of prefix and local names
was removed. (Thanks to Douglas Crosher.)
</li>
<li>
SCL support (thanks to Douglas Crosher). Includes support for
implementations where URIs are valid namestrings, and a mode
where normal streams are used instead of xstreams and ystreams
(albeit both SCL-specific at this point).
</li>
</ul>
<p class="nomargin"><tt>rel-2007-05-26</tt></p>
<ul class="nomargin">

View File

@ -52,7 +52,9 @@
#+rune-is-character "characters")
#+rune-is-integer (:file "utf8")
(:file "syntax")
(:file "encodings")
(:file "encodings-data")
(:file "xstream")
(:file "ystream")))
#-x&y-streams-are-stream (:file "encodings")
#-x&y-streams-are-stream (:file "encodings-data")
#-x&y-streams-are-stream (:file "xstream")
#-x&y-streams-are-stream (:file "ystream")
#+x&y-streams-are-stream (:file #+scl "stream-scl")
))

View File

@ -79,7 +79,11 @@
#:make-string-ystream/utf8
;; #+rune-is-integer
#:make-character-stream-ystream/utf8
#:runes-to-utf8/adjustable-string))
#:runes-to-utf8/adjustable-string
#:rod-to-utf8-string
#:utf8-string-to-rod
#:make-octet-input-stream))
(defpackage :utf8-runes
(:use :cl)

253
runes/stream-scl.lisp Normal file
View File

@ -0,0 +1,253 @@
;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
;;; ---------------------------------------------------------------------------
;;; Title: Fast streams
;;; Created: 1999-07-17
;;; Author: Douglas Crosher
;;; License: Lisp-LGPL (See file COPYING for details).
;;; ---------------------------------------------------------------------------
;;; (c) copyright 2007 by Douglas Crosher
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Library General Public License for more details.
;;;
;;; You should have received a copy of the GNU Library General Public
;;; License along with this library; if not, write to the
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;;; Boston, MA 02111-1307 USA.
(in-package :runes)
(eval-when (:compile-toplevel :load-toplevel :execute)
(defparameter *fast* '(optimize (speed 3) (safety 3))))
(deftype runes-encoding:encoding-error ()
'ext:character-conversion-error)
;;; xstream
(eval-when (:compile-toplevel :load-toplevel :execute)
(defclass xstream (ext:character-stream)
((name :initarg :name :initform nil
:accessor xstream-name)
(column :initarg :column :initform 0)
(line :initarg :line :initform 1)
(unread-column :initarg :unread-column :initform 0)))
(defclass eol-conversion-xstream (lisp::eol-conversion-input-stream xstream)
())
) ; eval-when
(defun make-eol-conversion-xstream (source-stream)
"Returns a character stream that conversion CR-LF pairs and lone CR
characters into single linefeed character."
(declare (type stream source-stream))
(let ((stream (ext:make-eol-conversion-stream source-stream
:input t
:close-stream-p t)))
(change-class stream 'eol-conversion-xstream)))
(definline xstream-p (stream)
(typep stream 'xstream))
(defun close-xstream (input)
(close input))
(definline read-rune (input)
(declare (type stream input)
(inline read-char)
#.*fast*)
(let ((char (read-char input nil :eof)))
(cond ((member char '(#\UFFFE #\UFFFF))
;; These characters are illegal within XML documents.
(simple-error 'ext:character-conversion-error
"~@<Illegal XML document character: ~S~:@>" char))
((eql char #\linefeed)
(setf (slot-value input 'unread-column) (slot-value input 'column))
(setf (slot-value input 'column) 0)
(incf (the kernel:index (slot-value input 'line))))
(t
(incf (the kernel:index (slot-value input 'column)))))
char))
(definline peek-rune (input)
(declare (type stream input)
(inline peek-char)
#.*fast*)
(peek-char nil input nil :eof))
(definline consume-rune (input)
(declare (type stream input)
(inline read-rune)
#.*fast*)
(read-rune input)
nil)
(definline unread-rune (rune input)
(declare (type stream input)
(inline unread-char)
#.*fast*)
(unread-char rune input)
(cond ((eql rune #\linefeed)
(setf (slot-value input 'column) (slot-value input 'unread-column))
(setf (slot-value input 'unread-column) 0)
(decf (the kernel:index (slot-value input 'line))))
(t
(decf (the kernel:index (slot-value input 'column)))))
nil)
(defun fread-rune (input)
(read-rune input))
(defun fpeek-rune (input)
(peek-rune input))
(defun xstream-position (input)
(file-position input))
(defun runes-encoding:find-encoding (encoding)
encoding)
(defun make-xstream (os-stream &key name
(speed 8192)
(initial-speed 1)
(initial-encoding :guess))
(declare (ignore speed))
(assert (eql initial-speed 1))
(assert (eq initial-encoding :guess))
(let* ((stream (ext:make-xml-character-conversion-stream os-stream
:input t
:close-stream-p t))
(xstream (make-eol-conversion-xstream stream)))
(setf (xstream-name xstream) name)
xstream))
(defclass xstream-string-input-stream (lisp::string-input-stream xstream)
())
(defun make-rod-xstream (string &key name)
(declare (type string string))
(let ((stream (make-string-input-stream string)))
(change-class stream 'xstream-string-input-stream :name name)))
;;; already at 'full speed' so just return the buffer size.
(defun set-to-full-speed (stream)
(length (ext:stream-in-buffer stream)))
(defun xstream-speed (stream)
(length (ext:stream-in-buffer stream)))
(defun xstream-line-number (stream)
(slot-value stream 'line))
(defun xstream-column-number (stream)
(slot-value stream 'column))
(defun xstream-encoding (stream)
(stream-external-format stream))
;;; the encoding will have already been detected, but it is checked against the
;;; declared encoding here.
(defun (setf xstream-encoding) (declared-encoding stream)
(let* ((initial-encoding (xstream-encoding stream))
(canonical-encoding
(cond ((and (eq initial-encoding :utf-16le)
(member declared-encoding '(:utf-16 :utf16 :utf-16le :utf16le)
:test 'string-equal))
:utf-16le)
((and (eq initial-encoding :utf-16be)
(member declared-encoding '(:utf-16 :utf16 :utf-16be :utf16be)
:test 'string-equal))
:utf-16be)
((and (eq initial-encoding :ucs-4be)
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4be :ucs4be)
:test 'string-equal))
:ucs4-be)
((and (eq initial-encoding :ucs-4le)
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4le :ucs4le)
:test 'string-equal))
:ucs4-le)
(t
declared-encoding))))
(unless (string-equal initial-encoding canonical-encoding)
(warn "Unable to change xstream encoding from ~S to ~S (~S)~%"
initial-encoding declared-encoding canonical-encoding))
declared-encoding))
;;; ystream - a run output stream.
(deftype ystream () 'stream)
(defun ystream-column (stream)
(ext:line-column stream))
(definline write-rune (rune stream)
(declare (inline write-char))
(write-char rune stream))
(defun write-rod (rod stream)
(declare (type rod rod)
(type stream stream))
(write-string rod stream))
(defun make-rod-ystream ()
(make-string-output-stream))
(defun close-ystream (stream)
(etypecase stream
(ext:string-output-stream
(get-output-stream-string stream))
(ext:character-conversion-output-stream
(let ((target (slot-value stream 'stream)))
(close stream)
(if (typep target 'ext:byte-output-stream)
(ext:get-output-stream-bytes target)
stream)))))
;;;; CHARACTER-STREAM-YSTREAM
(defun make-character-stream-ystream (target-stream)
target-stream)
;;;; OCTET-VECTOR-YSTREAM
(defun make-octet-vector-ystream ()
(let ((target (ext:make-byte-output-stream)))
(ext:make-character-conversion-stream target :output t
:external-format :utf-8
:close-stream-p t)))
;;;; OCTET-STREAM-YSTREAM
(defun make-octet-stream-ystream (os-stream)
(ext:make-character-conversion-stream os-stream :output t
:external-format :utf-8
:close-stream-p t))
;;;; helper functions
(defun rod-to-utf8-string (rod)
(ext:make-string-from-bytes (ext:make-bytes-from-string rod :utf8)
:iso-8859-1))
(defun utf8-string-to-rod (str)
(let ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)))
(ext:make-string-from-bytes bytes :utf-8)))
(defun make-octet-input-stream (octets)
(ext:make-byte-input-stream octets))

View File

@ -248,3 +248,50 @@
(defmethod close-ystream ((ystream string-ystream/utf8))
(get-output-stream-string (ystream-os-stream ystream))))
;;;; helper functions
(defun rod-to-utf8-string (rod)
(let ((out (make-buffer :element-type 'character)))
(runes-to-utf8/adjustable-string out rod (length rod))
out))
(defun utf8-string-to-rod (str)
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
(buffer (make-array (length bytes) :element-type '(unsigned-byte 16)))
(n (runes-encoding:decode-sequence
:utf-8 bytes 0 (length bytes) buffer 0 0 nil))
(result (make-array n :element-type 'rune)))
(map-into result #'code-rune buffer)
result))
(defclass octet-input-stream
(trivial-gray-stream-mixin fundamental-binary-input-stream)
((octets :initarg :octets)
(pos :initform 0)))
(defmethod close ((stream octet-input-stream) &key abort)
(declare (ignore abort))
(open-stream-p stream))
(defmethod stream-read-byte ((stream octet-input-stream))
(with-slots (octets pos) stream
(if (>= pos (length octets))
:eof
(prog1
(elt octets pos)
(incf pos)))))
(defmethod stream-read-sequence
((stream octet-input-stream) sequence start end &key &allow-other-keys)
(with-slots (octets pos) stream
(let* ((length (min (- end start) (- (length octets) pos)))
(end1 (+ start length))
(end2 (+ pos length)))
(replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
(setf pos end2)
end1)))
(defun make-octet-input-stream (octets)
(make-instance 'octet-input-stream :octets octets))

View File

@ -6,7 +6,7 @@
(in-package :cl-user)
(defpackage :cxml
(:use :cl :runes :runes-encoding :trivial-gray-streams)
(:use :cl :runes :runes-encoding #-scl :trivial-gray-streams)
(:export
;; xstreams
#:make-xstream

View File

@ -619,16 +619,3 @@
(maybe-emit-start-tag)
(sax:characters *sink* (rod data))
data)
(defun rod-to-utf8-string (rod)
(let ((out (make-buffer :element-type 'character)))
(runes-to-utf8/adjustable-string out rod (length rod))
out))
(defun utf8-string-to-rod (str)
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
(buffer (make-array (length bytes) :element-type '(unsigned-byte 16)))
(n (decode-sequence :utf-8 bytes 0 (length bytes) buffer 0 0 nil))
(result (make-array n :element-type 'rune)))
(map-into result #'code-rune buffer)
result))

View File

@ -1175,6 +1175,21 @@
token-semantic
input-stack)
(defun call-with-zstream (fn zstream)
(unwind-protect
(funcall fn zstream)
(dolist (input (zstream-input-stack zstream))
(cond #-x&y-streams-are-stream
((xstream-p input)
(close-xstream input))
#+x&y-streams-are-stream
((streamp input)
(close input))))))
(defmacro with-zstream ((zstream &rest args) &body body)
`(call-with-zstream (lambda (,zstream) ,@body)
(make-zstream ,@args)))
(defun read-token (input)
(cond ((zstream-token-category input)
(multiple-value-prog1
@ -2545,15 +2560,15 @@
(setf (dtd *ctx*) cached-dtd)
(report-cached-dtd cached-dtd))
(t
(let* ((xi2 (xstream-open-extid effective-extid))
(zi2 (make-zstream :input-stack (list xi2))))
(let ((xi2 (xstream-open-extid effective-extid)))
(with-zstream (zi2 :input-stack (list xi2))
(ensure-dtd)
(p/ext-subset zi2)
(when (and fresh-dtd-p
*cache-all-dtds*
*validate*
(not (standalone-p *ctx*)))
(setf (getdtd sysid *dtd-cache*) (dtd *ctx*))))))))
(setf (getdtd sysid *dtd-cache*) (dtd *ctx*)))))))))
(sax:end-dtd (handler *ctx*))
(let ((dtd (dtd *ctx*)))
(sax:entity-resolver
@ -2657,7 +2672,8 @@
:entity-name "dummy doctype"
:entity-kind :main
:uri (zstream-base-sysid input)))
(p/doctype-decl (make-zstream :input-stack (list dummy)) dtd)))
(with-zstream (zstream :input-stack (list dummy))
(p/doctype-decl zstream dtd))))
(defun fix-seen-< (input)
(when (eq (peek-token input) :seen-<)
@ -2841,9 +2857,9 @@
(defun parse-xml-decl (content)
(let* ((res (make-xml-header))
(i (make-rod-xstream content))
(z (make-zstream :input-stack (list i)))
(atts (read-attribute-list z i t)))
(i (make-rod-xstream content)))
(with-zstream (z :input-stack (list i))
(let ((atts (read-attribute-list z i t)))
(unless (eq (peek-rune i) :eof)
(wf-error i "Garbage at end of XMLDecl."))
;; versioninfo muss da sein
@ -2894,13 +2910,13 @@
(pop atts))
(when atts
(wf-error i "Garbage in XMLDecl: ~A" (rod-string content)))
res))
res))))
(defun parse-text-decl (content)
(let* ((res (make-xml-header))
(i (make-rod-xstream content))
(z (make-zstream :input-stack (list i)))
(atts (read-attribute-list z i t)))
(i (make-rod-xstream content)))
(with-zstream (z :input-stack (list i))
(let ((atts (read-attribute-list z i t)))
(unless (eq (peek-rune i) :eof)
(wf-error i "Garbage at end of TextDecl"))
;; versioninfo optional
@ -2940,7 +2956,7 @@
(setf (xml-header-encoding res) (rod-string (cdar atts)))
(pop atts)
(when atts
(wf-error i "Garbage in TextDecl: ~A" (rod-string content)))
(wf-error i "Garbage in TextDecl: ~A" (rod-string content)))))
res))
;;;; ---------------------------------------------------------------------------
@ -2957,6 +2973,7 @@
;;;; ---------------------------------------------------------------------------
;;;; User interface ;;;;
#-cxml-system::uri-is-namestring
(defun specific-or (component &optional (alternative nil))
(if (eq component :unspecific)
alternative
@ -2967,6 +2984,7 @@
alternative
str))
#-cxml-system::uri-is-namestring
(defun make-uri (&rest initargs &key path query &allow-other-keys)
(apply #'make-instance
'puri:uri
@ -2974,9 +2992,11 @@
:query (and query (escape-query query))
initargs))
#-cxml-system::uri-is-namestring
(defun escape-path (list)
(puri::render-parsed-path list t))
#-cxml-system::uri-is-namestring
(defun escape-query (pairs)
(flet ((escape (str)
(puri::encode-escaped-encoding str puri::*reserved-characters* t)))
@ -2990,6 +3010,7 @@
(write-char #\= s)
(write-string (escape (cdr pair)) s))))))
#-cxml-system::uri-is-namestring
(defun uri-parsed-query (uri)
(flet ((unescape (str)
(puri::decode-escaped-encoding str t puri::*reserved-characters*)))
@ -3005,9 +3026,11 @@
(t
nil)))))
#-cxml-system::uri-is-namestring
(defun query-value (name alist)
(cdr (assoc name alist :test #'equal)))
#-cxml-system::uri-is-namestring
(defun pathname-to-uri (pathname)
(let ((path
(append (pathname-directory pathname)
@ -3027,6 +3050,11 @@
(specific-or (pathname-device pathname)))
:path path))))
#+cxml-system::uri-is-namestring
(defun pathname-to-uri (pathname)
(puri:parse-uri (namestring pathname)))
#-cxml-system::uri-is-namestring
(defun parse-name.type (str)
(if str
(let ((i (position #\. str :from-end t)))
@ -3035,6 +3063,7 @@
(values str nil)))
(values nil nil)))
#-cxml-system::uri-is-namestring
(defun uri-to-pathname (uri)
(let ((scheme (puri:uri-scheme uri))
(path (puri:uri-parsed-path uri)))
@ -3058,11 +3087,17 @@
:directory (cons :absolute (butlast (cdr path)))
:name name
:type type))))))
#+cxml-system::uri-is-namestring
(defun uri-to-pathname (uri)
(let ((pathname (puri:render-uri uri nil)))
(when (equalp (pathname-host pathname) "+")
(setf (slot-value pathname 'lisp::host) "localhost"))
pathname))
(defun parse-xstream (xstream handler &rest args)
(let ((*ctx* nil))
(handler-case
(let ((zstream (make-zstream :input-stack (list xstream))))
(with-zstream (zstream :input-stack (list xstream))
(peek-rune xstream)
(with-scratch-pads ()
(apply #'p/document zstream handler args)))
@ -3129,10 +3164,10 @@
(unless (dtd *ctx*)
(with-scratch-pads ()
(let ((*data-behaviour* :DTD))
(let* ((xi2 (xstream-open-extid extid))
(zi2 (make-zstream :input-stack (list xi2))))
(let ((xi2 (xstream-open-extid extid)))
(with-zstream (zi2 :input-stack (list xi2))
(ensure-dtd)
(p/ext-subset zi2)))))
(p/ext-subset zi2))))))
(sax:end-dtd handler)
(let ((dtd (dtd *ctx*)))
(sax:entity-resolver handler (lambda (n h) (resolve-entity n h dtd)))
@ -3171,15 +3206,15 @@
:entity-name "dtd"
:entity-kind :main
:uri (safe-stream-sysid stream)))
(let ((zstream (make-zstream :input-stack (list input)))
(*ctx* (make-context :handler handler))
(let ((*ctx* (make-context :handler handler))
(*validate* t)
(*data-behaviour* :DTD))
(with-zstream (zstream :input-stack (list input))
(with-scratch-pads ()
(ensure-dtd)
(peek-rune input)
(p/ext-subset zstream)
(dtd *ctx*)))))
(dtd *ctx*))))))
(defun parse-rod (string handler &rest args)
(let ((xstream (string->xstream string)))
@ -3193,36 +3228,6 @@
(defun string->xstream (string)
(make-rod-xstream (string-rod string)))
(defclass octet-input-stream
(trivial-gray-stream-mixin fundamental-binary-input-stream)
((octets :initarg :octets)
(pos :initform 0)))
(defmethod close ((stream octet-input-stream) &key abort)
(declare (ignore abort))
(open-stream-p stream))
(defmethod stream-read-byte ((stream octet-input-stream))
(with-slots (octets pos) stream
(if (>= pos (length octets))
:eof
(prog1
(elt octets pos)
(incf pos)))))
(defmethod stream-read-sequence
((stream octet-input-stream) sequence start end &key &allow-other-keys)
(with-slots (octets pos) stream
(let* ((length (min (- end start) (- (length octets) pos)))
(end1 (+ start length))
(end2 (+ pos length)))
(replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
(setf pos end2)
end1)))
(defun make-octet-input-stream (octets)
(make-instance 'octet-input-stream :octets octets))
(defun parse-octets (octets handler &rest args)
(apply #'parse-stream (make-octet-input-stream octets) handler args))
@ -3389,7 +3394,7 @@
;; used only by read-att-value-2
(defun find-internal-entity-expansion (name)
(let ((zinput (make-zstream)))
(with-zstream (zinput)
(with-rune-collector-3 (collect)
(labels ((muffle (input)
(let (c)
@ -3433,8 +3438,8 @@
(let ((*validate* nil))
(if (get-entity-definition name :general dtd)
(let* ((*ctx* (make-context :handler handler :dtd dtd))
(input (make-zstream))
(*data-behaviour* :DOC))
(with-zstream (input)
(with-scratch-pads ()
(recurse-on-entity
input name :general
@ -3445,7 +3450,7 @@
(external-entdef (p/ext-parsed-ent input)))
(unless (eq (peek-token input) :eof)
(wf-error input "Trailing garbage. - ~S"
(peek-token input))))))))
(peek-token input)))))))))
nil)))
(defun read-att-value-2 (input)