SCL support (thanks to Douglas Crosher). Includes support for
implementations where URIs are valid namestrings, and a mode
where normal streams are used instead of xstreams and ystreams
(albeit both SCL-specific at this point).
This commit is contained in:
@ -79,7 +79,11 @@
|
||||
#:make-string-ystream/utf8
|
||||
;; #+rune-is-integer
|
||||
#:make-character-stream-ystream/utf8
|
||||
#:runes-to-utf8/adjustable-string))
|
||||
#:runes-to-utf8/adjustable-string
|
||||
|
||||
#:rod-to-utf8-string
|
||||
#:utf8-string-to-rod
|
||||
#:make-octet-input-stream))
|
||||
|
||||
(defpackage :utf8-runes
|
||||
(:use :cl)
|
||||
|
||||
253
stream-scl.lisp
Normal file
253
stream-scl.lisp
Normal file
@ -0,0 +1,253 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: Fast streams
|
||||
;;; Created: 1999-07-17
|
||||
;;; Author: Douglas Crosher
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; (c) copyright 2007 by Douglas Crosher
|
||||
|
||||
;;; This library is free software; you can redistribute it and/or
|
||||
;;; modify it under the terms of the GNU Library General Public
|
||||
;;; License as published by the Free Software Foundation; either
|
||||
;;; version 2 of the License, or (at your option) any later version.
|
||||
;;;
|
||||
;;; This library is distributed in the hope that it will be useful,
|
||||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;; Library General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU Library General Public
|
||||
;;; License along with this library; if not, write to the
|
||||
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
;;; Boston, MA 02111-1307 USA.
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(defparameter *fast* '(optimize (speed 3) (safety 3))))
|
||||
|
||||
(deftype runes-encoding:encoding-error ()
|
||||
'ext:character-conversion-error)
|
||||
|
||||
|
||||
;;; xstream
|
||||
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
|
||||
(defclass xstream (ext:character-stream)
|
||||
((name :initarg :name :initform nil
|
||||
:accessor xstream-name)
|
||||
(column :initarg :column :initform 0)
|
||||
(line :initarg :line :initform 1)
|
||||
(unread-column :initarg :unread-column :initform 0)))
|
||||
|
||||
(defclass eol-conversion-xstream (lisp::eol-conversion-input-stream xstream)
|
||||
())
|
||||
|
||||
) ; eval-when
|
||||
|
||||
(defun make-eol-conversion-xstream (source-stream)
|
||||
"Returns a character stream that conversion CR-LF pairs and lone CR
|
||||
characters into single linefeed character."
|
||||
(declare (type stream source-stream))
|
||||
(let ((stream (ext:make-eol-conversion-stream source-stream
|
||||
:input t
|
||||
:close-stream-p t)))
|
||||
(change-class stream 'eol-conversion-xstream)))
|
||||
|
||||
(definline xstream-p (stream)
|
||||
(typep stream 'xstream))
|
||||
|
||||
(defun close-xstream (input)
|
||||
(close input))
|
||||
|
||||
(definline read-rune (input)
|
||||
(declare (type stream input)
|
||||
(inline read-char)
|
||||
#.*fast*)
|
||||
(let ((char (read-char input nil :eof)))
|
||||
(cond ((member char '(#\UFFFE #\UFFFF))
|
||||
;; These characters are illegal within XML documents.
|
||||
(simple-error 'ext:character-conversion-error
|
||||
"~@<Illegal XML document character: ~S~:@>" char))
|
||||
((eql char #\linefeed)
|
||||
(setf (slot-value input 'unread-column) (slot-value input 'column))
|
||||
(setf (slot-value input 'column) 0)
|
||||
(incf (the kernel:index (slot-value input 'line))))
|
||||
(t
|
||||
(incf (the kernel:index (slot-value input 'column)))))
|
||||
char))
|
||||
|
||||
(definline peek-rune (input)
|
||||
(declare (type stream input)
|
||||
(inline peek-char)
|
||||
#.*fast*)
|
||||
(peek-char nil input nil :eof))
|
||||
|
||||
(definline consume-rune (input)
|
||||
(declare (type stream input)
|
||||
(inline read-rune)
|
||||
#.*fast*)
|
||||
(read-rune input)
|
||||
nil)
|
||||
|
||||
(definline unread-rune (rune input)
|
||||
(declare (type stream input)
|
||||
(inline unread-char)
|
||||
#.*fast*)
|
||||
(unread-char rune input)
|
||||
(cond ((eql rune #\linefeed)
|
||||
(setf (slot-value input 'column) (slot-value input 'unread-column))
|
||||
(setf (slot-value input 'unread-column) 0)
|
||||
(decf (the kernel:index (slot-value input 'line))))
|
||||
(t
|
||||
(decf (the kernel:index (slot-value input 'column)))))
|
||||
nil)
|
||||
|
||||
(defun fread-rune (input)
|
||||
(read-rune input))
|
||||
|
||||
(defun fpeek-rune (input)
|
||||
(peek-rune input))
|
||||
|
||||
(defun xstream-position (input)
|
||||
(file-position input))
|
||||
|
||||
(defun runes-encoding:find-encoding (encoding)
|
||||
encoding)
|
||||
|
||||
(defun make-xstream (os-stream &key name
|
||||
(speed 8192)
|
||||
(initial-speed 1)
|
||||
(initial-encoding :guess))
|
||||
(declare (ignore speed))
|
||||
(assert (eql initial-speed 1))
|
||||
(assert (eq initial-encoding :guess))
|
||||
(let* ((stream (ext:make-xml-character-conversion-stream os-stream
|
||||
:input t
|
||||
:close-stream-p t))
|
||||
(xstream (make-eol-conversion-xstream stream)))
|
||||
(setf (xstream-name xstream) name)
|
||||
xstream))
|
||||
|
||||
|
||||
(defclass xstream-string-input-stream (lisp::string-input-stream xstream)
|
||||
())
|
||||
|
||||
(defun make-rod-xstream (string &key name)
|
||||
(declare (type string string))
|
||||
(let ((stream (make-string-input-stream string)))
|
||||
(change-class stream 'xstream-string-input-stream :name name)))
|
||||
|
||||
;;; already at 'full speed' so just return the buffer size.
|
||||
(defun set-to-full-speed (stream)
|
||||
(length (ext:stream-in-buffer stream)))
|
||||
|
||||
(defun xstream-speed (stream)
|
||||
(length (ext:stream-in-buffer stream)))
|
||||
|
||||
(defun xstream-line-number (stream)
|
||||
(slot-value stream 'line))
|
||||
|
||||
(defun xstream-column-number (stream)
|
||||
(slot-value stream 'column))
|
||||
|
||||
(defun xstream-encoding (stream)
|
||||
(stream-external-format stream))
|
||||
|
||||
;;; the encoding will have already been detected, but it is checked against the
|
||||
;;; declared encoding here.
|
||||
(defun (setf xstream-encoding) (declared-encoding stream)
|
||||
(let* ((initial-encoding (xstream-encoding stream))
|
||||
(canonical-encoding
|
||||
(cond ((and (eq initial-encoding :utf-16le)
|
||||
(member declared-encoding '(:utf-16 :utf16 :utf-16le :utf16le)
|
||||
:test 'string-equal))
|
||||
:utf-16le)
|
||||
((and (eq initial-encoding :utf-16be)
|
||||
(member declared-encoding '(:utf-16 :utf16 :utf-16be :utf16be)
|
||||
:test 'string-equal))
|
||||
:utf-16be)
|
||||
((and (eq initial-encoding :ucs-4be)
|
||||
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4be :ucs4be)
|
||||
:test 'string-equal))
|
||||
:ucs4-be)
|
||||
((and (eq initial-encoding :ucs-4le)
|
||||
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4le :ucs4le)
|
||||
:test 'string-equal))
|
||||
:ucs4-le)
|
||||
(t
|
||||
declared-encoding))))
|
||||
(unless (string-equal initial-encoding canonical-encoding)
|
||||
(warn "Unable to change xstream encoding from ~S to ~S (~S)~%"
|
||||
initial-encoding declared-encoding canonical-encoding))
|
||||
declared-encoding))
|
||||
|
||||
|
||||
;;; ystream - a run output stream.
|
||||
|
||||
(deftype ystream () 'stream)
|
||||
|
||||
(defun ystream-column (stream)
|
||||
(ext:line-column stream))
|
||||
|
||||
(definline write-rune (rune stream)
|
||||
(declare (inline write-char))
|
||||
(write-char rune stream))
|
||||
|
||||
(defun write-rod (rod stream)
|
||||
(declare (type rod rod)
|
||||
(type stream stream))
|
||||
(write-string rod stream))
|
||||
|
||||
(defun make-rod-ystream ()
|
||||
(make-string-output-stream))
|
||||
|
||||
(defun close-ystream (stream)
|
||||
(etypecase stream
|
||||
(ext:string-output-stream
|
||||
(get-output-stream-string stream))
|
||||
(ext:character-conversion-output-stream
|
||||
(let ((target (slot-value stream 'stream)))
|
||||
(close stream)
|
||||
(if (typep target 'ext:byte-output-stream)
|
||||
(ext:get-output-stream-bytes target)
|
||||
stream)))))
|
||||
|
||||
;;;; CHARACTER-STREAM-YSTREAM
|
||||
|
||||
(defun make-character-stream-ystream (target-stream)
|
||||
target-stream)
|
||||
|
||||
|
||||
;;;; OCTET-VECTOR-YSTREAM
|
||||
|
||||
(defun make-octet-vector-ystream ()
|
||||
(let ((target (ext:make-byte-output-stream)))
|
||||
(ext:make-character-conversion-stream target :output t
|
||||
:external-format :utf-8
|
||||
:close-stream-p t)))
|
||||
|
||||
;;;; OCTET-STREAM-YSTREAM
|
||||
|
||||
(defun make-octet-stream-ystream (os-stream)
|
||||
(ext:make-character-conversion-stream os-stream :output t
|
||||
:external-format :utf-8
|
||||
:close-stream-p t))
|
||||
|
||||
|
||||
;;;; helper functions
|
||||
|
||||
(defun rod-to-utf8-string (rod)
|
||||
(ext:make-string-from-bytes (ext:make-bytes-from-string rod :utf8)
|
||||
:iso-8859-1))
|
||||
|
||||
(defun utf8-string-to-rod (str)
|
||||
(let ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)))
|
||||
(ext:make-string-from-bytes bytes :utf-8)))
|
||||
|
||||
(defun make-octet-input-stream (octets)
|
||||
(ext:make-byte-input-stream octets))
|
||||
|
||||
|
||||
47
ystream.lisp
47
ystream.lisp
@ -248,3 +248,50 @@
|
||||
|
||||
(defmethod close-ystream ((ystream string-ystream/utf8))
|
||||
(get-output-stream-string (ystream-os-stream ystream))))
|
||||
|
||||
|
||||
;;;; helper functions
|
||||
|
||||
(defun rod-to-utf8-string (rod)
|
||||
(let ((out (make-buffer :element-type 'character)))
|
||||
(runes-to-utf8/adjustable-string out rod (length rod))
|
||||
out))
|
||||
|
||||
(defun utf8-string-to-rod (str)
|
||||
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
|
||||
(buffer (make-array (length bytes) :element-type '(unsigned-byte 16)))
|
||||
(n (runes-encoding:decode-sequence
|
||||
:utf-8 bytes 0 (length bytes) buffer 0 0 nil))
|
||||
(result (make-array n :element-type 'rune)))
|
||||
(map-into result #'code-rune buffer)
|
||||
result))
|
||||
|
||||
(defclass octet-input-stream
|
||||
(trivial-gray-stream-mixin fundamental-binary-input-stream)
|
||||
((octets :initarg :octets)
|
||||
(pos :initform 0)))
|
||||
|
||||
(defmethod close ((stream octet-input-stream) &key abort)
|
||||
(declare (ignore abort))
|
||||
(open-stream-p stream))
|
||||
|
||||
(defmethod stream-read-byte ((stream octet-input-stream))
|
||||
(with-slots (octets pos) stream
|
||||
(if (>= pos (length octets))
|
||||
:eof
|
||||
(prog1
|
||||
(elt octets pos)
|
||||
(incf pos)))))
|
||||
|
||||
(defmethod stream-read-sequence
|
||||
((stream octet-input-stream) sequence start end &key &allow-other-keys)
|
||||
(with-slots (octets pos) stream
|
||||
(let* ((length (min (- end start) (- (length octets) pos)))
|
||||
(end1 (+ start length))
|
||||
(end2 (+ pos length)))
|
||||
(replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
|
||||
(setf pos end2)
|
||||
end1)))
|
||||
|
||||
(defun make-octet-input-stream (octets)
|
||||
(make-instance 'octet-input-stream :octets octets))
|
||||
|
||||
Reference in New Issue
Block a user