Files
closure-common/xstream.lisp
dlichteblau ffd1ff2ff9 kleine anpassungen fuer cl-xmpp
neu: do-node-map, map-node-map
2005-12-30 14:21:46 +00:00

406 lines
15 KiB
Common Lisp

;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
;;; ---------------------------------------------------------------------------
;;; Title: Fast streams
;;; Created: 1999-07-17
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
;;; License: Lisp-LGPL (See file COPYING for details).
;;; ---------------------------------------------------------------------------
;;; © copyright 1999 by Gilbert Baumann
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Library General Public License for more details.
;;;
;;; You should have received a copy of the GNU Library General Public
;;; License along with this library; if not, write to the
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;;; Boston, MA 02111-1307 USA.
(in-package :runes)
;;; API
;;
;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
;; [function]
;; MAKE-ROD-XSTREAM rod &key name [function]
;; CLOSE-XSTREAM xstream [function]
;; XSTREAM-P object [function]
;;
;; READ-RUNE xstream [macro]
;; PEEK-RUNE xstream [macro]
;; FREAD-RUNE xstream [function]
;; FPEEK-RUNE xstream [function]
;; CONSUME-RUNE xstream [macro]
;; UNREAD-RUNE rune xstream [function]
;;
;; XSTREAM-NAME xstream [accessor]
;; XSTREAM-POSITION xstream [function]
;; XSTREAM-LINE-NUMBER xstream [function]
;; XSTREAM-COLUMN-NUMBER xstream [function]
;; XSTREAM-PLIST xstream [accessor]
;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
;; SET-TO-FULL-SPEED xstream [function]
;; [*] switching the encoding on the fly is only possible when the
;; stream's buffer is empty; therefore to be able to switch the
;; encoding, while some runes are already read, set the stream's speed
;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
;; and later set it to full speed. (The encoding of the runes
;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
;; speed up to a full buffer length.
;; An encoding is simply something, which provides the DECODE-SEQUENCE
;; method.
;;; Controller protocol
;;
;; READ-OCTECTS sequence os-stream start end -> first-non-written
;; XSTREAM/CLOSE os-stream
;;
(eval-when (:compile-toplevel :load-toplevel :execute)
(defparameter *fast* '(optimize (speed 3) (safety 0))))
;; Let us first define fast fixnum arithmetric get rid of type
;; checks. (After all we know what we do here).
(defmacro fx-op (op &rest xs)
`(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
(defmacro fx-pred (op &rest xs)
`(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
(defmacro %+ (&rest xs) `(fx-op + ,@xs))
(defmacro %= (&rest xs) `(fx-pred = ,@xs))
(deftype buffer-index ()
`(unsigned-byte ,(integer-length array-total-size-limit)))
(deftype buffer-byte ()
`(unsigned-byte 16))
(deftype octet ()
`(unsigned-byte 8))
;; The usage of a special marker for EOF is experimental and
;; considered unhygenic.
(defconstant +end+ #xFFFF
"Special marker inserted into stream buffers to indicate end of buffered data.")
(defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte))
(defvar +null-octet-buffer+ (make-array 0 :element-type 'octet))
(defstruct (xstream
(:constructor make-xstream/low)
(:copier nil)
(:print-function print-xstream))
;;; Read buffer
;; the buffer itself
(buffer +null-buffer+
:type (simple-array buffer-byte (*)))
;; points to the next element of `buffer' containing the next rune
;; about to be read.
(read-ptr 0 :type buffer-index)
;; points to the first element of `buffer' not containing a rune to
;; be read.
(fill-ptr 0 :type buffer-index)
;;; OS buffer
;; a scratch pad for READ-SEQUENCE
(os-buffer +null-octet-buffer+
:type (simple-array octet (*)))
;; `os-left-start', `os-left-end' designate a region of os-buffer,
;; which still contains some undecoded data. This is needed because
;; of the DECODE-SEQUENCE protocol
(os-left-start 0 :type buffer-index)
(os-left-end 0 :type buffer-index)
;; How much to read each time
(speed 0 :type buffer-index)
;; Some stream object obeying to a certain protcol
os-stream
;; The external format
;; (some object offering the ENCODING protocol)
(encoding :utf-8)
;;A STREAM-NAME object
(name nil)
;; a plist a struct keeps the hack away
(plist nil)
;; Stream Position
(line-number 1 :type integer) ;current line number
(line-start 0 :type integer) ;stream position the current line starts at
(buffer-start 0 :type integer) ;stream position the current buffer starts at
;; There is no need to maintain a column counter for each character
;; read, since we can easily compute it from `line-start' and
;; `buffer-start'.
)
(defun print-xstream (self sink depth)
(declare (ignore depth))
(format sink "#<~S ~S>" (type-of self) (xstream-name self)))
(defmacro read-rune (input)
"Read a single rune off the xstream `input'. In case of end of file :EOF
is returned."
`((lambda (input)
(declare (type xstream input)
#.*fast*)
(let ((rp (xstream-read-ptr input)))
(declare (type buffer-index rp))
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
rp)))
(declare (type buffer-byte ch))
(setf (xstream-read-ptr input) (%+ rp 1))
(cond ((%= ch +end+)
(the (or (member :eof) rune)
(xstream-underflow input)))
((%= ch #x000A) ;line break
(account-for-line-break input)
(code-rune ch))
(t
(code-rune ch))))))
,input))
(defmacro peek-rune (input)
"Peek a single rune off the xstream `input'. In case of end of file :EOF
is returned."
`((lambda (input)
(declare (type xstream input)
#.*fast*)
(let ((rp (xstream-read-ptr input)))
(declare (type buffer-index rp))
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
rp)))
(declare (type buffer-byte ch))
(cond ((%= ch +end+)
(prog1
(the (or (member :eof) rune) (xstream-underflow input))
(setf (xstream-read-ptr input) 0)))
(t
(code-rune ch))))))
,input))
(defmacro consume-rune (input)
"Like READ-RUNE, but does not actually return the read rune."
`((lambda (input)
(declare (type xstream input)
#.*fast*)
(let ((rp (xstream-read-ptr input)))
(declare (type buffer-index rp))
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
rp)))
(declare (type buffer-byte ch))
(setf (xstream-read-ptr input) (%+ rp 1))
(when (%= ch +end+)
(xstream-underflow input))
(when (%= ch #x000A) ;line break
(account-for-line-break input) )))
nil)
,input))
(definline unread-rune (rune input)
"Unread the last recently read rune; if there wasn't such a rune, you
deserve to lose."
(declare (ignore rune))
(decf (xstream-read-ptr input))
(when (rune= (peek-rune input) #/u+000A) ;was it a line break?
(unaccount-for-line-break input)))
(defun fread-rune (input)
(read-rune input))
(defun fpeek-rune (input)
(peek-rune input))
;;; Line counting
(defun account-for-line-break (input)
(declare (type xstream input))
(incf (xstream-line-number input))
(setf (xstream-line-start input)
(+ (xstream-buffer-start input) (xstream-read-ptr input))))
(defun unaccount-for-line-break (input)
;; incomplete!
;; We better use a traditional lookahead technique or forbid unread-rune.
(decf (xstream-line-number input)))
;; User API:
(defun xstream-position (input)
(+ (xstream-buffer-start input) (xstream-read-ptr input)))
;; xstream-line-number is structure accessor
(defun xstream-column-number (input)
(+ (- (xstream-position input)
(xstream-line-start input))
1))
;;; Underflow
(defmethod xstream-underflow ((input xstream))
(declare (type xstream input))
;; we are about to fill new data into the buffer, so we need to
;; adjust buffer-start.
(incf (xstream-buffer-start input)
(- (xstream-fill-ptr input) 0))
(let (n m)
;; when there is something left in the os-buffer, we move it to
;; the start of the buffer.
(setf m (- (xstream-os-left-end input) (xstream-os-left-start input)))
(unless (zerop m)
(replace (xstream-os-buffer input) (xstream-os-buffer input)
:start1 0 :end1 m
:start2 (xstream-os-left-start input)
:end2 (xstream-os-left-end input))
;; then we take care that the buffer is large enough to carry at
;; least 100 bytes (a random number)
;;
;; david: was heisst da random? ich nehme an, dass 100 einfach
;; ausreichend sein soll, um die laengste utf-8 bytesequenz oder die
;; beiden utf-16 surrogates zu halten? dann ist 100 ja wohl dicke
;; ausreichend und koennte in make-xstream ordentlich geprueft werden.
;; oder was geht hier vor?
(unless (>= (length (xstream-os-buffer input)) 100)
(error "You lost")))
(setf n
(read-octets (xstream-os-buffer input) (xstream-os-stream input)
m (min (1- (length (xstream-os-buffer input)))
(+ m (xstream-speed input)))))
(cond ((%= n 0)
(setf (xstream-read-ptr input) 0
(xstream-fill-ptr input) n)
(setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
:eof)
(t
(multiple-value-bind (fnw fnr)
(runes-encoding:decode-sequence
(xstream-encoding input)
(xstream-os-buffer input) 0 n
(xstream-buffer input) 0 (1- (length (xstream-buffer input)))
(= n m))
(setf (xstream-os-left-start input) fnr
(xstream-os-left-end input) n
(xstream-read-ptr input) 0
(xstream-fill-ptr input) fnw)
(setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
(read-rune input))))))
;;; constructor
(defun make-xstream (os-stream &key name
(speed 8192)
(initial-speed 1)
(initial-encoding :guess))
;; XXX if initial-speed isn't 1, encoding will me munged up
(assert (eql initial-speed 1))
(multiple-value-bind (encoding preread)
(if (eq initial-encoding :guess)
(figure-encoding os-stream)
(values initial-encoding nil))
(let ((osbuf (make-array speed :element-type '(unsigned-byte 8))))
(replace osbuf preread)
(make-xstream/low
:buffer (let ((r (make-array speed :element-type 'buffer-byte)))
(setf (elt r 0) #xFFFF)
r)
:read-ptr 0
:fill-ptr 0
:os-buffer osbuf
:speed initial-speed
:os-stream os-stream
:os-left-start 0
:os-left-end (length preread)
:encoding encoding
:name name))))
(defun make-rod-xstream (string &key name)
;; XXX encoding is mis-handled by this kind of stream
(let ((n (length string)))
(let ((buffer (make-array (1+ n) :element-type 'buffer-byte)))
(declare (type (simple-array buffer-byte (*)) buffer))
;; copy the rod
(do ((i (1- n) (- i 1)))
((< i 0))
(declare (type fixnum i))
(setf (aref buffer i) (rune-code (%rune string i))))
(setf (aref buffer n) +end+)
;;
(make-xstream/low :buffer buffer
:read-ptr 0
:fill-ptr n
;; :os-buffer nil
:speed 1
:os-stream nil
:name name))))
(defmethod figure-encoding ((stream null))
(values :utf-8 nil))
(defmethod figure-encoding ((stream stream))
(let ((c0 (read-byte stream nil :eof)))
(cond ((eq c0 :eof)
(values :utf-8 nil))
(t
(let ((c1 (read-byte stream nil :eof)))
(cond ((eq c1 :eof)
(values :utf-8 (list c0)))
(t
(cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil))
((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil))
(t
(values :utf-8 (list c0 c1)))))))))))
;;; misc
(defun close-xstream (input)
(xstream/close (xstream-os-stream input)))
(defun set-to-full-speed (xstream)
(setf (xstream-speed xstream) (length (xstream-os-buffer xstream))))
;;; controller implementations
(defmethod read-octets (sequence (stream stream) start end)
(#+CLISP ext:read-byte-sequence
#-CLISP read-sequence
sequence stream :start start :end end))
#+cmu
(defmethod read-octets :around (sequence (stream stream) start end)
;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2)
;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages
;; in SEQUENCE and make sure no GC happens between that and the read(2).
(ext::without-gcing
(loop for i from start below end
do (setf (elt sequence i) (elt sequence i)))
(call-next-method)))
(defmethod read-octets (sequence (stream null) start end)
(declare (ignore sequence start end))
0)
(defmethod xstream/close ((stream stream))
(close stream))
(defmethod xstream/close ((stream null))
nil)