Initial revision
This commit is contained in:
391
xstream.lisp
Normal file
391
xstream.lisp
Normal file
@ -0,0 +1,391 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: runes; readtable: runes; Encoding: utf-8; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: Fast streams
|
||||
;;; Created: 1999-07-17
|
||||
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
|
||||
;;; License: LGPL (See file COPYING for details).
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; © copyright 1999 by Gilbert Baumann
|
||||
|
||||
;;; This library is free software; you can redistribute it and/or
|
||||
;;; modify it under the terms of the GNU Library General Public
|
||||
;;; License as published by the Free Software Foundation; either
|
||||
;;; version 2 of the License, or (at your option) any later version.
|
||||
;;;
|
||||
;;; This library is distributed in the hope that it will be useful,
|
||||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;; Library General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU Library General Public
|
||||
;;; License along with this library; if not, write to the
|
||||
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
;;; Boston, MA 02111-1307 USA.
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
;;; API
|
||||
;;
|
||||
;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
|
||||
;; [function]
|
||||
;; MAKE-ROD-XSTREAM rod &key name [function]
|
||||
;; CLOSE-XSTREAM xstream [function]
|
||||
;; XSTREAM-P object [function]
|
||||
;;
|
||||
;; READ-RUNE xstream [macro]
|
||||
;; PEEK-RUNE xstream [macro]
|
||||
;; FREAD-RUNE xstream [function]
|
||||
;; FPEEK-RUNE xstream [function]
|
||||
;; CONSUME-RUNE xstream [macro]
|
||||
;; UNREAD-RUNE rune xstream [function]
|
||||
;;
|
||||
;; XSTREAM-NAME xstream [accessor]
|
||||
;; XSTREAM-POSITION xstream [function]
|
||||
;; XSTREAM-LINE-NUMBER xstream [function]
|
||||
;; XSTREAM-COLUMN-NUMBER xstream [function]
|
||||
;; XSTREAM-PLIST xstream [accessor]
|
||||
;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
|
||||
;; SET-TO-FULL-SPEED xstream [function]
|
||||
|
||||
;; [*] switching the encoding on the fly is only possible when the
|
||||
;; stream's buffer is empty; therefore to be able to switch the
|
||||
;; encoding, while some runes are already read, set the stream's speed
|
||||
;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
|
||||
;; and later set it to full speed. (The encoding of the runes
|
||||
;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
|
||||
;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
|
||||
;; speed up to a full buffer length.
|
||||
|
||||
;; An encoding is simply something, which provides the DECODE-SEQUENCE
|
||||
;; method.
|
||||
|
||||
;;; Controller protocol
|
||||
;;
|
||||
;; READ-OCTECTS sequence os-stream start end -> first-non-written
|
||||
;; XSTREAM/CLOSE os-stream
|
||||
;;
|
||||
|
||||
(eval-when (eval compile load)
|
||||
(defparameter *fast* '(optimize (speed 3) (safety 0)))
|
||||
;;(defparameter *fast* '(optimize (speed 2) (safety 3)))
|
||||
)
|
||||
|
||||
;; Let us first define fast fixnum arithmetric get rid of type
|
||||
;; checks. (After all we know what we do here).
|
||||
|
||||
(defmacro fx-op (op &rest xs)
|
||||
`(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
|
||||
(defmacro fx-pred (op &rest xs)
|
||||
`(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
|
||||
|
||||
(defmacro %+ (&rest xs) `(fx-op + ,@xs))
|
||||
(defmacro %= (&rest xs) `(fx-pred = ,@xs))
|
||||
|
||||
(deftype buffer-index ()
|
||||
`(unsigned-byte ,(integer-length array-total-size-limit)))
|
||||
|
||||
(deftype buffer-byte ()
|
||||
`(unsigned-byte 16))
|
||||
|
||||
(deftype octet ()
|
||||
`(unsigned-byte 8))
|
||||
|
||||
;; The usage of a special marker for EOF is experimental and
|
||||
;; considered unhygenic.
|
||||
|
||||
(defconstant +end+ #xFFFF
|
||||
"Special marker inserted into stream buffers to indicate end of buffered data.")
|
||||
|
||||
(defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte))
|
||||
(defvar +null-octet-buffer+ (make-array 0 :element-type 'octet))
|
||||
|
||||
(defstruct (xstream
|
||||
(:constructor make-xstream/low)
|
||||
(:copier nil)
|
||||
(:print-function print-xstream))
|
||||
|
||||
;;; Read buffer
|
||||
|
||||
;; the buffer itself
|
||||
(buffer +null-buffer+
|
||||
:type (simple-array buffer-byte (*)))
|
||||
;; points to the next element of `buffer' containing the next rune
|
||||
;; about to be read.
|
||||
(read-ptr 0 :type buffer-index)
|
||||
;; points to the first element of `buffer' not containing a rune to
|
||||
;; be read.
|
||||
(fill-ptr 0 :type buffer-index)
|
||||
|
||||
;;; OS buffer
|
||||
|
||||
;; a scratch pad for READ-SEQUENCE
|
||||
(os-buffer +null-octet-buffer+
|
||||
:type (simple-array octet (*)))
|
||||
|
||||
;; `os-left-start', `os-left-end' designate a region of os-buffer,
|
||||
;; which still contains some undecoded data. This is needed because
|
||||
;; of the DECODE-SEQUENCE protocol
|
||||
(os-left-start 0 :type buffer-index)
|
||||
(os-left-end 0 :type buffer-index)
|
||||
|
||||
;; How much to read each time
|
||||
(speed 0 :type buffer-index)
|
||||
|
||||
;; Some stream object obeying to a certain protcol
|
||||
os-stream
|
||||
|
||||
;; The external format
|
||||
;; (some object offering the ENCODING protocol)
|
||||
(encoding :utf-8)
|
||||
|
||||
;;A STREAM-NAME object
|
||||
(name nil)
|
||||
|
||||
;; a plist a struct keeps the hack away
|
||||
(plist nil)
|
||||
|
||||
;; Stream Position
|
||||
(line-number 1 :type integer) ;current line number
|
||||
(line-start 0 :type integer) ;stream position the current line starts at
|
||||
(buffer-start 0 :type integer) ;stream position the current buffer starts at
|
||||
|
||||
;; There is no need to maintain a column counter for each character
|
||||
;; read, since we can easily compute it from `line-start' and
|
||||
;; `buffer-start'.
|
||||
)
|
||||
|
||||
(defmacro read-rune (input)
|
||||
"Read a single rune off the xstream `input'. In case of end of file :EOF
|
||||
is returned."
|
||||
`((lambda (input)
|
||||
(declare (type xstream input)
|
||||
#.*fast*)
|
||||
(let ((rp (xstream-read-ptr input)))
|
||||
(declare (type buffer-index rp))
|
||||
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
|
||||
rp)))
|
||||
(declare (type buffer-byte ch))
|
||||
(setf (xstream-read-ptr input) (%+ rp 1))
|
||||
(cond ((%= ch +end+)
|
||||
(the (or (member :eof) rune)
|
||||
(xstream-underflow input)))
|
||||
((%= ch #x000A) ;line break
|
||||
(account-for-line-break input)
|
||||
(code-rune ch))
|
||||
(t
|
||||
(code-rune ch))))))
|
||||
,input))
|
||||
|
||||
(defmacro peek-rune (input)
|
||||
"Peek a single rune off the xstream `input'. In case of end of file :EOF
|
||||
is returned."
|
||||
`((lambda (input)
|
||||
(declare (type xstream input)
|
||||
#.*fast*)
|
||||
(let ((rp (xstream-read-ptr input)))
|
||||
(declare (type buffer-index rp))
|
||||
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
|
||||
rp)))
|
||||
(declare (type buffer-byte ch))
|
||||
(cond ((%= ch +end+)
|
||||
(prog1
|
||||
(the (or (member :eof) rune) (xstream-underflow input))
|
||||
(setf (xstream-read-ptr input) 0)))
|
||||
(t
|
||||
(code-rune ch))))))
|
||||
,input))
|
||||
|
||||
(defmacro consume-rune (input)
|
||||
"Like READ-RUNE, but does not actually return the read rune."
|
||||
`((lambda (input)
|
||||
(declare (type xstream input)
|
||||
#.*fast*)
|
||||
(let ((rp (xstream-read-ptr input)))
|
||||
(declare (type buffer-index rp))
|
||||
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
|
||||
rp)))
|
||||
(declare (type buffer-byte ch))
|
||||
(setf (xstream-read-ptr input) (%+ rp 1))
|
||||
(when (%= ch +end+)
|
||||
(xstream-underflow input))
|
||||
(when (%= ch #x000A) ;line break
|
||||
(account-for-line-break input) )))
|
||||
nil)
|
||||
,input))
|
||||
|
||||
(defsubst unread-rune (rune input)
|
||||
"Unread the last recently read rune; if there wasn't such a rune, you
|
||||
deserve to lose."
|
||||
(declare (ignore rune))
|
||||
(decf (xstream-read-ptr input))
|
||||
(when (rune= (peek-rune input) #/u+000A) ;was it a line break?
|
||||
(unaccount-for-line-break input)))
|
||||
|
||||
(defun fread-rune (input)
|
||||
(read-rune input))
|
||||
|
||||
(defun fpeek-rune (input)
|
||||
(peek-rune input))
|
||||
|
||||
;;; Line counting
|
||||
|
||||
(defun account-for-line-break (input)
|
||||
(declare (type xstream input))
|
||||
(incf (xstream-line-number input))
|
||||
(setf (xstream-line-start input)
|
||||
(+ (xstream-buffer-start input) (xstream-read-ptr input))))
|
||||
|
||||
(defun unaccount-for-line-break (input)
|
||||
;; incomplete!
|
||||
;; We better use a traditional lookahead technique or forbid unread-rune.
|
||||
(decf (xstream-line-number input)))
|
||||
|
||||
;; User API:
|
||||
|
||||
(defun xstream-position (input)
|
||||
(+ (xstream-buffer-start input) (xstream-read-ptr input)))
|
||||
|
||||
;; xstream-line-number is structure accessor
|
||||
|
||||
(defun xstream-column-number (input)
|
||||
(+ (- (xstream-position input)
|
||||
(xstream-line-start input))
|
||||
1))
|
||||
|
||||
;;; Underflow
|
||||
|
||||
;;(defun read-runes (sequence input))
|
||||
|
||||
(defun xstream-underflow (input)
|
||||
(declare (type xstream input))
|
||||
;; we are about to fill new data into the buffer, so we need to
|
||||
;; adjust buffer-start.
|
||||
(incf (xstream-buffer-start input)
|
||||
(- (xstream-fill-ptr input) 0))
|
||||
(let (n m)
|
||||
;; when there is something left in the os-buffer, we move it to
|
||||
;; the start of the buffer.
|
||||
(setf m (- (xstream-os-left-end input) (xstream-os-left-start input)))
|
||||
(unless (zerop m)
|
||||
(replace (xstream-os-buffer input) (xstream-os-buffer input)
|
||||
:start1 0 :end1 m
|
||||
:start2 (xstream-os-left-start input)
|
||||
:end2 (xstream-os-left-end input))
|
||||
;; then we take care that the buffer is large enough to carry at
|
||||
;; least 100 bytes (a random number)
|
||||
(unless (>= (length (xstream-os-buffer input)) 100)
|
||||
(error "You lost")
|
||||
;; todo: enlarge buffer
|
||||
))
|
||||
(setf n
|
||||
(read-octets (xstream-os-buffer input) (xstream-os-stream input)
|
||||
m (min (1- (length (xstream-os-buffer input)))
|
||||
(+ m (xstream-speed input)))))
|
||||
(cond ((%= n 0)
|
||||
(setf (xstream-read-ptr input) 0
|
||||
(xstream-fill-ptr input) n)
|
||||
(setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
|
||||
:eof)
|
||||
(t
|
||||
(multiple-value-bind (fnw fnr)
|
||||
(encoding:decode-sequence
|
||||
(xstream-encoding input)
|
||||
(xstream-os-buffer input) 0 n
|
||||
(xstream-buffer input) 0 (1- (length (xstream-buffer input)))
|
||||
(= n m))
|
||||
(setf (xstream-os-left-start input) fnr
|
||||
(xstream-os-left-end input) n
|
||||
(xstream-read-ptr input) 0
|
||||
(xstream-fill-ptr input) fnw)
|
||||
(setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
|
||||
(read-rune input))))))
|
||||
|
||||
;;; constructor
|
||||
|
||||
(defun make-xstream (os-stream &key name
|
||||
(speed 8192)
|
||||
(initial-speed 1)
|
||||
(initial-encoding :guess))
|
||||
;; XXX if initial-speed isn't 1, encoding will me munged up
|
||||
(assert (eql initial-speed 1))
|
||||
(multiple-value-bind (encoding preread)
|
||||
(if (eq initial-encoding :guess)
|
||||
(figure-encoding os-stream)
|
||||
(values initial-encoding nil))
|
||||
(let ((osbuf (make-array speed :element-type '(unsigned-byte 8))))
|
||||
(replace osbuf preread)
|
||||
(make-xstream/low
|
||||
:buffer (let ((r (make-array speed :element-type 'buffer-byte)))
|
||||
(setf (elt r 0) #xFFFF)
|
||||
r)
|
||||
:read-ptr 0
|
||||
:fill-ptr 0
|
||||
:os-buffer osbuf
|
||||
:speed initial-speed
|
||||
:os-stream os-stream
|
||||
:os-left-start 0
|
||||
:os-left-end (length preread)
|
||||
:encoding encoding
|
||||
:name name))))
|
||||
|
||||
(defun make-rod-xstream (string &key name)
|
||||
;; XXX encoding is mis-handled by this kind of stream
|
||||
(let ((n (length string)))
|
||||
(let ((buffer (make-array (1+ n) :element-type 'buffer-byte)))
|
||||
(declare (type (simple-array buffer-byte (*)) buffer))
|
||||
;; copy the rod
|
||||
(do ((i (1- n) (- i 1)))
|
||||
((< i 0))
|
||||
(declare (type fixnum i))
|
||||
(setf (aref buffer i) (rune-code (%rune string i))))
|
||||
(setf (aref buffer n) +end+)
|
||||
;;
|
||||
(make-xstream/low :buffer buffer
|
||||
:read-ptr 0
|
||||
:fill-ptr n
|
||||
;; :os-buffer nil
|
||||
:speed 1
|
||||
:os-stream nil
|
||||
:name name))))
|
||||
|
||||
(defmethod figure-encoding ((stream null))
|
||||
(values :utf-8 nil))
|
||||
|
||||
(defmethod figure-encoding ((stream stream))
|
||||
(let ((c0 (read-byte stream nil :eof)))
|
||||
(cond ((eq c0 :eof)
|
||||
(values :utf-8 nil))
|
||||
(t
|
||||
(let ((c1 (read-byte stream nil :eof)))
|
||||
(cond ((eq c1 :eof)
|
||||
(values :utf-8 (list c0)))
|
||||
(t
|
||||
(cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil))
|
||||
((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil))
|
||||
(t
|
||||
(values :utf-8 (list c0 c1)))))))))))
|
||||
|
||||
;;; misc
|
||||
|
||||
(defun close-xstream (input)
|
||||
(xstream/close (xstream-os-stream input)))
|
||||
|
||||
(defun set-to-full-speed (xstream)
|
||||
(setf (xstream-speed xstream) (length (xstream-os-buffer xstream))))
|
||||
|
||||
;;; controller implementations
|
||||
|
||||
(defmethod read-octets (sequence (stream stream) start end)
|
||||
(#+CLISP lisp:read-byte-sequence
|
||||
#-CLISP read-sequence
|
||||
sequence stream :start start :end end))
|
||||
|
||||
(defmethod read-octets (sequence (stream null) start end)
|
||||
(declare (ignore sequence start end))
|
||||
0)
|
||||
|
||||
(defmethod xstream/close ((stream stream))
|
||||
(close stream))
|
||||
|
||||
(defmethod xstream/close ((stream null))
|
||||
nil)
|
||||
Reference in New Issue
Block a user