diff --git a/cxml.asd b/cxml.asd index 2989ecf..2592acf 100644 --- a/cxml.asd +++ b/cxml.asd @@ -2,8 +2,13 @@ (:use :asdf :cl)) (in-package :cxml-system) -;; force loading of runes.asd, which installs *features* this file depends on -(find-system :runes) +;; force loading of closure-common.asd, which installs *FEATURES* this +;; file depends on. Use MISSING-DEPENDENCY for asdf-install. +(unless (find-system :closure-common nil) + (error 'missing-dependency + :required-by :cxml + :version nil + :requires :closure-common)) (defclass closure-source-file (cl-source-file) ()) @@ -36,7 +41,7 @@ (:file "space-normalizer" :depends-on ("xml-parse")) (:file "catalog" :depends-on ("xml-parse")) (:file "sax-proxy" :depends-on ("xml-parse"))) - :depends-on (:runes :puri #-scl :trivial-gray-streams)) + :depends-on (:closure-common :puri #-scl :trivial-gray-streams)) (defclass utf8dom-file (closure-source-file) ((of))) @@ -57,7 +62,8 @@ (defmethod perform ((operation compile-op) (c utf8dom-file)) (let ((*features* (cons 'utf8dom-file *features*)) (*readtable* - (symbol-value (find-symbol "*UTF8-RUNES-READTABLE*" :runes-system)))) + (symbol-value (find-symbol "*UTF8-RUNES-READTABLE*" + :closure-common-system)))) (call-next-method))) (asdf:defsystem :cxml-dom diff --git a/runes/characters.lisp b/runes/characters.lisp deleted file mode 100644 index 5fa1aa5..0000000 --- a/runes/characters.lisp +++ /dev/null @@ -1,148 +0,0 @@ -;;; copyright (c) 2004 knowledgeTools Int. GmbH -;;; Author of this version: David Lichteblau -;;; -;;; derived from runes.lisp, (c) copyright 1998,1999 by Gilbert Baumann -;;; -;;; License: Lisp-LGPL (See file COPYING for details). -;;; -;;; This code is free software; you can redistribute it and/or modify it -;;; under the terms of the version 2.1 of the GNU Lesser General Public -;;; License as published by the Free Software Foundation, as clarified -;;; by the "Preamble to the Gnu Lesser General Public License" found in -;;; the file COPYING. -;;; -;;; This code is distributed in the hope that it will be useful, -;;; but without any warranty; without even the implied warranty of -;;; merchantability or fitness for a particular purpose. See the GNU -;;; Lesser General Public License for more details. -;;; -;;; Version 2.1 of the GNU Lesser General Public License is in the file -;;; COPYING that was distributed with this file. If it is not present, -;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until -;;; superseded by a newer version) or write to the Free Software -;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -(in-package :runes) - -(deftype rune () #-lispworks 'character #+lispworks 'lw:simple-char) -(deftype rod () '(vector rune)) -(deftype simple-rod () '(simple-array rune)) - -(definline rune (rod index) - (char rod index)) - -(defun (setf rune) (new rod index) - (setf (char rod index) new)) - -(definline %rune (rod index) - (aref (the simple-string rod) (the fixnum index))) - -(definline (setf %rune) (new rod index) - (setf (aref (the simple-string rod) (the fixnum index)) new)) - -(defun rod-capitalize (rod) - (string-upcase rod)) - -(definline code-rune (x) (code-char x)) -(definline rune-code (x) (char-code x)) - -(definline rune= (x y) - (char= x y)) - -(defun rune-downcase (rune) - (char-downcase rune)) - -(definline rune-upcase (rune) - (char-upcase rune)) - -(defun rune-upper-case-letter-p (rune) - (upper-case-p rune)) - -(defun rune-lower-case-letter-p (rune) - (lower-case-p rune)) - -(defun rune-equal (x y) - (char-equal x y)) - -(defun rod-downcase (rod) - (string-downcase rod)) - -(defun rod-upcase (rod) - (string-upcase rod)) - -(definline white-space-rune-p (char) - (or (char= char #\tab) - (char= char #.(code-char 10)) ;Linefeed - (char= char #.(code-char 13)) ;Carriage Return - (char= char #\space))) - -(definline digit-rune-p (char &optional (radix 10)) - (digit-char-p char radix)) - -(defun rod (x) - (cond - ((stringp x) x) - ((symbolp x) (string x)) - ((characterp x) (string x)) - ((vectorp x) (coerce x 'string)) - ((integerp x) (string (code-char x))) - (t (error "Cannot convert ~S to a ~S" x 'rod)))) - -(defun runep (x) - (characterp x)) - -(defun sloopy-rod-p (x) - (stringp x)) - -(defun rod= (x y) - (if (zerop (length x)) - (zerop (length y)) - (and (plusp (length y)) (string= x y)))) - -(defun rod-equal (x y) - (string-equal x y)) - -(definline make-rod (size) - (make-string size :element-type 'rune)) - -(defun char-rune (char) - char) - -(defun rune-char (rune &optional default) - (declare (ignore default)) - rune) - -(defun rod-string (rod &optional (default-char #\?)) - (declare (ignore default-char)) - rod) - -(defun string-rod (string) - string) - -;;;; - -(defun rune<= (rune &rest more-runes) - (loop - for (a b) on (cons rune more-runes) - while b - always (char<= a b))) - -(defun rune>= (rune &rest more-runes) - (loop - for (a b) on (cons rune more-runes) - while b - always (char>= a b))) - -(defun rodp (object) - (stringp object)) - -(defun rod-subseq (source start &optional (end (length source))) - (unless (stringp source) - (error "~S is not of type ~S." source 'rod)) - (subseq source start end)) - -(defun rod-subseq* (source start &optional (end (length source))) - (rod-subseq source start end)) - -(defun rod< (rod1 rod2) - (string< rod1 rod2)) diff --git a/runes/definline.lisp b/runes/definline.lisp deleted file mode 100644 index 696cc15..0000000 --- a/runes/definline.lisp +++ /dev/null @@ -1,63 +0,0 @@ -;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- -;;; --------------------------------------------------------------------------- -;;; Title: definline -;;; Created: 1999-05-25 22:32 -;;; Author: Gilbert Baumann -;;; License: Lisp-LGPL (See file COPYING for details). -;;; --------------------------------------------------------------------------- -;;; (c) copyright 1999 by Gilbert Baumann - -;;; This code is free software; you can redistribute it and/or modify it -;;; under the terms of the version 2.1 of the GNU Lesser General Public -;;; License as published by the Free Software Foundation, as clarified -;;; by the "Preamble to the Gnu Lesser General Public License" found in -;;; the file COPYING. -;;; -;;; This code is distributed in the hope that it will be useful, -;;; but without any warranty; without even the implied warranty of -;;; merchantability or fitness for a particular purpose. See the GNU -;;; Lesser General Public License for more details. -;;; -;;; Version 2.1 of the GNU Lesser General Public License is in the file -;;; COPYING that was distributed with this file. If it is not present, -;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until -;;; superseded by a newer version) or write to the Free Software -;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -(in-package :runes) - -#-(or allegro openmcl) -(defmacro definline (name args &body body) - `(progn - (declaim (inline ,name)) - (defun ,name ,args .,body))) - -#+openmcl -(defmacro runes::definline (fun args &body body) - (if (consp fun) - `(defun ,fun ,args - ,@body) - `(progn - (defun ,fun ,args .,body) - (define-compiler-macro ,fun (&rest .args.) - (cons '(lambda ,args .,body) - .args.))))) - -#+allegro -(defmacro definline (fun args &body body) - (if (and (consp fun) (eq (car fun) 'setf)) - (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")") - (symbol-package (cadr fun))))) - `(progn - (defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap)) - (definline ,fnam ,args .,body))) - (labels ((declp (x) - (and (consp x) (eq (car x) 'declare)))) - `(progn - (defun ,fun ,args .,body) - (define-compiler-macro ,fun (&rest .args.) - (cons '(lambda ,args - ,@(remove-if-not #'declp body) - (block ,fun - ,@(remove-if #'declp body))) - .args.)))))) diff --git a/runes/encodings-data.lisp b/runes/encodings-data.lisp deleted file mode 100644 index c10131a..0000000 --- a/runes/encodings-data.lisp +++ /dev/null @@ -1,568 +0,0 @@ -(in-package :runes-encoding) - -(progn - (add-name :us-ascii "ANSI_X3.4-1968") - (add-name :us-ascii "iso-ir-6") - (add-name :us-ascii "ANSI_X3.4-1986") - (add-name :us-ascii "ISO_646.irv:1991") - (add-name :us-ascii "ASCII") - (add-name :us-ascii "ISO646-US") - (add-name :us-ascii "US-ASCII") - (add-name :us-ascii "us") - (add-name :us-ascii "IBM367") - (add-name :us-ascii "cp367") - (add-name :us-ascii "csASCII") - - (add-name :iso-8859-1 "ISO_8859-1:1987") - (add-name :iso-8859-1 "iso-ir-100") - (add-name :iso-8859-1 "ISO_8859-1") - (add-name :iso-8859-1 "ISO-8859-1") - (add-name :iso-8859-1 "latin1") - (add-name :iso-8859-1 "l1") - (add-name :iso-8859-1 "IBM819") - (add-name :iso-8859-1 "CP819") - (add-name :iso-8859-1 "csISOLatin1") - - (add-name :iso-8859-2 "ISO_8859-2:1987") - (add-name :iso-8859-2 "iso-ir-101") - (add-name :iso-8859-2 "ISO_8859-2") - (add-name :iso-8859-2 "ISO-8859-2") - (add-name :iso-8859-2 "latin2") - (add-name :iso-8859-2 "l2") - (add-name :iso-8859-2 "csISOLatin2") - - (add-name :iso-8859-3 "ISO_8859-3:1988") - (add-name :iso-8859-3 "iso-ir-109") - (add-name :iso-8859-3 "ISO_8859-3") - (add-name :iso-8859-3 "ISO-8859-3") - (add-name :iso-8859-3 "latin3") - (add-name :iso-8859-3 "l3") - (add-name :iso-8859-3 "csISOLatin3") - - (add-name :iso-8859-4 "ISO_8859-4:1988") - (add-name :iso-8859-4 "iso-ir-110") - (add-name :iso-8859-4 "ISO_8859-4") - (add-name :iso-8859-4 "ISO-8859-4") - (add-name :iso-8859-4 "latin4") - (add-name :iso-8859-4 "l4") - (add-name :iso-8859-4 "csISOLatin4") - - (add-name :iso-8859-6 "ISO_8859-6:1987") - (add-name :iso-8859-6 "iso-ir-127") - (add-name :iso-8859-6 "ISO_8859-6") - (add-name :iso-8859-6 "ISO-8859-6") - (add-name :iso-8859-6 "ECMA-114") - (add-name :iso-8859-6 "ASMO-708") - (add-name :iso-8859-6 "arabic") - (add-name :iso-8859-6 "csISOLatinArabic") - - (add-name :iso-8859-7 "ISO_8859-7:1987") - (add-name :iso-8859-7 "iso-ir-126") - (add-name :iso-8859-7 "ISO_8859-7") - (add-name :iso-8859-7 "ISO-8859-7") - (add-name :iso-8859-7 "ELOT_928") - (add-name :iso-8859-7 "ECMA-118") - (add-name :iso-8859-7 "greek") - (add-name :iso-8859-7 "greek8") - (add-name :iso-8859-7 "csISOLatinGreek") - - (add-name :iso-8859-8 "ISO_8859-8:1988") - (add-name :iso-8859-8 "iso-ir-138") - (add-name :iso-8859-8 "ISO_8859-8") - (add-name :iso-8859-8 "ISO-8859-8") - (add-name :iso-8859-8 "hebrew") - (add-name :iso-8859-8 "csISOLatinHebrew") - - (add-name :iso-8859-5 "ISO_8859-5:1988") - (add-name :iso-8859-5 "iso-ir-144") - (add-name :iso-8859-5 "ISO_8859-5") - (add-name :iso-8859-5 "ISO-8859-5") - (add-name :iso-8859-5 "cyrillic") - (add-name :iso-8859-5 "csISOLatinCyrillic") - - (add-name :iso-8859-9 "ISO_8859-9:1989") - (add-name :iso-8859-9 "iso-ir-148") - (add-name :iso-8859-9 "ISO_8859-9") - (add-name :iso-8859-9 "ISO-8859-9") - (add-name :iso-8859-9 "latin5") - (add-name :iso-8859-9 "l5") - (add-name :iso-8859-9 "csISOLatin5") - - (add-name :iso-8859-15 "ISO_8859-15") - (add-name :iso-8859-15 "ISO-8859-15") - - (add-name :iso-8859-14 "ISO_8859-14") - (add-name :iso-8859-14 "ISO-8859-14") - - (add-name :koi8-r "KOI8-R") - (add-name :koi8-r "csKOI8R") - - (add-name :utf-8 "UTF-8") - - (add-name :utf-16 "UTF-16") - - (add-name :ucs-4 "ISO-10646-UCS-4") - (add-name :ucs-4 "UCS-4") - - (add-name :ucs-2 "ISO-10646-UCS-2") - (add-name :ucs-2 "UCS-2") ) - - -(progn - (define-encoding :iso-8859-1 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-1))) - - (define-encoding :iso-8859-2 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-2))) - - (define-encoding :iso-8859-3 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-3))) - - (define-encoding :iso-8859-4 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-4))) - - (define-encoding :iso-8859-5 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-5))) - - (define-encoding :iso-8859-6 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-6))) - - (define-encoding :iso-8859-7 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-7))) - - (define-encoding :iso-8859-8 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-8))) - - (define-encoding :iso-8859-14 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-14))) - - (define-encoding :iso-8859-15 - (make-simple-8-bit-encoding - :charset (find-charset :iso-8859-15))) - - (define-encoding :koi8-r - (make-simple-8-bit-encoding - :charset (find-charset :koi8-r))) - - (define-encoding :utf-8 :utf-8) - ) - -(progn - (define-8-bit-charset :iso-8859-1 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 - #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF - #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 - #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF - #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 - #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF - #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 - #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF - #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 - #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF - #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 - #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) - - (define-8-bit-charset :iso-8859-2 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x0104 #x02D8 #x0141 #x00A4 #x013D #x015A #x00A7 - #| #o25x |# #x00A8 #x0160 #x015E #x0164 #x0179 #x00AD #x017D #x017B - #| #o26x |# #x00B0 #x0105 #x02DB #x0142 #x00B4 #x013E #x015B #x02C7 - #| #o27x |# #x00B8 #x0161 #x015F #x0165 #x017A #x02DD #x017E #x017C - #| #o30x |# #x0154 #x00C1 #x00C2 #x0102 #x00C4 #x0139 #x0106 #x00C7 - #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x011A #x00CD #x00CE #x010E - #| #o32x |# #x0110 #x0143 #x0147 #x00D3 #x00D4 #x0150 #x00D6 #x00D7 - #| #o33x |# #x0158 #x016E #x00DA #x0170 #x00DC #x00DD #x0162 #x00DF - #| #o34x |# #x0155 #x00E1 #x00E2 #x0103 #x00E4 #x013A #x0107 #x00E7 - #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x011B #x00ED #x00EE #x010F - #| #o36x |# #x0111 #x0144 #x0148 #x00F3 #x00F4 #x0151 #x00F6 #x00F7 - #| #o37x |# #x0159 #x016F #x00FA #x0171 #x00FC #x00FD #x0163 #x02D9) - - (define-8-bit-charset :iso-8859-3 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x0126 #x02D8 #x00A3 #x00A4 #xFFFF #x0124 #x00A7 - #| #o25x |# #x00A8 #x0130 #x015E #x011E #x0134 #x00AD #xFFFF #x017B - #| #o26x |# #x00B0 #x0127 #x00B2 #x00B3 #x00B4 #x00B5 #x0125 #x00B7 - #| #o27x |# #x00B8 #x0131 #x015F #x011F #x0135 #x00BD #xFFFF #x017C - #| #o30x |# #x00C0 #x00C1 #x00C2 #xFFFF #x00C4 #x010A #x0108 #x00C7 - #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF - #| #o32x |# #xFFFF #x00D1 #x00D2 #x00D3 #x00D4 #x0120 #x00D6 #x00D7 - #| #o33x |# #x011C #x00D9 #x00DA #x00DB #x00DC #x016C #x015C #x00DF - #| #o34x |# #x00E0 #x00E1 #x00E2 #xFFFF #x00E4 #x010B #x0109 #x00E7 - #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF - #| #o36x |# #xFFFF #x00F1 #x00F2 #x00F3 #x00F4 #x0121 #x00F6 #x00F7 - #| #o37x |# #x011D #x00F9 #x00FA #x00FB #x00FC #x016D #x015D #x02D9) - - (define-8-bit-charset :iso-8859-4 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x0104 #x0138 #x0156 #x00A4 #x0128 #x013B #x00A7 - #| #o25x |# #x00A8 #x0160 #x0112 #x0122 #x0166 #x00AD #x017D #x00AF - #| #o26x |# #x00B0 #x0105 #x02DB #x0157 #x00B4 #x0129 #x013C #x02C7 - #| #o27x |# #x00B8 #x0161 #x0113 #x0123 #x0167 #x014A #x017E #x014B - #| #o30x |# #x0100 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x012E - #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x0116 #x00CD #x00CE #x012A - #| #o32x |# #x0110 #x0145 #x014C #x0136 #x00D4 #x00D5 #x00D6 #x00D7 - #| #o33x |# #x00D8 #x0172 #x00DA #x00DB #x00DC #x0168 #x016A #x00DF - #| #o34x |# #x0101 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x012F - #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x0117 #x00ED #x00EE #x012B - #| #o36x |# #x0111 #x0146 #x014D #x0137 #x00F4 #x00F5 #x00F6 #x00F7 - #| #o37x |# #x00F8 #x0173 #x00FA #x00FB #x00FC #x0169 #x016B #x02D9) - - (define-8-bit-charset :iso-8859-5 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x0401 #x0402 #x0403 #x0404 #x0405 #x0406 #x0407 - #| #o25x |# #x0408 #x0409 #x040A #x040B #x040C #x00AD #x040E #x040F - #| #o26x |# #x0410 #x0411 #x0412 #x0413 #x0414 #x0415 #x0416 #x0417 - #| #o27x |# #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E #x041F - #| #o30x |# #x0420 #x0421 #x0422 #x0423 #x0424 #x0425 #x0426 #x0427 - #| #o31x |# #x0428 #x0429 #x042A #x042B #x042C #x042D #x042E #x042F - #| #o32x |# #x0430 #x0431 #x0432 #x0433 #x0434 #x0435 #x0436 #x0437 - #| #o33x |# #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E #x043F - #| #o34x |# #x0440 #x0441 #x0442 #x0443 #x0444 #x0445 #x0446 #x0447 - #| #o35x |# #x0448 #x0449 #x044A #x044B #x044C #x044D #x044E #x044F - #| #o36x |# #x2116 #x0451 #x0452 #x0453 #x0454 #x0455 #x0456 #x0457 - #| #o37x |# #x0458 #x0459 #x045A #x045B #x045C #x00A7 #x045E #x045F) - - (define-8-bit-charset :iso-8859-6 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0660 #x0661 #x0662 #x0663 #x0664 #x0665 #x0666 #x0667 - #| #o07x |# #x0668 #x0669 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #xFFFF #xFFFF #xFFFF #x00A4 #xFFFF #xFFFF #xFFFF - #| #o25x |# #xFFFF #xFFFF #xFFFF #xFFFF #x060C #x00AD #xFFFF #xFFFF - #| #o26x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o27x |# #xFFFF #xFFFF #xFFFF #x061B #xFFFF #xFFFF #xFFFF #x061F - #| #o30x |# #xFFFF #x0621 #x0622 #x0623 #x0624 #x0625 #x0626 #x0627 - #| #o31x |# #x0628 #x0629 #x062A #x062B #x062C #x062D #x062E #x062F - #| #o32x |# #x0630 #x0631 #x0632 #x0633 #x0634 #x0635 #x0636 #x0637 - #| #o33x |# #x0638 #x0639 #x063A #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o34x |# #x0640 #x0641 #x0642 #x0643 #x0644 #x0645 #x0646 #x0647 - #| #o35x |# #x0648 #x0649 #x064A #x064B #x064C #x064D #x064E #x064F - #| #o36x |# #x0650 #x0651 #x0652 #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o37x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) - - (define-8-bit-charset :iso-8859-7 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x02BD #x02BC #x00A3 #xFFFF #xFFFF #x00A6 #x00A7 - #| #o25x |# #x00A8 #x00A9 #xFFFF #x00AB #x00AC #x00AD #xFFFF #x2015 - #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x0384 #x0385 #x0386 #x00B7 - #| #o27x |# #x0388 #x0389 #x038A #x00BB #x038C #x00BD #x038E #x038F - #| #o30x |# #x0390 #x0391 #x0392 #x0393 #x0394 #x0395 #x0396 #x0397 - #| #o31x |# #x0398 #x0399 #x039A #x039B #x039C #x039D #x039E #x039F - #| #o32x |# #x03A0 #x03A1 #xFFFF #x03A3 #x03A4 #x03A5 #x03A6 #x03A7 - #| #o33x |# #x03A8 #x03A9 #x03AA #x03AB #x03AC #x03AD #x03AE #x03AF - #| #o34x |# #x03B0 #x03B1 #x03B2 #x03B3 #x03B4 #x03B5 #x03B6 #x03B7 - #| #o35x |# #x03B8 #x03B9 #x03BA #x03BB #x03BC #x03BD #x03BE #x03BF - #| #o36x |# #x03C0 #x03C1 #x03C2 #x03C3 #x03C4 #x03C5 #x03C6 #x03C7 - #| #o37x |# #x03C8 #x03C9 #x03CA #x03CB #x03CC #x03CD #x03CE #xFFFF) - - (define-8-bit-charset :iso-8859-8 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #xFFFF #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 - #| #o25x |# #x00A8 #x00A9 #x00D7 #x00AB #x00AC #x00AD #x00AE #x203E - #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 - #| #o27x |# #x00B8 #x00B9 #x00F7 #x00BB #x00BC #x00BD #x00BE #xFFFF - #| #o30x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o31x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o32x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o33x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #x2017 - #| #o34x |# #x05D0 #x05D1 #x05D2 #x05D3 #x05D4 #x05D5 #x05D6 #x05D7 - #| #o35x |# #x05D8 #x05D9 #x05DA #x05DB #x05DC #x05DD #x05DE #x05DF - #| #o36x |# #x05E0 #x05E1 #x05E2 #x05E3 #x05E4 #x05E5 #x05E6 #x05E7 - #| #o37x |# #x05E8 #x05E9 #x05EA #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) - - (define-8-bit-charset :iso-8859-9 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 - #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF - #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 - #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF - #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 - #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF - #| #o32x |# #x011E #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 - #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x0130 #x015E #x00DF - #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 - #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF - #| #o36x |# #x011F #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 - #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x0131 #x015F #x00FF) - - (define-8-bit-charset :iso-8859-14 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x1E02 #x1E03 #x00A3 #x010A #x010B #x1E0A #x00A7 - #| #o25x |# #x1E80 #x00A9 #x1E82 #x1E0B #x1EF2 #x00AD #x00AE #x0178 - #| #o26x |# #x1E1E #x1E1F #x0120 #x0121 #x1E40 #x1E41 #x00B6 #x1E56 - #| #o27x |# #x1E81 #x1E57 #x1E83 #x1E60 #x1EF3 #x1E84 #x1E85 #x1E61 - #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 - #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF - #| #o32x |# #x0174 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x1E6A - #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x0176 #x00DF - #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 - #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF - #| #o36x |# #x0175 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x1E6B - #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x0177 #x00FF) - - (define-8-bit-charset :iso-8859-15 - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF - #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x20AC #x00A5 #x0160 #x00A7 - #| #o25x |# #x0161 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF - #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x017D #x00B5 #x00B6 #x00B7 - #| #o27x |# #x017E #x00B9 #x00BA #x00BB #x0152 #x0153 #x0178 #x00BF - #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 - #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF - #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 - #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF - #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 - #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF - #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 - #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) - - (define-8-bit-charset :koi8-r - #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 - #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F - #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 - #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F - #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 - #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F - #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 - #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F - #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 - #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F - #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 - #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F - #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 - #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F - #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 - #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F - #| #o20x |# #x2500 #x2502 #x250C #x2510 #x2514 #x2518 #x251C #x2524 - #| #o21x |# #x252C #x2534 #x253C #x2580 #x2584 #x2588 #x258C #x2590 - #| #o22x |# #x2591 #x2592 #x2593 #x2320 #x25A0 #x2219 #x221A #x2248 - #| #o23x |# #x2264 #x2265 #x00A0 #x2321 #x00B0 #x00B2 #x00B7 #x00F7 - #| #o24x |# #x2550 #x2551 #x2552 #x0451 #x2553 #x2554 #x2555 #x2556 - #| #o25x |# #x2557 #x2558 #x2559 #x255A #x255B #x255C #x255D #x255E - #| #o26x |# #x255F #x2560 #x2561 #x0401 #x2562 #x2563 #x2564 #x2565 - #| #o27x |# #x2566 #x2567 #x2568 #x2569 #x256A #x256B #x256C #x00A9 - #| #o30x |# #x044E #x0430 #x0431 #x0446 #x0434 #x0435 #x0444 #x0433 - #| #o31x |# #x0445 #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E - #| #o32x |# #x043F #x044F #x0440 #x0441 #x0442 #x0443 #x0436 #x0432 - #| #o33x |# #x044C #x044B #x0437 #x0448 #x044D #x0449 #x0447 #x044A - #| #o34x |# #x042E #x0410 #x0411 #x0426 #x0414 #x0415 #x0424 #x0413 - #| #o35x |# #x0425 #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E - #| #o36x |# #x041F #x042F #x0420 #x0421 #x0422 #x0423 #x0416 #x0412 - #| #o37x |# #x042C #x042B #x0417 #x0428 #x042D #x0429 #x0427 #x042A) - ) - diff --git a/runes/encodings.lisp b/runes/encodings.lisp deleted file mode 100644 index 5788adb..0000000 --- a/runes/encodings.lisp +++ /dev/null @@ -1,396 +0,0 @@ -(in-package :runes-encoding) - -(define-condition encoding-error (simple-error) ()) - -(defun xerror (fmt &rest args) - (error 'encoding-error :format-control fmt :format-arguments args)) - -;;;; --------------------------------------------------------------------------- -;;;; Encoding names -;;;; - -(defvar *names* (make-hash-table :test #'eq)) - -(defun canon-name (string) - (with-output-to-string (bag) - (map nil (lambda (ch) - (cond ((char= ch #\_) (write-char #\- bag)) - (t (write-char (char-upcase ch) bag)))) - string))) - -(defun canon-name-2 (string) - (with-output-to-string (bag) - (map nil (lambda (ch) - (cond ((char= ch #\_)) - ((char= ch #\-)) - (t (write-char (char-upcase ch) bag)))) - string))) - -(defmethod encoding-names ((encoding symbol)) - (gethash encoding *names*)) - -(defmethod (setf encoding-names) (new-value (encoding symbol)) - (setf (gethash encoding *names*) new-value)) - -(defun add-name (encoding name) - (pushnew (canon-name name) (encoding-names encoding) :test #'string=)) - -(defun resolve-name (string) - (cond ((symbolp string) - string) - (t - (setq string (canon-name string)) - (or - (block nil - (maphash (lambda (x y) - (when (member string y :test #'string=) - (return x))) - *names*) - nil) - (block nil - (maphash (lambda (x y) - (when (member string y - :test #'(lambda (x y) - (string= (canon-name-2 x) - (canon-name-2 y)))) - (return x))) - *names*) - nil))))) - -;;;; --------------------------------------------------------------------------- -;;;; Encodings -;;;; - -(defvar *encodings* (make-hash-table :test #'eq)) - -(defmacro define-encoding (name init-form) - `(progn - (setf (gethash ',name *encodings*) - (list nil (lambda () ,init-form))) - ',name)) - -(defun find-encoding (name) - (let ((x (gethash (resolve-name name) *encodings*))) - (and x - (or (first x) - (setf (first x) (funcall (second x))))))) - -(defclass encoding () ()) - -(defclass simple-8-bit-encoding (encoding) - ((table :initarg :table))) - -(defun make-simple-8-bit-encoding (&key charset) - (make-instance 'simple-8-bit-encoding - :table (coerce (to-unicode-table charset) '(simple-array (unsigned-byte 16) (256))))) - -;;;;;;; - -(defmacro fx-op (op &rest xs) - `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) -(defmacro fx-pred (op &rest xs) - `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) - -(defmacro %+ (&rest xs) `(fx-op + ,@xs)) -(defmacro %- (&rest xs) `(fx-op - ,@xs)) -(defmacro %* (&rest xs) `(fx-op * ,@xs)) -(defmacro %/ (&rest xs) `(fx-op floor ,@xs)) -(defmacro %and (&rest xs) `(fx-op logand ,@xs)) -(defmacro %ior (&rest xs) `(fx-op logior ,@xs)) -(defmacro %xor (&rest xs) `(fx-op logxor ,@xs)) -(defmacro %ash (&rest xs) `(fx-op ash ,@xs)) -(defmacro %mod (&rest xs) `(fx-op mod ,@xs)) - -(defmacro %= (&rest xs) `(fx-pred = ,@xs)) -(defmacro %<= (&rest xs) `(fx-pred <= ,@xs)) -(defmacro %>= (&rest xs) `(fx-pred >= ,@xs)) -(defmacro %< (&rest xs) `(fx-pred < ,@xs)) -(defmacro %> (&rest xs) `(fx-pred > ,@xs)) - -;;; Decoders - -;; The decoders share a common signature: -;; -;; DECODE input input-start input-end -;; output output-start output-end -;; eof-p -;; -> first-not-written ; first-not-read -;; -;; These decode functions should decode as much characters off `input' -;; into the `output' as possible and return the indexes to the first -;; not read and first not written element of `input' and `output' -;; respectively. If there are not enough bytes in `input' to decode a -;; full character, decoding shold be abandomed; the caller has to -;; ensure that the remaining bytes of `input' are passed to the -;; decoder again with more bytes appended. -;; -;; `eof-p' now in turn indicates, if the given input sequence, is all -;; the producer does have and might be used to produce error messages -;; in case of incomplete codes or decided what to do. -;; -;; Decoders are expected to handle the various CR/NL conventions and -;; canonicalize each end of line into a single NL rune (#xA) in good -;; old Lisp tradition. -;; - -;; TODO: change this to an encoding class, which then might carry -;; additional state. Stateless encodings could been represented by -;; keywords. e.g. -;; -;; defmethod DECODE-SEQUENCE ((encoding (eql :utf-8)) ...) -;; - -(defmethod decode-sequence ((encoding (eql :utf-16-big-endian)) - in in-start in-end out out-start out-end eof?) - ;; -> new wptr, new rptr - (let ((wptr out-start) - (rptr in-start)) - (loop - (when (%= wptr out-end) - (return)) - (when (>= (%+ rptr 1) in-end) - (return)) - (let ((hi (aref in rptr)) - (lo (aref in (%+ 1 rptr)))) - (setf rptr (%+ 2 rptr)) - ;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste - ;; Zeichen abwarten und nachgucken, dass nicht etwa die andere - ;; Haelfte fehlt! - (let ((x (logior (ash hi 8) lo))) - (when (or (eql x #xFFFE) (eql x #xFFFF)) - (xerror "not a valid code point: #x~X" x)) - (setf (aref out wptr) x)) - (setf wptr (%+ 1 wptr)))) - (values wptr rptr))) - -(defmethod decode-sequence ((encoding (eql :utf-16-little-endian)) - in in-start in-end out out-start out-end eof?) - ;; -> new wptr, new rptr - (let ((wptr out-start) - (rptr in-start)) - (loop - (when (%= wptr out-end) - (return)) - (when (>= (%+ rptr 1) in-end) - (return)) - (let ((lo (aref in (%+ 0 rptr))) - (hi (aref in (%+ 1 rptr)))) - (setf rptr (%+ 2 rptr)) - ;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste - ;; Zeichen abwarten und nachgucken, dass nicht etwa die andere - ;; Haelfte fehlt! - (let ((x (logior (ash hi 8) lo))) - (when (or (eql x #xFFFE) (eql x #xFFFF)) - (xerror "not a valid code point: #x~X" x)) - (setf (aref out wptr) x)) - (setf wptr (%+ 1 wptr)))) - (values wptr rptr))) - -(defmethod decode-sequence ((encoding (eql :utf-8)) - in in-start in-end out out-start out-end eof?) - (declare (optimize (speed 3) (safety 0)) - (type (simple-array (unsigned-byte 8) (*)) in) - (type (simple-array (unsigned-byte 16) (*)) out) - (type fixnum in-start in-end out-start out-end)) - (let ((wptr out-start) - (rptr in-start) - byte0) - (macrolet ((put (x) - `((lambda (x) - (when (or (<= #xD800 x #xDBFF) - (<= #xDC00 x #xDFFF)) - (xerror "surrogate encoded in UTF-8: #x~X." x)) - (cond ((or (%> x #x10FFFF) - (eql x #xFFFE) - (eql x #xFFFF)) - (xerror "not a valid code point: #x~X" x)) - ((%> x #xFFFF) - (setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10)) - (aref out (%+ 1 wptr)) (%ior #xDC00 (%and x #x3FF))) - (setf wptr (%+ wptr 2))) - (t - (setf (aref out wptr) x) - (setf wptr (%+ wptr 1))))) - ,x)) - (put1 (x) - `(progn - (setf (aref out wptr) ,x) - (setf wptr (%+ wptr 1))))) - (loop - (when (%= (+ wptr 1) out-end) (return)) - (when (%>= rptr in-end) (return)) - (setq byte0 (aref in rptr)) - (cond ((= byte0 #x0D) - ;; CR handling - ;; we need to know the following character - (cond ((>= (%+ rptr 1) in-end) - ;; no characters in buffer - (cond (eof? - ;; at EOF, pass it as NL - (put #x0A) - (setf rptr (%+ rptr 1))) - (t - ;; demand more characters - (return)))) - ((= (aref in (%+ rptr 1)) #x0A) - ;; we see CR NL, so forget this CR and the next NL will be - ;; inserted literally - (setf rptr (%+ rptr 1))) - (t - ;; singleton CR, pass it as NL - (put #x0A) - (setf rptr (%+ rptr 1))))) - - ((%<= #|#b00000000|# byte0 #b01111111) - (put1 byte0) - (setf rptr (%+ rptr 1))) - - ((%<= #|#b10000000|# byte0 #b10111111) - (xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0) - (setf rptr (%+ rptr 1))) - - ((%<= #|#b11000000|# byte0 #b11011111) - (cond ((<= (%+ rptr 2) in-end) - (put - (dpb (ldb (byte 5 0) byte0) (byte 5 6) - (dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0) - 0))) - (setf rptr (%+ rptr 2))) - (t - (return)))) - - ((%<= #|#b11100000|# byte0 #b11101111) - (cond ((<= (%+ rptr 3) in-end) - (put - (dpb (ldb (byte 4 0) byte0) (byte 4 12) - (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6) - (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 0) - 0)))) - (setf rptr (%+ rptr 3))) - (t - (return)))) - - ((%<= #|#b11110000|# byte0 #b11110111) - (cond ((<= (%+ rptr 4) in-end) - (put - (dpb (ldb (byte 3 0) byte0) (byte 3 18) - (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12) - (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 6) - (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 0) - 0))))) - (setf rptr (%+ rptr 4))) - (t - (return)))) - - ((%<= #|#b11111000|# byte0 #b11111011) - (cond ((<= (%+ rptr 5) in-end) - (put - (dpb (ldb (byte 2 0) byte0) (byte 2 24) - (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18) - (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 12) - (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 6) - (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 0) - 0)))))) - (setf rptr (%+ rptr 5))) - (t - (return)))) - - ((%<= #|#b11111100|# byte0 #b11111101) - (cond ((<= (%+ rptr 6) in-end) - (put - (dpb (ldb (byte 1 0) byte0) (byte 1 30) - (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24) - (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 18) - (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 12) - (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 6) - (dpb (ldb (byte 6 0) (aref in (%+ 5 rptr))) (byte 6 0) - 0))))))) - (setf rptr (%+ rptr 6))) - (t - (return)))) - - (t - (xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) - (values wptr rptr)) ) - -(defmethod encoding-p ((object (eql :utf-16-little-endian))) t) -(defmethod encoding-p ((object (eql :utf-16-big-endian))) t) -(defmethod encoding-p ((object (eql :utf-8))) t) - -(defmethod encoding-p ((object encoding)) t) - -(defmethod decode-sequence ((encoding simple-8-bit-encoding) - in in-start in-end - out out-start out-end - eof?) - (declare (optimize (speed 3) (safety 0)) - (type (simple-array (unsigned-byte 8) (*)) in) - (type (simple-array (unsigned-byte 16) (*)) out) - (type fixnum in-start in-end out-start out-end)) - (let ((wptr out-start) - (rptr in-start) - (byte 0) - (table (slot-value encoding 'table))) - (declare (type fixnum wptr rptr) - (type (unsigned-byte 8) byte) - (type (simple-array (unsigned-byte 16) (*)) table)) - (loop - (when (%= wptr out-end) (return)) - (when (%>= rptr in-end) (return)) - (setq byte (aref in rptr)) - (cond ((= byte #x0D) - ;; CR handling - ;; we need to know the following character - (cond ((>= (%+ rptr 1) in-end) - ;; no characters in buffer - (cond (eof? - ;; at EOF, pass it as NL - (setf (aref out wptr) #x0A) - (setf wptr (%+ wptr 1)) - (setf rptr (%+ rptr 1))) - (t - ;; demand more characters - (return)))) - ((= (aref in (%+ rptr 1)) #x0A) - ;; we see CR NL, so forget this CR and the next NL will be - ;; inserted literally - (setf rptr (%+ rptr 1))) - (t - ;; singleton CR, pass it as NL - (setf (aref out wptr) #x0A) - (setf wptr (%+ wptr 1)) - (setf rptr (%+ rptr 1))))) - - (t - (setf (aref out wptr) (aref table byte)) - (setf wptr (%+ wptr 1)) - (setf rptr (%+ rptr 1))) )) - (values wptr rptr))) - -;;;; --------------------------------------------------------------------------- -;;;; Character sets -;;;; - -(defvar *charsets* (make-hash-table :test #'eq)) - -(defclass 8-bit-charset () - ((name :initarg :name) - (to-unicode-table - :initarg :to-unicode-table - :reader to-unicode-table))) - -(defmacro define-8-bit-charset (name &rest codes) - (assert (= 256 (length codes))) - `(progn - (setf (gethash ',name *charsets*) - (make-instance '8-bit-charset - :name ',name - :to-unicode-table - ',(make-array 256 - :element-type '(unsigned-byte 16) - :initial-contents codes))) - ',name)) - -(defun find-charset (name) - (or (gethash name *charsets*) - (xerror "There is no character set named ~S." name))) diff --git a/runes/package.lisp b/runes/package.lisp deleted file mode 100644 index bd5bc68..0000000 --- a/runes/package.lisp +++ /dev/null @@ -1,99 +0,0 @@ -;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- -;;; --------------------------------------------------------------------------- -;;; Title: Generating a sane DEFPACKAGE for RUNES -;;; Created: 1999-05-25 -;;; Author: Gilbert Baumann -;;; --------------------------------------------------------------------------- -;;; (c) copyright 1999,2000 by Gilbert Baumann - -(in-package :cl-user) - -(defpackage :runes - (:use :cl #-scl :trivial-gray-streams) - (:export #:definline - - ;; runes.lisp - #:rune - #:rod - #:simple-rod - #:%rune - #:rod-capitalize - #:code-rune - #:rune-code - #:rune-downcase - #:rune-upcase - #:rod-downcase - #:rod-upcase - #:white-space-rune-p - #:digit-rune-p - #:rune= - #:rune<= - #:rune>= - #:rune-equal - #:runep - #:sloopy-rod-p - #:rod= - #:rod-equal - #:make-rod - #:char-rune - #:rune-char - #:rod-string - #:string-rod - #:rod-subseq - #:rod< - - ;; xstream.lisp - #:xstream - #:make-xstream - #:make-rod-xstream - #:close-xstream - #:xstream-p - #:read-rune - #:peek-rune - #:fread-rune - #:fpeek-rune - #:consume-rune - #:unread-rune - #:xstream-position - #:xstream-line-number - #:xstream-column-number - #:xstream-plist - #:xstream-encoding - #:set-to-full-speed - #:xstream-name - - ;; ystream.lisp - #:ystream - #:close-ystream - #:write-rune - #:write-rod - #:ystream-column - #:make-octet-vector-ystream - #:make-octet-stream-ystream - #:make-rod-ystream - #+rune-is-character #:make-character-stream-ystream - ;; These don't make too much sense on Unicode-enabled, - ;; implementations but for those applications using them anyway, - ;; I have commented out the reader conditionals now: - ;; #+rune-is-integer - #:make-string-ystream/utf8 - ;; #+rune-is-integer - #:make-character-stream-ystream/utf8 - #:runes-to-utf8/adjustable-string - - #:rod-to-utf8-string - #:utf8-string-to-rod - #:make-octet-input-stream)) - -(defpackage :utf8-runes - (:use :cl) - (:export *utf8-runes-readtable* - #:rune #:rod #:simple-rod #:rod-string #:rod= #:make-rod - #:string-rod)) - -(defpackage :runes-encoding - (:use :cl :runes) - (:export - #:encoding-error - #:find-encoding - #:decode-sequence)) diff --git a/runes/runes.lisp b/runes/runes.lisp deleted file mode 100644 index 4f7f2be..0000000 --- a/runes/runes.lisp +++ /dev/null @@ -1,230 +0,0 @@ -;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- -;;; --------------------------------------------------------------------------- -;;; Title: Unicode strings (called RODs) -;;; Created: 1999-05-25 22:29 -;;; Author: Gilbert Baumann -;;; License: Lisp-LGPL (See file COPYING for details). -;;; --------------------------------------------------------------------------- -;;; (c) copyright 1998,1999 by Gilbert Baumann - -;;; This code is free software; you can redistribute it and/or modify it -;;; under the terms of the version 2.1 of the GNU Lesser General Public -;;; License as published by the Free Software Foundation, as clarified -;;; by the "Preamble to the Gnu Lesser General Public License" found in -;;; the file COPYING. -;;; -;;; This code is distributed in the hope that it will be useful, -;;; but without any warranty; without even the implied warranty of -;;; merchantability or fitness for a particular purpose. See the GNU -;;; Lesser General Public License for more details. -;;; -;;; Version 2.1 of the GNU Lesser General Public License is in the file -;;; COPYING that was distributed with this file. If it is not present, -;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until -;;; superseded by a newer version) or write to the Free Software -;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -;; Changes -;; -;; When Who What -;; ---------------------------------------------------------------------------- -;; 1999-08-15 GB - ROD=, ROD-EQUAL -;; RUNE<=, RUNE>= -;; MAKE-ROD, ROD-SUBSEQ -;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD -;; new functions -;; - Added rune reader -;; - -(in-package :runes) - -(deftype rune () '(unsigned-byte 16)) -(deftype rod () '(array rune (*))) -(deftype simple-rod () '(simple-array rune (*))) - -(definline rune (rod index) - (aref rod index)) - -(defun (setf rune) (new rod index) - (setf (aref rod index) new)) - -(definline %rune (rod index) - (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index))) - -(definline (setf %rune) (new rod index) - (setf (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index)) new)) - -(defun rod-capitalize (rod) - (warn "~S is not implemented." 'rod-capitalize) - rod) - -(definline code-rune (x) x) -(definline rune-code (x) x) - -(definline rune= (x y) - (= x y)) - -(defun rune-downcase (rune) - (cond ((<= #x0041 rune #x005a) (+ rune #x20)) - ((= rune #x00d7) rune) - ((<= #x00c0 rune #x00de) (+ rune #x20)) - (t rune))) - -(definline rune-upcase (rune) - (cond ((<= #x0061 rune #x007a) (- rune #x20)) - ((= rune #x00f7) rune) - ((<= #x00e0 rune #x00fe) (- rune #x20)) - (t rune))) - -(defun rune-upper-case-letter-p (rune) - (or (<= #x0041 rune #x005a) (<= #x00c0 rune #x00de))) - -(defun rune-lower-case-letter-p (rune) - (or (<= #x0061 rune #x007a) (<= #x00e0 rune #x00fe) - (= rune #x00d7))) - - -(defun rune-equal (x y) - (rune= (rune-upcase x) (rune-upcase y))) - -(defun rod-downcase (rod) - ;; FIXME - (map '(simple-array (unsigned-byte 16) (*)) #'rune-downcase rod)) - -(defun rod-upcase (rod) - ;; FIXME - (map '(simple-array (unsigned-byte 16) (*)) #'rune-upcase rod)) - -(definline white-space-rune-p (char) - (or (= char 9) ;TAB - (= char 10) ;Linefeed - (= char 13) ;Carriage Return - (= char 32))) ;Space - -(definline digit-rune-p (char &optional (radix 10)) - (cond ((<= #.(char-code #\0) char #.(char-code #\9)) - (and (< (- char #.(char-code #\0)) radix) - (- char #.(char-code #\0)))) - ((<= #.(char-code #\A) char #.(char-code #\Z)) - (and (< (- char #.(char-code #\A) -10) radix) - (- char #.(char-code #\A) -10))) - ((<= #.(char-code #\a) char #.(char-code #\z)) - (and (< (- char #.(char-code #\a) -10) radix) - (- char #.(char-code #\a) -10))) )) - -(defun rod (x) - (cond ((stringp x) (map 'rod #'char-code x)) - ((symbolp x) (rod (string x))) - ((characterp x) (rod (string x))) - ((vectorp x) (coerce x 'rod)) - ((integerp x) (map 'rod #'identity (list x))) - (t (error "Cannot convert ~S to a ~S" x 'rod)))) - -(defun runep (x) - (and (integerp x) - (<= 0 x #xFFFF))) - -(defun sloopy-rod-p (x) - (and (not (stringp x)) - (vectorp x) - (every #'runep x))) - -(defun rod= (x y) - (and (= (length x) (length y)) - (dotimes (i (length x) t) - (unless (rune= (rune x i) (rune y i)) - (return nil))))) - -(defun rod-equal (x y) - (and (= (length x) (length y)) - (dotimes (i (length x) t) - (unless (rune-equal (rune x i) (rune y i)) - (return nil))))) - -(definline make-rod (size) - (make-array size :element-type 'rune)) - -(defun char-rune (char) - (code-rune (char-code char))) - -(defparameter *invalid-rune* nil ;;#\? - "Rune to use as a replacement in RUNE-CHAR and ROD-STRING for runes not - representable as characters. If NIL, an error is signalled instead.") - -(defun rune-char (rune &optional (default *invalid-rune*)) - (or (if (>= rune char-code-limit) - default - (or (code-char rune) default)) - (error "rune cannot be represented as a character: ~A" rune))) - -(defun rod-string (rod &optional (default-char *invalid-rune*)) - (map 'string (lambda (x) (rune-char x default-char)) rod)) - -(defun string-rod (string) - (let* ((n (length string)) - (res (make-rod n))) - (dotimes (i n) - (setf (%rune res i) (char-rune (char string i)))) - res)) - -;;;; - -(defun rune<= (rune &rest more-runes) - (apply #'<= rune more-runes)) - -(defun rune>= (rune &rest more-runes) - (apply #'>= rune more-runes)) - -(defun rodp (object) - (typep object 'rod)) - -(defun rod-subseq (source start &optional (end (length source))) - (unless (rodp source) - (error "~S is not of type ~S." source 'rod)) - (unless (and (typep start 'fixnum) (>= start 0)) - (error "~S is not a non-negative fixnum." start)) - (unless (and (typep end 'fixnum) (>= end start)) - (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) - (when (> start (length source)) - (error "START argument, ~S, should be no greater than length of rod." start)) - (when (> end (length source)) - (error "END argument, ~S, should be no greater than length of rod." end)) - (locally - (declare (type rod source) - (type fixnum start end)) - (let ((res (make-rod (- end start)))) - (declare (type rod res)) - (do ((i (- (- end start) 1) (the fixnum (- i 1)))) - ((< i 0) res) - (declare (type fixnum i)) - (setf (%rune res i) (%rune source (the fixnum (+ i start)))))))) - -(defun rod-subseq* (source start &optional (end (length source))) - (unless (and (typep start 'fixnum) (>= start 0)) - (error "~S is not a non-negative fixnum." start)) - (unless (and (typep end 'fixnum) (>= end start)) - (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) - (when (> start (length source)) - (error "START argument, ~S, should be no greater than length of rod." start)) - (when (> end (length source)) - (error "END argument, ~S, should be no greater than length of rod." end)) - (locally - (declare (type fixnum start end)) - (let ((res (make-rod (- end start)))) - (declare (type rod res)) - (do ((i (- (- end start) 1) (the fixnum (- i 1)))) - ((< i 0) res) - (declare (type fixnum i)) - (setf (%rune res i) (aref source (the fixnum (+ i start)))))))) - -(defun rod< (rod1 rod2) - (do ((i 0 (+ i 1))) - (nil) - (cond ((= i (length rod1)) - (return t)) - ((= i (length rod2)) - (return nil)) - ((< (aref rod1 i) (aref rod2 i)) - (return t)) - ((> (aref rod1 i) (aref rod2 i)) - (return nil))))) diff --git a/runes/stream-scl.lisp b/runes/stream-scl.lisp deleted file mode 100644 index 085e05f..0000000 --- a/runes/stream-scl.lisp +++ /dev/null @@ -1,253 +0,0 @@ -;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*- -;;; --------------------------------------------------------------------------- -;;; Title: Fast streams -;;; Created: 1999-07-17 -;;; Author: Douglas Crosher -;;; License: Lisp-LGPL (See file COPYING for details). -;;; --------------------------------------------------------------------------- -;;; (c) copyright 2007 by Douglas Crosher - -;;; This library is free software; you can redistribute it and/or -;;; modify it under the terms of the GNU Library General Public -;;; License as published by the Free Software Foundation; either -;;; version 2 of the License, or (at your option) any later version. -;;; -;;; This library is distributed in the hope that it will be useful, -;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;; Library General Public License for more details. -;;; -;;; You should have received a copy of the GNU Library General Public -;;; License along with this library; if not, write to the -;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;;; Boston, MA 02111-1307 USA. - -(in-package :runes) - -(eval-when (:compile-toplevel :load-toplevel :execute) - (defparameter *fast* '(optimize (speed 3) (safety 3)))) - -(deftype runes-encoding:encoding-error () - 'ext:character-conversion-error) - - -;;; xstream - -(eval-when (:compile-toplevel :load-toplevel :execute) - -(defclass xstream (ext:character-stream) - ((name :initarg :name :initform nil - :accessor xstream-name) - (column :initarg :column :initform 0) - (line :initarg :line :initform 1) - (unread-column :initarg :unread-column :initform 0))) - -(defclass eol-conversion-xstream (lisp::eol-conversion-input-stream xstream) - ()) - -) ; eval-when - -(defun make-eol-conversion-xstream (source-stream) - "Returns a character stream that conversion CR-LF pairs and lone CR - characters into single linefeed character." - (declare (type stream source-stream)) - (let ((stream (ext:make-eol-conversion-stream source-stream - :input t - :close-stream-p t))) - (change-class stream 'eol-conversion-xstream))) - -(definline xstream-p (stream) - (typep stream 'xstream)) - -(defun close-xstream (input) - (close input)) - -(definline read-rune (input) - (declare (type stream input) - (inline read-char) - #.*fast*) - (let ((char (read-char input nil :eof))) - (cond ((member char '(#\UFFFE #\UFFFF)) - ;; These characters are illegal within XML documents. - (simple-error 'ext:character-conversion-error - "~@" char)) - ((eql char #\linefeed) - (setf (slot-value input 'unread-column) (slot-value input 'column)) - (setf (slot-value input 'column) 0) - (incf (the kernel:index (slot-value input 'line)))) - (t - (incf (the kernel:index (slot-value input 'column))))) - char)) - -(definline peek-rune (input) - (declare (type stream input) - (inline peek-char) - #.*fast*) - (peek-char nil input nil :eof)) - -(definline consume-rune (input) - (declare (type stream input) - (inline read-rune) - #.*fast*) - (read-rune input) - nil) - -(definline unread-rune (rune input) - (declare (type stream input) - (inline unread-char) - #.*fast*) - (unread-char rune input) - (cond ((eql rune #\linefeed) - (setf (slot-value input 'column) (slot-value input 'unread-column)) - (setf (slot-value input 'unread-column) 0) - (decf (the kernel:index (slot-value input 'line)))) - (t - (decf (the kernel:index (slot-value input 'column))))) - nil) - -(defun fread-rune (input) - (read-rune input)) - -(defun fpeek-rune (input) - (peek-rune input)) - -(defun xstream-position (input) - (file-position input)) - -(defun runes-encoding:find-encoding (encoding) - encoding) - -(defun make-xstream (os-stream &key name - (speed 8192) - (initial-speed 1) - (initial-encoding :guess)) - (declare (ignore speed)) - (assert (eql initial-speed 1)) - (assert (eq initial-encoding :guess)) - (let* ((stream (ext:make-xml-character-conversion-stream os-stream - :input t - :close-stream-p t)) - (xstream (make-eol-conversion-xstream stream))) - (setf (xstream-name xstream) name) - xstream)) - - -(defclass xstream-string-input-stream (lisp::string-input-stream xstream) - ()) - -(defun make-rod-xstream (string &key name) - (declare (type string string)) - (let ((stream (make-string-input-stream string))) - (change-class stream 'xstream-string-input-stream :name name))) - -;;; already at 'full speed' so just return the buffer size. -(defun set-to-full-speed (stream) - (length (ext:stream-in-buffer stream))) - -(defun xstream-speed (stream) - (length (ext:stream-in-buffer stream))) - -(defun xstream-line-number (stream) - (slot-value stream 'line)) - -(defun xstream-column-number (stream) - (slot-value stream 'column)) - -(defun xstream-encoding (stream) - (stream-external-format stream)) - -;;; the encoding will have already been detected, but it is checked against the -;;; declared encoding here. -(defun (setf xstream-encoding) (declared-encoding stream) - (let* ((initial-encoding (xstream-encoding stream)) - (canonical-encoding - (cond ((and (eq initial-encoding :utf-16le) - (member declared-encoding '(:utf-16 :utf16 :utf-16le :utf16le) - :test 'string-equal)) - :utf-16le) - ((and (eq initial-encoding :utf-16be) - (member declared-encoding '(:utf-16 :utf16 :utf-16be :utf16be) - :test 'string-equal)) - :utf-16be) - ((and (eq initial-encoding :ucs-4be) - (member declared-encoding '(:ucs-4 :ucs4 :ucs-4be :ucs4be) - :test 'string-equal)) - :ucs4-be) - ((and (eq initial-encoding :ucs-4le) - (member declared-encoding '(:ucs-4 :ucs4 :ucs-4le :ucs4le) - :test 'string-equal)) - :ucs4-le) - (t - declared-encoding)))) - (unless (string-equal initial-encoding canonical-encoding) - (warn "Unable to change xstream encoding from ~S to ~S (~S)~%" - initial-encoding declared-encoding canonical-encoding)) - declared-encoding)) - - -;;; ystream - a run output stream. - -(deftype ystream () 'stream) - -(defun ystream-column (stream) - (ext:line-column stream)) - -(definline write-rune (rune stream) - (declare (inline write-char)) - (write-char rune stream)) - -(defun write-rod (rod stream) - (declare (type rod rod) - (type stream stream)) - (write-string rod stream)) - -(defun make-rod-ystream () - (make-string-output-stream)) - -(defun close-ystream (stream) - (etypecase stream - (ext:string-output-stream - (get-output-stream-string stream)) - (ext:character-conversion-output-stream - (let ((target (slot-value stream 'stream))) - (close stream) - (if (typep target 'ext:byte-output-stream) - (ext:get-output-stream-bytes target) - stream))))) - -;;;; CHARACTER-STREAM-YSTREAM - -(defun make-character-stream-ystream (target-stream) - target-stream) - - -;;;; OCTET-VECTOR-YSTREAM - -(defun make-octet-vector-ystream () - (let ((target (ext:make-byte-output-stream))) - (ext:make-character-conversion-stream target :output t - :external-format :utf-8 - :close-stream-p t))) - -;;;; OCTET-STREAM-YSTREAM - -(defun make-octet-stream-ystream (os-stream) - (ext:make-character-conversion-stream os-stream :output t - :external-format :utf-8 - :close-stream-p t)) - - -;;;; helper functions - -(defun rod-to-utf8-string (rod) - (ext:make-string-from-bytes (ext:make-bytes-from-string rod :utf8) - :iso-8859-1)) - -(defun utf8-string-to-rod (str) - (let ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))) - (ext:make-string-from-bytes bytes :utf-8))) - -(defun make-octet-input-stream (octets) - (ext:make-byte-input-stream octets)) - - diff --git a/runes/syntax.lisp b/runes/syntax.lisp deleted file mode 100644 index 34ccfbe..0000000 --- a/runes/syntax.lisp +++ /dev/null @@ -1,181 +0,0 @@ -;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- -;;; --------------------------------------------------------------------------- -;;; Title: Unicode strings (called RODs) -;;; Created: 1999-05-25 22:29 -;;; Author: Gilbert Baumann -;;; License: Lisp-LGPL (See file COPYING for details). -;;; --------------------------------------------------------------------------- -;;; (c) copyright 1998,1999 by Gilbert Baumann - -;;; This code is free software; you can redistribute it and/or modify it -;;; under the terms of the version 2.1 of the GNU Lesser General Public -;;; License as published by the Free Software Foundation, as clarified -;;; by the "Preamble to the Gnu Lesser General Public License" found in -;;; the file COPYING. -;;; -;;; This code is distributed in the hope that it will be useful, -;;; but without any warranty; without even the implied warranty of -;;; merchantability or fitness for a particular purpose. See the GNU -;;; Lesser General Public License for more details. -;;; -;;; Version 2.1 of the GNU Lesser General Public License is in the file -;;; COPYING that was distributed with this file. If it is not present, -;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until -;;; superseded by a newer version) or write to the Free Software -;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -;; Changes -;; -;; When Who What -;; ---------------------------------------------------------------------------- -;; 1999-08-15 GB - ROD=, ROD-EQUAL -;; RUNE<=, RUNE>= -;; MAKE-ROD, ROD-SUBSEQ -;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD -;; new functions -;; - Added rune reader -;; - -(in-package :runes) - -;;;; -;;;; RUNE Reader -;;;; - -;; Portable implementation of WHITE-SPACE-P with regard to the current -;; read table -- this is bit tricky. - -(defun rt-white-space-p (char) - (let ((stream (make-string-input-stream (string char)))) - (eq :eof (peek-char t stream nil :eof)))) - -(defun read-rune-name (input) - ;; the first char is unconditionally read - (let ((char0 (read-char input t nil t))) - (when (char= char0 #\\) - (setf char0 (read-char input t nil t))) - (with-output-to-string (res) - (write-char char0 res) - (do ((ch (peek-char nil input nil :eof t) (peek-char nil input nil :eof t))) - ((or (eq ch :eof) - (rt-white-space-p ch) - (multiple-value-bind (function non-terminating-p) (get-macro-character ch) - (and function (not non-terminating-p))))) - (write-char ch res) - (read-char input))))) ;consume this character - -(defun iso-10646-char-code (char) - (char-code char)) - -(defvar *rune-names* (make-hash-table :test #'equal) - "Hashtable, which maps all known rune names to rune codes; - Names are stored in uppercase.") - -(defun define-rune-name (name code) - (setf (gethash (string-upcase name) *rune-names*) code) - name) - -(defun lookup-rune-name (name) - (gethash (string-upcase name) *rune-names*)) - -(define-rune-name "null" #x0000) -(define-rune-name "space" #x0020) -(define-rune-name "newline" #x000A) -(define-rune-name "return" #x000D) -(define-rune-name "tab" #x0009) -(define-rune-name "page" #x000C) - -;; and just for fun: -(define-rune-name "euro" #x20AC) - -;; ASCII control characters -(define-rune-name "nul" #x0000) ;null -(define-rune-name "soh" #x0001) ;start of header -(define-rune-name "stx" #x0002) ;start of text -(define-rune-name "etx" #x0003) ;end of text -(define-rune-name "eot" #x0004) ;end of transmission -(define-rune-name "enq" #x0005) ; -(define-rune-name "ack" #x0006) ;acknowledge -(define-rune-name "bel" #x0007) ;bell -(define-rune-name "bs" #x0008) ;backspace -(define-rune-name "ht" #x0009) ;horizontal tab -(define-rune-name "lf" #X000A) ;line feed, new line -(define-rune-name "vt" #X000B) ;vertical tab -(define-rune-name "ff" #x000C) ;form feed -(define-rune-name "cr" #x000D) ;carriage return -(define-rune-name "so" #x000E) ;shift out -(define-rune-name "si" #x000F) ;shift in -(define-rune-name "dle" #x0010) ;device latch enable ? -(define-rune-name "dc1" #x0011) ;device control 1 -(define-rune-name "dc2" #x0012) ;device control 2 -(define-rune-name "dc3" #x0013) ;device control 3 -(define-rune-name "dc4" #x0014) ;device control 4 -(define-rune-name "nak" #x0015) ;negative acknowledge -(define-rune-name "syn" #x0016) ; -(define-rune-name "etb" #x0017) ; -(define-rune-name "can" #x0018) ; -(define-rune-name "em" #x0019) ;end of message -(define-rune-name "sub" #x001A) ; -(define-rune-name "esc" #x001B) ;escape -(define-rune-name "fs" #x001C) ;field separator ? -(define-rune-name "gs" #x001D) ;group separator -(define-rune-name "rs" #x001E) ; -(define-rune-name "us" #x001F) ; - -(define-rune-name "del" #x007F) ;delete - -;; iso-latin -(define-rune-name "nbsp" #x00A0) ;non breakable space -(define-rune-name "shy" #x00AD) ;soft hyphen - -(defun rune-from-read-name (name) - (code-rune - (cond ((= (length name) 1) - (iso-10646-char-code (char name 0))) - ((and (= (length name) 2) - (char= (char name 0) #\\)) - (iso-10646-char-code (char name 1))) - ((and (>= (length name) 3) - (char-equal (char name 0) #\u) - (char-equal (char name 1) #\+) - (every (lambda (x) (digit-char-p x 16)) (subseq name 2))) - (parse-integer name :start 2 :radix 16)) - ((lookup-rune-name name)) - (t - (error "Meaningless rune name ~S." name))))) - -(defun rune-reader (stream subchar arg) - subchar arg - (values (rune-from-read-name (read-rune-name stream)))) - -(set-dispatch-macro-character #\# #\/ 'rune-reader) - -;;; ROD ext syntax - -(defun rod-reader (stream subchar arg) - (declare (ignore arg)) - (rod - (with-output-to-string (bag) - (do ((c (read-char stream t nil t) - (read-char stream t nil t))) - ((char= c subchar)) - (cond ((char= c #\\) - (setf c (read-char stream t nil t)))) - (princ c bag))))) - -#-rune-is-character -(defun rod-printer (stream rod) - (princ #\# stream) - (princ #\" stream) - (loop for x across rod do - (cond ((or (rune= x #.(char-rune #\\)) - (rune= x #.(char-rune #\"))) - (princ #\\ stream) - (princ (code-char x) stream)) - ((< x char-code-limit) - (princ (code-char x) stream)) - (t - (format stream "\\u~4,'0X" x)))) - (princ #\" stream)) - -(set-dispatch-macro-character #\# #\" 'rod-reader) diff --git a/runes/utf8.lisp b/runes/utf8.lisp deleted file mode 100644 index 5aef7f1..0000000 --- a/runes/utf8.lisp +++ /dev/null @@ -1,36 +0,0 @@ -;;; copyright (c) 2005 David Lichteblau -;;; License: Lisp-LGPL (See file COPYING for details). -;;; -;;; Rune emulation for the UTF-8-compatible DOM implementation. -;;; Used only with 8 bit characters on non-unicode Lisps. - -(in-package :utf8-runes) - -(deftype rune () 'character) -(deftype rod () '(vector rune)) -(deftype simple-rod () '(simple-array rune)) - -(defun rod= (r s) - (string= r s)) - -(defun rod-string (rod &optional default) - (declare (ignore default)) - rod) - -(defun string-rod (string) - string) - -(defun make-rod (size) - (make-string size :element-type 'rune)) - -(defun rune-reader (stream subchar arg) - (runes::rune-char (runes::rune-reader stream subchar arg))) - -(defun rod-reader (stream subchar arg) - (runes::rod-string (runes::rod-reader stream subchar arg))) - -(setf runes-system:*utf8-runes-readtable* - (let ((rt (copy-readtable))) - (set-dispatch-macro-character #\# #\/ 'rune-reader rt) - (set-dispatch-macro-character #\# #\" 'rod-reader rt) - rt)) diff --git a/runes/xstream.lisp b/runes/xstream.lisp deleted file mode 100644 index 1479d7e..0000000 --- a/runes/xstream.lisp +++ /dev/null @@ -1,409 +0,0 @@ -;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*- -;;; --------------------------------------------------------------------------- -;;; Title: Fast streams -;;; Created: 1999-07-17 -;;; Author: Gilbert Baumann -;;; License: Lisp-LGPL (See file COPYING for details). -;;; --------------------------------------------------------------------------- -;;; (c) copyright 1999 by Gilbert Baumann - -;;; This library is free software; you can redistribute it and/or -;;; modify it under the terms of the GNU Library General Public -;;; License as published by the Free Software Foundation; either -;;; version 2 of the License, or (at your option) any later version. -;;; -;;; This library is distributed in the hope that it will be useful, -;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;; Library General Public License for more details. -;;; -;;; You should have received a copy of the GNU Library General Public -;;; License along with this library; if not, write to the -;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;;; Boston, MA 02111-1307 USA. - -(in-package :runes) - -;;; API -;; -;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding -;; [function] -;; MAKE-ROD-XSTREAM rod &key name [function] -;; CLOSE-XSTREAM xstream [function] -;; XSTREAM-P object [function] -;; -;; READ-RUNE xstream [macro] -;; PEEK-RUNE xstream [macro] -;; FREAD-RUNE xstream [function] -;; FPEEK-RUNE xstream [function] -;; CONSUME-RUNE xstream [macro] -;; UNREAD-RUNE rune xstream [function] -;; -;; XSTREAM-NAME xstream [accessor] -;; XSTREAM-POSITION xstream [function] -;; XSTREAM-LINE-NUMBER xstream [function] -;; XSTREAM-COLUMN-NUMBER xstream [function] -;; XSTREAM-PLIST xstream [accessor] -;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*] -;; SET-TO-FULL-SPEED xstream [function] - -;; [*] switching the encoding on the fly is only possible when the -;; stream's buffer is empty; therefore to be able to switch the -;; encoding, while some runes are already read, set the stream's speed -;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM) -;; and later set it to full speed. (The encoding of the runes -;; sequence, you fetch off with READ-RUNE is always UTF-16 though). -;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the -;; speed up to a full buffer length. - -;; An encoding is simply something, which provides the DECODE-SEQUENCE -;; method. - -;;; Controller protocol -;; -;; READ-OCTECTS sequence os-stream start end -> first-non-written -;; XSTREAM/CLOSE os-stream -;; - -(eval-when (:compile-toplevel :load-toplevel :execute) - (defparameter *fast* '(optimize (speed 3) (safety 0)))) - -;; Let us first define fast fixnum arithmetric get rid of type -;; checks. (After all we know what we do here). - -(defmacro fx-op (op &rest xs) - `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) -(defmacro fx-pred (op &rest xs) - `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) - -(defmacro %+ (&rest xs) `(fx-op + ,@xs)) -(defmacro %= (&rest xs) `(fx-pred = ,@xs)) - -(deftype buffer-index () - `(unsigned-byte ,(integer-length array-total-size-limit))) - -(deftype buffer-byte () - `(unsigned-byte 16)) - -(deftype octet () - `(unsigned-byte 8)) - -;; The usage of a special marker for EOF is experimental and -;; considered unhygenic. - -(defconstant +end+ #xFFFF - "Special marker inserted into stream buffers to indicate end of buffered data.") - -(defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte)) -(defvar +null-octet-buffer+ (make-array 0 :element-type 'octet)) - -(defstruct (xstream - (:constructor make-xstream/low) - (:copier nil) - (:print-function print-xstream)) - - ;;; Read buffer - - ;; the buffer itself - (buffer +null-buffer+ - :type (simple-array buffer-byte (*))) - ;; points to the next element of `buffer' containing the next rune - ;; about to be read. - (read-ptr 0 :type buffer-index) - ;; points to the first element of `buffer' not containing a rune to - ;; be read. - (fill-ptr 0 :type buffer-index) - - ;;; OS buffer - - ;; a scratch pad for READ-SEQUENCE - (os-buffer +null-octet-buffer+ - :type (simple-array octet (*))) - - ;; `os-left-start', `os-left-end' designate a region of os-buffer, - ;; which still contains some undecoded data. This is needed because - ;; of the DECODE-SEQUENCE protocol - (os-left-start 0 :type buffer-index) - (os-left-end 0 :type buffer-index) - - ;; How much to read each time - (speed 0 :type buffer-index) - (full-speed 0 :type buffer-index) - - ;; Some stream object obeying to a certain protcol - os-stream - - ;; The external format - ;; (some object offering the ENCODING protocol) - (encoding :utf-8) - - ;;A STREAM-NAME object - (name nil) - - ;; a plist a struct keeps the hack away - (plist nil) - - ;; Stream Position - (line-number 1 :type integer) ;current line number - (line-start 0 :type integer) ;stream position the current line starts at - (buffer-start 0 :type integer) ;stream position the current buffer starts at - - ;; There is no need to maintain a column counter for each character - ;; read, since we can easily compute it from `line-start' and - ;; `buffer-start'. - ) - -(defun print-xstream (self sink depth) - (declare (ignore depth)) - (format sink "#<~S ~S>" (type-of self) (xstream-name self))) - -(defmacro read-rune (input) - "Read a single rune off the xstream `input'. In case of end of file :EOF - is returned." - `((lambda (input) - (declare (type xstream input) - #.*fast*) - (let ((rp (xstream-read-ptr input))) - (declare (type buffer-index rp)) - (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) - rp))) - (declare (type buffer-byte ch)) - (setf (xstream-read-ptr input) (%+ rp 1)) - (cond ((%= ch +end+) - (the (or (member :eof) rune) - (xstream-underflow input))) - ((%= ch #x000A) ;line break - (account-for-line-break input) - (code-rune ch)) - (t - (code-rune ch)))))) - ,input)) - -(defmacro peek-rune (input) - "Peek a single rune off the xstream `input'. In case of end of file :EOF - is returned." - `((lambda (input) - (declare (type xstream input) - #.*fast*) - (let ((rp (xstream-read-ptr input))) - (declare (type buffer-index rp)) - (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) - rp))) - (declare (type buffer-byte ch)) - (cond ((%= ch +end+) - (prog1 - (the (or (member :eof) rune) (xstream-underflow input)) - (setf (xstream-read-ptr input) 0))) - (t - (code-rune ch)))))) - ,input)) - -(defmacro consume-rune (input) - "Like READ-RUNE, but does not actually return the read rune." - `((lambda (input) - (declare (type xstream input) - #.*fast*) - (let ((rp (xstream-read-ptr input))) - (declare (type buffer-index rp)) - (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) - rp))) - (declare (type buffer-byte ch)) - (setf (xstream-read-ptr input) (%+ rp 1)) - (when (%= ch +end+) - (xstream-underflow input)) - (when (%= ch #x000A) ;line break - (account-for-line-break input) ))) - nil) - ,input)) - -(definline unread-rune (rune input) - "Unread the last recently read rune; if there wasn't such a rune, you - deserve to lose." - (declare (ignore rune)) - (decf (xstream-read-ptr input)) - (when (rune= (peek-rune input) #/u+000A) ;was it a line break? - (unaccount-for-line-break input))) - -(defun fread-rune (input) - (read-rune input)) - -(defun fpeek-rune (input) - (peek-rune input)) - -;;; Line counting - -(defun account-for-line-break (input) - (declare (type xstream input)) - (incf (xstream-line-number input)) - (setf (xstream-line-start input) - (+ (xstream-buffer-start input) (xstream-read-ptr input)))) - -(defun unaccount-for-line-break (input) - ;; incomplete! - ;; We better use a traditional lookahead technique or forbid unread-rune. - (decf (xstream-line-number input))) - -;; User API: - -(defun xstream-position (input) - (+ (xstream-buffer-start input) (xstream-read-ptr input))) - -;; xstream-line-number is structure accessor - -(defun xstream-column-number (input) - (+ (- (xstream-position input) - (xstream-line-start input)) - 1)) - -;;; Underflow - -(defconstant +default-buffer-size+ 100) - -(defmethod xstream-underflow ((input xstream)) - (declare (type xstream input)) - ;; we are about to fill new data into the buffer, so we need to - ;; adjust buffer-start. - (incf (xstream-buffer-start input) - (- (xstream-fill-ptr input) 0)) - (let (n m) - ;; when there is something left in the os-buffer, we move it to - ;; the start of the buffer. - (setf m (- (xstream-os-left-end input) (xstream-os-left-start input))) - (unless (zerop m) - (replace (xstream-os-buffer input) (xstream-os-buffer input) - :start1 0 :end1 m - :start2 (xstream-os-left-start input) - :end2 (xstream-os-left-end input)) - ;; then we take care that the buffer is large enough to carry at - ;; least 100 bytes (a random number) - ;; - ;; David: My understanding is that any number of octets large enough - ;; to record the longest UTF-8 sequence or UTF-16 sequence is okay, - ;; so 100 is plenty for this purpose. - (unless (>= (length (xstream-os-buffer input)) - +default-buffer-size+) - (error "You lost"))) - (setf n - (read-octets (xstream-os-buffer input) (xstream-os-stream input) - m (min (1- (length (xstream-os-buffer input))) - (+ m (xstream-speed input))))) - (cond ((%= n 0) - (setf (xstream-read-ptr input) 0 - (xstream-fill-ptr input) n) - (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) - :eof) - (t - (multiple-value-bind (fnw fnr) - (runes-encoding:decode-sequence - (xstream-encoding input) - (xstream-os-buffer input) 0 n - (xstream-buffer input) 0 (1- (length (xstream-buffer input))) - (= n m)) - (setf (xstream-os-left-start input) fnr - (xstream-os-left-end input) n - (xstream-read-ptr input) 0 - (xstream-fill-ptr input) fnw) - (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) - (read-rune input)))))) - -;;; constructor - -(defun make-xstream (os-stream &key name - (speed 8192) - (initial-speed 1) - (initial-encoding :guess)) - ;; XXX if initial-speed isn't 1, encoding will me munged up - (assert (eql initial-speed 1)) - (multiple-value-bind (encoding preread) - (if (eq initial-encoding :guess) - (figure-encoding os-stream) - (values initial-encoding nil)) - (let* ((bufsize (max speed +default-buffer-size+)) - (osbuf (make-array bufsize :element-type '(unsigned-byte 8)))) - (replace osbuf preread) - (make-xstream/low - :buffer (let ((r (make-array bufsize :element-type 'buffer-byte))) - (setf (elt r 0) #xFFFF) - r) - :read-ptr 0 - :fill-ptr 0 - :os-buffer osbuf - :speed initial-speed - :full-speed speed - :os-stream os-stream - :os-left-start 0 - :os-left-end (length preread) - :encoding encoding - :name name)))) - -(defun make-rod-xstream (string &key name) - ;; XXX encoding is mis-handled by this kind of stream - (let ((n (length string))) - (let ((buffer (make-array (1+ n) :element-type 'buffer-byte))) - (declare (type (simple-array buffer-byte (*)) buffer)) - ;; copy the rod - (do ((i (1- n) (- i 1))) - ((< i 0)) - (declare (type fixnum i)) - (setf (aref buffer i) (rune-code (%rune string i)))) - (setf (aref buffer n) +end+) - ;; - (make-xstream/low :buffer buffer - :read-ptr 0 - :fill-ptr n - ;; :os-buffer nil - :speed 1 - :os-stream nil - :name name)))) - -(defmethod figure-encoding ((stream null)) - (values :utf-8 nil)) - -(defmethod figure-encoding ((stream stream)) - (let ((c0 (read-byte stream nil :eof))) - (cond ((eq c0 :eof) - (values :utf-8 nil)) - (t - (let ((c1 (read-byte stream nil :eof))) - (cond ((eq c1 :eof) - (values :utf-8 (list c0))) - (t - (cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil)) - ((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil)) - (t - (values :utf-8 (list c0 c1))))))))))) - -;;; misc - -(defun close-xstream (input) - (xstream/close (xstream-os-stream input))) - -(defun set-to-full-speed (xstream) - (setf (xstream-speed xstream) (xstream-full-speed xstream))) - -;;; controller implementations - -(defmethod read-octets (sequence (stream stream) start end) - (#+CLISP ext:read-byte-sequence - #-CLISP read-sequence - sequence stream :start start :end end)) - -#+cmu -(defmethod read-octets :around (sequence (stream stream) start end) - ;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2) - ;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages - ;; in SEQUENCE and make sure no GC happens between that and the read(2). - (ext::without-gcing - (loop for i from start below end - do (setf (elt sequence i) (elt sequence i))) - (call-next-method))) - -(defmethod read-octets (sequence (stream null) start end) - (declare (ignore sequence start end)) - 0) - -(defmethod xstream/close ((stream stream)) - (close stream)) - -(defmethod xstream/close ((stream null)) - nil) diff --git a/runes/ystream.lisp b/runes/ystream.lisp deleted file mode 100644 index b98d0e8..0000000 --- a/runes/ystream.lisp +++ /dev/null @@ -1,297 +0,0 @@ -;;; (c) 2005 David Lichteblau -;;; License: Lisp-LGPL (See file COPYING for details). -;;; -;;; ystream (for lack of a better name): a rune output "stream" - -(in-package :runes) - -(defconstant +ystream-bufsize+ 1024) - -(defun make-ub8-array (n) - (make-array n :element-type '(unsigned-byte 8))) - -(defun make-ub16-array (n) - (make-array n :element-type '(unsigned-byte 16))) - -(defun make-buffer (&key (element-type '(unsigned-byte 8))) - (make-array 1 - :element-type element-type - :adjustable t - :fill-pointer 0)) - -(defmacro while (test &body body) - `(until (not ,test) ,@body)) - -(defmacro until (test &body body) - `(do () (,test) ,@body)) - -;;; ystream -;;; +- utf8-ystream -;;; | +- octet-vector-ystream -;;; | \- %stream-ystream -;;; | +- octet-stream-ystream -;;; | \- character-stream-ystream/utf8 -;;; | \- string-ystream/utf8 -;;; +- rod-ystream -;;; \-- character-stream-ystream - -(defstruct ystream - (column 0 :type integer) - (in-ptr 0 :type fixnum) - (in-buffer (make-rod +ystream-bufsize+) :type simple-rod)) - -(defstruct (utf8-ystream - (:include ystream) - (:conc-name "YSTREAM-")) - (out-buffer (make-ub8-array (* 6 +ystream-bufsize+)) - :type (simple-array (unsigned-byte 8) (*)))) - -(defstruct (%stream-ystream (:include utf8-ystream) (:conc-name "YSTREAM-")) - (os-stream nil)) - -(definline write-rune (rune ystream) - (let ((in (ystream-in-buffer ystream))) - (when (eql (ystream-in-ptr ystream) (length in)) - (flush-ystream ystream) - (setf in (ystream-in-buffer ystream))) - (setf (elt in (ystream-in-ptr ystream)) rune) - (incf (ystream-in-ptr ystream)) - (setf (ystream-column ystream) - (if (eql rune #/U+0010) 0 (1+ (ystream-column ystream)))) - rune)) - -(defmethod close-ystream :before ((ystream ystream)) - (flush-ystream ystream)) - - -;;;; UTF8-YSTREAM (abstract) - -(defmethod close-ystream ((ystream %stream-ystream)) - (ystream-os-stream ystream)) - -(defgeneric ystream-device-write (ystream buf nbytes)) - -(defmethod flush-ystream ((ystream utf8-ystream)) - (let ((ptr (ystream-in-ptr ystream))) - (when (plusp ptr) - (let* ((in (ystream-in-buffer ystream)) - (out (ystream-out-buffer ystream)) - (surrogatep (<= #xD800 (rune-code (elt in (1- ptr))) #xDBFF)) - n) - (when surrogatep - (decf ptr)) - (when (plusp ptr) - (setf n (runes-to-utf8 out in ptr)) - (ystream-device-write ystream out n) - (cond - (surrogatep - (setf (elt in 0) (elt in (1- ptr))) - (setf (ystream-in-ptr ystream) 1)) - (t - (setf (ystream-in-ptr ystream) 0)))))))) - -(defun write-rod (rod sink) - (loop for rune across rod do (write-rune rune sink))) - -(defun fast-push (new-element vector) - (vector-push-extend new-element vector (max 1 (array-dimension vector 0)))) - -(macrolet ((define-utf8-writer (name (byte &rest aux) result &body body) - `(defun ,name (out in n) - (let ((high-surrogate nil) - ,@aux) - (labels - ((write0 (,byte) - ,@body) - (write1 (r) - (cond - ((<= #x00000000 r #x0000007F) - (write0 r)) - ((<= #x00000080 r #x000007FF) - (write0 (logior #b11000000 (ldb (byte 5 6) r))) - (write0 (logior #b10000000 (ldb (byte 6 0) r)))) - ((<= #x00000800 r #x0000FFFF) - (write0 (logior #b11100000 (ldb (byte 4 12) r))) - (write0 (logior #b10000000 (ldb (byte 6 6) r))) - (write0 (logior #b10000000 (ldb (byte 6 0) r)))) - ((<= #x00010000 r #x001FFFFF) - (write0 (logior #b11110000 (ldb (byte 3 18) r))) - (write0 (logior #b10000000 (ldb (byte 6 12) r))) - (write0 (logior #b10000000 (ldb (byte 6 6) r))) - (write0 (logior #b10000000 (ldb (byte 6 0) r)))) - ((<= #x00200000 r #x03FFFFFF) - (write0 (logior #b11111000 (ldb (byte 2 24) r))) - (write0 (logior #b10000000 (ldb (byte 6 18) r))) - (write0 (logior #b10000000 (ldb (byte 6 12) r))) - (write0 (logior #b10000000 (ldb (byte 6 6) r))) - (write0 (logior #b10000000 (ldb (byte 6 0) r)))) - ((<= #x04000000 r #x7FFFFFFF) - (write0 (logior #b11111100 (ldb (byte 1 30) r))) - (write0 (logior #b10000000 (ldb (byte 6 24) r))) - (write0 (logior #b10000000 (ldb (byte 6 18) r))) - (write0 (logior #b10000000 (ldb (byte 6 12) r))) - (write0 (logior #b10000000 (ldb (byte 6 6) r))) - (write0 (logior #b10000000 (ldb (byte 6 0) r)))))) - (write2 (r) - (cond - ((<= #xD800 r #xDBFF) - (setf high-surrogate r)) - ((<= #xDC00 r #xDFFF) - (let ((q (logior (ash (- high-surrogate #xD7C0) 10) - (- r #xDC00)))) - (write1 q)) - (setf high-surrogate nil)) - (t - (write1 r))))) - (dotimes (j n) - (write2 (rune-code (elt in j))))) - ,result)))) - (define-utf8-writer runes-to-utf8 (x (i 0)) - i - (setf (elt out i) x) - (incf i)) - (define-utf8-writer runes-to-utf8/adjustable-string (x) - nil - (fast-push (code-char x) out))) - - -;;;; ROD-YSTREAM - -(defstruct (rod-ystream (:include ystream))) - -(defmethod flush-ystream ((ystream rod-ystream)) - (let* ((old (ystream-in-buffer ystream)) - (new (make-rod (* 2 (length old))))) - (replace new old) - (setf (ystream-in-buffer ystream) new))) - -(defmethod close-ystream ((ystream rod-ystream)) - (subseq (ystream-in-buffer ystream) 0 (ystream-in-ptr ystream))) - - -;;;; CHARACTER-STREAM-YSTREAM - -#+rune-is-character -(progn - (defstruct (character-stream-ystream - (:constructor make-character-stream-ystream (target-stream)) - (:include ystream) - (:conc-name "YSTREAM-")) - (target-stream nil)) - - (defmethod flush-ystream ((ystream character-stream-ystream)) - (write-string (ystream-in-buffer ystream) - (ystream-target-stream ystream) - :end (ystream-in-ptr ystream)) - (setf (ystream-in-ptr ystream) 0)) - - (defmethod close-ystream ((ystream character-stream-ystream)) - (ystream-target-stream ystream))) - - -;;;; OCTET-VECTOR-YSTREAM - -(defstruct (octet-vector-ystream - (:include utf8-ystream) - (:conc-name "YSTREAM-")) - (result (make-buffer))) - -(defmethod ystream-device-write ((ystream octet-vector-ystream) buf nbytes) - (let* ((result (ystream-result ystream)) - (start (length result)) - (size (array-dimension result 0))) - (while (> (+ start nbytes) size) - (setf size (* 2 size))) - (adjust-array result size :fill-pointer (+ start nbytes)) - (replace result buf :start1 start :end2 nbytes))) - -(defmethod close-ystream ((ystream octet-vector-ystream)) - (ystream-result ystream)) - - -;;;; OCTET-STREAM-YSTREAM - -(defstruct (octet-stream-ystream - (:include %stream-ystream) - (:constructor make-octet-stream-ystream (os-stream)) - (:conc-name "YSTREAM-"))) - -(defmethod ystream-device-write ((ystream octet-stream-ystream) buf nbytes) - (write-sequence buf (ystream-os-stream ystream) :end nbytes)) - - -;;;; CHARACTER-STREAM-YSTREAM/UTF8 - -;; #+rune-is-integer -(progn - (defstruct (character-stream-ystream/utf8 - (:constructor make-character-stream-ystream/utf8 (os-stream)) - (:include %stream-ystream) - (:conc-name "YSTREAM-"))) - - (defmethod ystream-device-write - ((ystream character-stream-ystream/utf8) buf nbytes) - (declare (type (simple-array (unsigned-byte 8) (*)) buf)) - (let ((out (ystream-os-stream ystream))) - (dotimes (x nbytes) - (write-char (code-char (elt buf x)) out))))) - - -;;;; STRING-YSTREAM/UTF8 - -;; #+rune-is-integer -(progn - (defstruct (string-ystream/utf8 - (:include character-stream-ystream/utf8 - (os-stream (make-string-output-stream))) - (:conc-name "YSTREAM-"))) - - (defmethod close-ystream ((ystream string-ystream/utf8)) - (get-output-stream-string (ystream-os-stream ystream)))) - - -;;;; helper functions - -(defun rod-to-utf8-string (rod) - (let ((out (make-buffer :element-type 'character))) - (runes-to-utf8/adjustable-string out rod (length rod)) - out)) - -(defun utf8-string-to-rod (str) - (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)) - (buffer (make-array (length bytes) :element-type '(unsigned-byte 16))) - (n (runes-encoding:decode-sequence - :utf-8 bytes 0 (length bytes) buffer 0 0 nil)) - (result (make-array n :element-type 'rune))) - (map-into result #'code-rune buffer) - result)) - -(defclass octet-input-stream - (trivial-gray-stream-mixin fundamental-binary-input-stream) - ((octets :initarg :octets) - (pos :initform 0))) - -(defmethod close ((stream octet-input-stream) &key abort) - (declare (ignore abort)) - (open-stream-p stream)) - -(defmethod stream-read-byte ((stream octet-input-stream)) - (with-slots (octets pos) stream - (if (>= pos (length octets)) - :eof - (prog1 - (elt octets pos) - (incf pos))))) - -(defmethod stream-read-sequence - ((stream octet-input-stream) sequence start end &key &allow-other-keys) - (with-slots (octets pos) stream - (let* ((length (min (- end start) (- (length octets) pos))) - (end1 (+ start length)) - (end2 (+ pos length))) - (replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2) - (setf pos end2) - end1))) - -(defun make-octet-input-stream (octets) - (make-instance 'octet-input-stream :octets octets))