Moved runes/ into its own cvs module under its new name, closure-common.
This commit is contained in:
14
cxml.asd
14
cxml.asd
@ -2,8 +2,13 @@
|
||||
(:use :asdf :cl))
|
||||
(in-package :cxml-system)
|
||||
|
||||
;; force loading of runes.asd, which installs *features* this file depends on
|
||||
(find-system :runes)
|
||||
;; force loading of closure-common.asd, which installs *FEATURES* this
|
||||
;; file depends on. Use MISSING-DEPENDENCY for asdf-install.
|
||||
(unless (find-system :closure-common nil)
|
||||
(error 'missing-dependency
|
||||
:required-by :cxml
|
||||
:version nil
|
||||
:requires :closure-common))
|
||||
|
||||
(defclass closure-source-file (cl-source-file) ())
|
||||
|
||||
@ -36,7 +41,7 @@
|
||||
(:file "space-normalizer" :depends-on ("xml-parse"))
|
||||
(:file "catalog" :depends-on ("xml-parse"))
|
||||
(:file "sax-proxy" :depends-on ("xml-parse")))
|
||||
:depends-on (:runes :puri #-scl :trivial-gray-streams))
|
||||
:depends-on (:closure-common :puri #-scl :trivial-gray-streams))
|
||||
|
||||
(defclass utf8dom-file (closure-source-file) ((of)))
|
||||
|
||||
@ -57,7 +62,8 @@
|
||||
(defmethod perform ((operation compile-op) (c utf8dom-file))
|
||||
(let ((*features* (cons 'utf8dom-file *features*))
|
||||
(*readtable*
|
||||
(symbol-value (find-symbol "*UTF8-RUNES-READTABLE*" :runes-system))))
|
||||
(symbol-value (find-symbol "*UTF8-RUNES-READTABLE*"
|
||||
:closure-common-system))))
|
||||
(call-next-method)))
|
||||
|
||||
(asdf:defsystem :cxml-dom
|
||||
|
||||
@ -1,148 +0,0 @@
|
||||
;;; copyright (c) 2004 knowledgeTools Int. GmbH
|
||||
;;; Author of this version: David Lichteblau <david@knowledgetools.de>
|
||||
;;;
|
||||
;;; derived from runes.lisp, (c) copyright 1998,1999 by Gilbert Baumann
|
||||
;;;
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;;
|
||||
;;; This code is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the version 2.1 of the GNU Lesser General Public
|
||||
;;; License as published by the Free Software Foundation, as clarified
|
||||
;;; by the "Preamble to the Gnu Lesser General Public License" found in
|
||||
;;; the file COPYING.
|
||||
;;;
|
||||
;;; This code is distributed in the hope that it will be useful,
|
||||
;;; but without any warranty; without even the implied warranty of
|
||||
;;; merchantability or fitness for a particular purpose. See the GNU
|
||||
;;; Lesser General Public License for more details.
|
||||
;;;
|
||||
;;; Version 2.1 of the GNU Lesser General Public License is in the file
|
||||
;;; COPYING that was distributed with this file. If it is not present,
|
||||
;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until
|
||||
;;; superseded by a newer version) or write to the Free Software
|
||||
;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
(deftype rune () #-lispworks 'character #+lispworks 'lw:simple-char)
|
||||
(deftype rod () '(vector rune))
|
||||
(deftype simple-rod () '(simple-array rune))
|
||||
|
||||
(definline rune (rod index)
|
||||
(char rod index))
|
||||
|
||||
(defun (setf rune) (new rod index)
|
||||
(setf (char rod index) new))
|
||||
|
||||
(definline %rune (rod index)
|
||||
(aref (the simple-string rod) (the fixnum index)))
|
||||
|
||||
(definline (setf %rune) (new rod index)
|
||||
(setf (aref (the simple-string rod) (the fixnum index)) new))
|
||||
|
||||
(defun rod-capitalize (rod)
|
||||
(string-upcase rod))
|
||||
|
||||
(definline code-rune (x) (code-char x))
|
||||
(definline rune-code (x) (char-code x))
|
||||
|
||||
(definline rune= (x y)
|
||||
(char= x y))
|
||||
|
||||
(defun rune-downcase (rune)
|
||||
(char-downcase rune))
|
||||
|
||||
(definline rune-upcase (rune)
|
||||
(char-upcase rune))
|
||||
|
||||
(defun rune-upper-case-letter-p (rune)
|
||||
(upper-case-p rune))
|
||||
|
||||
(defun rune-lower-case-letter-p (rune)
|
||||
(lower-case-p rune))
|
||||
|
||||
(defun rune-equal (x y)
|
||||
(char-equal x y))
|
||||
|
||||
(defun rod-downcase (rod)
|
||||
(string-downcase rod))
|
||||
|
||||
(defun rod-upcase (rod)
|
||||
(string-upcase rod))
|
||||
|
||||
(definline white-space-rune-p (char)
|
||||
(or (char= char #\tab)
|
||||
(char= char #.(code-char 10)) ;Linefeed
|
||||
(char= char #.(code-char 13)) ;Carriage Return
|
||||
(char= char #\space)))
|
||||
|
||||
(definline digit-rune-p (char &optional (radix 10))
|
||||
(digit-char-p char radix))
|
||||
|
||||
(defun rod (x)
|
||||
(cond
|
||||
((stringp x) x)
|
||||
((symbolp x) (string x))
|
||||
((characterp x) (string x))
|
||||
((vectorp x) (coerce x 'string))
|
||||
((integerp x) (string (code-char x)))
|
||||
(t (error "Cannot convert ~S to a ~S" x 'rod))))
|
||||
|
||||
(defun runep (x)
|
||||
(characterp x))
|
||||
|
||||
(defun sloopy-rod-p (x)
|
||||
(stringp x))
|
||||
|
||||
(defun rod= (x y)
|
||||
(if (zerop (length x))
|
||||
(zerop (length y))
|
||||
(and (plusp (length y)) (string= x y))))
|
||||
|
||||
(defun rod-equal (x y)
|
||||
(string-equal x y))
|
||||
|
||||
(definline make-rod (size)
|
||||
(make-string size :element-type 'rune))
|
||||
|
||||
(defun char-rune (char)
|
||||
char)
|
||||
|
||||
(defun rune-char (rune &optional default)
|
||||
(declare (ignore default))
|
||||
rune)
|
||||
|
||||
(defun rod-string (rod &optional (default-char #\?))
|
||||
(declare (ignore default-char))
|
||||
rod)
|
||||
|
||||
(defun string-rod (string)
|
||||
string)
|
||||
|
||||
;;;;
|
||||
|
||||
(defun rune<= (rune &rest more-runes)
|
||||
(loop
|
||||
for (a b) on (cons rune more-runes)
|
||||
while b
|
||||
always (char<= a b)))
|
||||
|
||||
(defun rune>= (rune &rest more-runes)
|
||||
(loop
|
||||
for (a b) on (cons rune more-runes)
|
||||
while b
|
||||
always (char>= a b)))
|
||||
|
||||
(defun rodp (object)
|
||||
(stringp object))
|
||||
|
||||
(defun rod-subseq (source start &optional (end (length source)))
|
||||
(unless (stringp source)
|
||||
(error "~S is not of type ~S." source 'rod))
|
||||
(subseq source start end))
|
||||
|
||||
(defun rod-subseq* (source start &optional (end (length source)))
|
||||
(rod-subseq source start end))
|
||||
|
||||
(defun rod< (rod1 rod2)
|
||||
(string< rod1 rod2))
|
||||
@ -1,63 +0,0 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: definline
|
||||
;;; Created: 1999-05-25 22:32
|
||||
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; (c) copyright 1999 by Gilbert Baumann
|
||||
|
||||
;;; This code is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the version 2.1 of the GNU Lesser General Public
|
||||
;;; License as published by the Free Software Foundation, as clarified
|
||||
;;; by the "Preamble to the Gnu Lesser General Public License" found in
|
||||
;;; the file COPYING.
|
||||
;;;
|
||||
;;; This code is distributed in the hope that it will be useful,
|
||||
;;; but without any warranty; without even the implied warranty of
|
||||
;;; merchantability or fitness for a particular purpose. See the GNU
|
||||
;;; Lesser General Public License for more details.
|
||||
;;;
|
||||
;;; Version 2.1 of the GNU Lesser General Public License is in the file
|
||||
;;; COPYING that was distributed with this file. If it is not present,
|
||||
;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until
|
||||
;;; superseded by a newer version) or write to the Free Software
|
||||
;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
#-(or allegro openmcl)
|
||||
(defmacro definline (name args &body body)
|
||||
`(progn
|
||||
(declaim (inline ,name))
|
||||
(defun ,name ,args .,body)))
|
||||
|
||||
#+openmcl
|
||||
(defmacro runes::definline (fun args &body body)
|
||||
(if (consp fun)
|
||||
`(defun ,fun ,args
|
||||
,@body)
|
||||
`(progn
|
||||
(defun ,fun ,args .,body)
|
||||
(define-compiler-macro ,fun (&rest .args.)
|
||||
(cons '(lambda ,args .,body)
|
||||
.args.)))))
|
||||
|
||||
#+allegro
|
||||
(defmacro definline (fun args &body body)
|
||||
(if (and (consp fun) (eq (car fun) 'setf))
|
||||
(let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")")
|
||||
(symbol-package (cadr fun)))))
|
||||
`(progn
|
||||
(defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap))
|
||||
(definline ,fnam ,args .,body)))
|
||||
(labels ((declp (x)
|
||||
(and (consp x) (eq (car x) 'declare))))
|
||||
`(progn
|
||||
(defun ,fun ,args .,body)
|
||||
(define-compiler-macro ,fun (&rest .args.)
|
||||
(cons '(lambda ,args
|
||||
,@(remove-if-not #'declp body)
|
||||
(block ,fun
|
||||
,@(remove-if #'declp body)))
|
||||
.args.))))))
|
||||
@ -1,568 +0,0 @@
|
||||
(in-package :runes-encoding)
|
||||
|
||||
(progn
|
||||
(add-name :us-ascii "ANSI_X3.4-1968")
|
||||
(add-name :us-ascii "iso-ir-6")
|
||||
(add-name :us-ascii "ANSI_X3.4-1986")
|
||||
(add-name :us-ascii "ISO_646.irv:1991")
|
||||
(add-name :us-ascii "ASCII")
|
||||
(add-name :us-ascii "ISO646-US")
|
||||
(add-name :us-ascii "US-ASCII")
|
||||
(add-name :us-ascii "us")
|
||||
(add-name :us-ascii "IBM367")
|
||||
(add-name :us-ascii "cp367")
|
||||
(add-name :us-ascii "csASCII")
|
||||
|
||||
(add-name :iso-8859-1 "ISO_8859-1:1987")
|
||||
(add-name :iso-8859-1 "iso-ir-100")
|
||||
(add-name :iso-8859-1 "ISO_8859-1")
|
||||
(add-name :iso-8859-1 "ISO-8859-1")
|
||||
(add-name :iso-8859-1 "latin1")
|
||||
(add-name :iso-8859-1 "l1")
|
||||
(add-name :iso-8859-1 "IBM819")
|
||||
(add-name :iso-8859-1 "CP819")
|
||||
(add-name :iso-8859-1 "csISOLatin1")
|
||||
|
||||
(add-name :iso-8859-2 "ISO_8859-2:1987")
|
||||
(add-name :iso-8859-2 "iso-ir-101")
|
||||
(add-name :iso-8859-2 "ISO_8859-2")
|
||||
(add-name :iso-8859-2 "ISO-8859-2")
|
||||
(add-name :iso-8859-2 "latin2")
|
||||
(add-name :iso-8859-2 "l2")
|
||||
(add-name :iso-8859-2 "csISOLatin2")
|
||||
|
||||
(add-name :iso-8859-3 "ISO_8859-3:1988")
|
||||
(add-name :iso-8859-3 "iso-ir-109")
|
||||
(add-name :iso-8859-3 "ISO_8859-3")
|
||||
(add-name :iso-8859-3 "ISO-8859-3")
|
||||
(add-name :iso-8859-3 "latin3")
|
||||
(add-name :iso-8859-3 "l3")
|
||||
(add-name :iso-8859-3 "csISOLatin3")
|
||||
|
||||
(add-name :iso-8859-4 "ISO_8859-4:1988")
|
||||
(add-name :iso-8859-4 "iso-ir-110")
|
||||
(add-name :iso-8859-4 "ISO_8859-4")
|
||||
(add-name :iso-8859-4 "ISO-8859-4")
|
||||
(add-name :iso-8859-4 "latin4")
|
||||
(add-name :iso-8859-4 "l4")
|
||||
(add-name :iso-8859-4 "csISOLatin4")
|
||||
|
||||
(add-name :iso-8859-6 "ISO_8859-6:1987")
|
||||
(add-name :iso-8859-6 "iso-ir-127")
|
||||
(add-name :iso-8859-6 "ISO_8859-6")
|
||||
(add-name :iso-8859-6 "ISO-8859-6")
|
||||
(add-name :iso-8859-6 "ECMA-114")
|
||||
(add-name :iso-8859-6 "ASMO-708")
|
||||
(add-name :iso-8859-6 "arabic")
|
||||
(add-name :iso-8859-6 "csISOLatinArabic")
|
||||
|
||||
(add-name :iso-8859-7 "ISO_8859-7:1987")
|
||||
(add-name :iso-8859-7 "iso-ir-126")
|
||||
(add-name :iso-8859-7 "ISO_8859-7")
|
||||
(add-name :iso-8859-7 "ISO-8859-7")
|
||||
(add-name :iso-8859-7 "ELOT_928")
|
||||
(add-name :iso-8859-7 "ECMA-118")
|
||||
(add-name :iso-8859-7 "greek")
|
||||
(add-name :iso-8859-7 "greek8")
|
||||
(add-name :iso-8859-7 "csISOLatinGreek")
|
||||
|
||||
(add-name :iso-8859-8 "ISO_8859-8:1988")
|
||||
(add-name :iso-8859-8 "iso-ir-138")
|
||||
(add-name :iso-8859-8 "ISO_8859-8")
|
||||
(add-name :iso-8859-8 "ISO-8859-8")
|
||||
(add-name :iso-8859-8 "hebrew")
|
||||
(add-name :iso-8859-8 "csISOLatinHebrew")
|
||||
|
||||
(add-name :iso-8859-5 "ISO_8859-5:1988")
|
||||
(add-name :iso-8859-5 "iso-ir-144")
|
||||
(add-name :iso-8859-5 "ISO_8859-5")
|
||||
(add-name :iso-8859-5 "ISO-8859-5")
|
||||
(add-name :iso-8859-5 "cyrillic")
|
||||
(add-name :iso-8859-5 "csISOLatinCyrillic")
|
||||
|
||||
(add-name :iso-8859-9 "ISO_8859-9:1989")
|
||||
(add-name :iso-8859-9 "iso-ir-148")
|
||||
(add-name :iso-8859-9 "ISO_8859-9")
|
||||
(add-name :iso-8859-9 "ISO-8859-9")
|
||||
(add-name :iso-8859-9 "latin5")
|
||||
(add-name :iso-8859-9 "l5")
|
||||
(add-name :iso-8859-9 "csISOLatin5")
|
||||
|
||||
(add-name :iso-8859-15 "ISO_8859-15")
|
||||
(add-name :iso-8859-15 "ISO-8859-15")
|
||||
|
||||
(add-name :iso-8859-14 "ISO_8859-14")
|
||||
(add-name :iso-8859-14 "ISO-8859-14")
|
||||
|
||||
(add-name :koi8-r "KOI8-R")
|
||||
(add-name :koi8-r "csKOI8R")
|
||||
|
||||
(add-name :utf-8 "UTF-8")
|
||||
|
||||
(add-name :utf-16 "UTF-16")
|
||||
|
||||
(add-name :ucs-4 "ISO-10646-UCS-4")
|
||||
(add-name :ucs-4 "UCS-4")
|
||||
|
||||
(add-name :ucs-2 "ISO-10646-UCS-2")
|
||||
(add-name :ucs-2 "UCS-2") )
|
||||
|
||||
|
||||
(progn
|
||||
(define-encoding :iso-8859-1
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-1)))
|
||||
|
||||
(define-encoding :iso-8859-2
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-2)))
|
||||
|
||||
(define-encoding :iso-8859-3
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-3)))
|
||||
|
||||
(define-encoding :iso-8859-4
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-4)))
|
||||
|
||||
(define-encoding :iso-8859-5
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-5)))
|
||||
|
||||
(define-encoding :iso-8859-6
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-6)))
|
||||
|
||||
(define-encoding :iso-8859-7
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-7)))
|
||||
|
||||
(define-encoding :iso-8859-8
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-8)))
|
||||
|
||||
(define-encoding :iso-8859-14
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-14)))
|
||||
|
||||
(define-encoding :iso-8859-15
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :iso-8859-15)))
|
||||
|
||||
(define-encoding :koi8-r
|
||||
(make-simple-8-bit-encoding
|
||||
:charset (find-charset :koi8-r)))
|
||||
|
||||
(define-encoding :utf-8 :utf-8)
|
||||
)
|
||||
|
||||
(progn
|
||||
(define-8-bit-charset :iso-8859-1
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7
|
||||
#| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF
|
||||
#| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7
|
||||
#| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF
|
||||
#| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7
|
||||
#| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF
|
||||
#| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7
|
||||
#| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF
|
||||
#| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7
|
||||
#| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF
|
||||
#| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7
|
||||
#| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF)
|
||||
|
||||
(define-8-bit-charset :iso-8859-2
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x0104 #x02D8 #x0141 #x00A4 #x013D #x015A #x00A7
|
||||
#| #o25x |# #x00A8 #x0160 #x015E #x0164 #x0179 #x00AD #x017D #x017B
|
||||
#| #o26x |# #x00B0 #x0105 #x02DB #x0142 #x00B4 #x013E #x015B #x02C7
|
||||
#| #o27x |# #x00B8 #x0161 #x015F #x0165 #x017A #x02DD #x017E #x017C
|
||||
#| #o30x |# #x0154 #x00C1 #x00C2 #x0102 #x00C4 #x0139 #x0106 #x00C7
|
||||
#| #o31x |# #x010C #x00C9 #x0118 #x00CB #x011A #x00CD #x00CE #x010E
|
||||
#| #o32x |# #x0110 #x0143 #x0147 #x00D3 #x00D4 #x0150 #x00D6 #x00D7
|
||||
#| #o33x |# #x0158 #x016E #x00DA #x0170 #x00DC #x00DD #x0162 #x00DF
|
||||
#| #o34x |# #x0155 #x00E1 #x00E2 #x0103 #x00E4 #x013A #x0107 #x00E7
|
||||
#| #o35x |# #x010D #x00E9 #x0119 #x00EB #x011B #x00ED #x00EE #x010F
|
||||
#| #o36x |# #x0111 #x0144 #x0148 #x00F3 #x00F4 #x0151 #x00F6 #x00F7
|
||||
#| #o37x |# #x0159 #x016F #x00FA #x0171 #x00FC #x00FD #x0163 #x02D9)
|
||||
|
||||
(define-8-bit-charset :iso-8859-3
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x0126 #x02D8 #x00A3 #x00A4 #xFFFF #x0124 #x00A7
|
||||
#| #o25x |# #x00A8 #x0130 #x015E #x011E #x0134 #x00AD #xFFFF #x017B
|
||||
#| #o26x |# #x00B0 #x0127 #x00B2 #x00B3 #x00B4 #x00B5 #x0125 #x00B7
|
||||
#| #o27x |# #x00B8 #x0131 #x015F #x011F #x0135 #x00BD #xFFFF #x017C
|
||||
#| #o30x |# #x00C0 #x00C1 #x00C2 #xFFFF #x00C4 #x010A #x0108 #x00C7
|
||||
#| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF
|
||||
#| #o32x |# #xFFFF #x00D1 #x00D2 #x00D3 #x00D4 #x0120 #x00D6 #x00D7
|
||||
#| #o33x |# #x011C #x00D9 #x00DA #x00DB #x00DC #x016C #x015C #x00DF
|
||||
#| #o34x |# #x00E0 #x00E1 #x00E2 #xFFFF #x00E4 #x010B #x0109 #x00E7
|
||||
#| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF
|
||||
#| #o36x |# #xFFFF #x00F1 #x00F2 #x00F3 #x00F4 #x0121 #x00F6 #x00F7
|
||||
#| #o37x |# #x011D #x00F9 #x00FA #x00FB #x00FC #x016D #x015D #x02D9)
|
||||
|
||||
(define-8-bit-charset :iso-8859-4
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x0104 #x0138 #x0156 #x00A4 #x0128 #x013B #x00A7
|
||||
#| #o25x |# #x00A8 #x0160 #x0112 #x0122 #x0166 #x00AD #x017D #x00AF
|
||||
#| #o26x |# #x00B0 #x0105 #x02DB #x0157 #x00B4 #x0129 #x013C #x02C7
|
||||
#| #o27x |# #x00B8 #x0161 #x0113 #x0123 #x0167 #x014A #x017E #x014B
|
||||
#| #o30x |# #x0100 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x012E
|
||||
#| #o31x |# #x010C #x00C9 #x0118 #x00CB #x0116 #x00CD #x00CE #x012A
|
||||
#| #o32x |# #x0110 #x0145 #x014C #x0136 #x00D4 #x00D5 #x00D6 #x00D7
|
||||
#| #o33x |# #x00D8 #x0172 #x00DA #x00DB #x00DC #x0168 #x016A #x00DF
|
||||
#| #o34x |# #x0101 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x012F
|
||||
#| #o35x |# #x010D #x00E9 #x0119 #x00EB #x0117 #x00ED #x00EE #x012B
|
||||
#| #o36x |# #x0111 #x0146 #x014D #x0137 #x00F4 #x00F5 #x00F6 #x00F7
|
||||
#| #o37x |# #x00F8 #x0173 #x00FA #x00FB #x00FC #x0169 #x016B #x02D9)
|
||||
|
||||
(define-8-bit-charset :iso-8859-5
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x0401 #x0402 #x0403 #x0404 #x0405 #x0406 #x0407
|
||||
#| #o25x |# #x0408 #x0409 #x040A #x040B #x040C #x00AD #x040E #x040F
|
||||
#| #o26x |# #x0410 #x0411 #x0412 #x0413 #x0414 #x0415 #x0416 #x0417
|
||||
#| #o27x |# #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E #x041F
|
||||
#| #o30x |# #x0420 #x0421 #x0422 #x0423 #x0424 #x0425 #x0426 #x0427
|
||||
#| #o31x |# #x0428 #x0429 #x042A #x042B #x042C #x042D #x042E #x042F
|
||||
#| #o32x |# #x0430 #x0431 #x0432 #x0433 #x0434 #x0435 #x0436 #x0437
|
||||
#| #o33x |# #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E #x043F
|
||||
#| #o34x |# #x0440 #x0441 #x0442 #x0443 #x0444 #x0445 #x0446 #x0447
|
||||
#| #o35x |# #x0448 #x0449 #x044A #x044B #x044C #x044D #x044E #x044F
|
||||
#| #o36x |# #x2116 #x0451 #x0452 #x0453 #x0454 #x0455 #x0456 #x0457
|
||||
#| #o37x |# #x0458 #x0459 #x045A #x045B #x045C #x00A7 #x045E #x045F)
|
||||
|
||||
(define-8-bit-charset :iso-8859-6
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0660 #x0661 #x0662 #x0663 #x0664 #x0665 #x0666 #x0667
|
||||
#| #o07x |# #x0668 #x0669 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #xFFFF #xFFFF #xFFFF #x00A4 #xFFFF #xFFFF #xFFFF
|
||||
#| #o25x |# #xFFFF #xFFFF #xFFFF #xFFFF #x060C #x00AD #xFFFF #xFFFF
|
||||
#| #o26x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o27x |# #xFFFF #xFFFF #xFFFF #x061B #xFFFF #xFFFF #xFFFF #x061F
|
||||
#| #o30x |# #xFFFF #x0621 #x0622 #x0623 #x0624 #x0625 #x0626 #x0627
|
||||
#| #o31x |# #x0628 #x0629 #x062A #x062B #x062C #x062D #x062E #x062F
|
||||
#| #o32x |# #x0630 #x0631 #x0632 #x0633 #x0634 #x0635 #x0636 #x0637
|
||||
#| #o33x |# #x0638 #x0639 #x063A #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o34x |# #x0640 #x0641 #x0642 #x0643 #x0644 #x0645 #x0646 #x0647
|
||||
#| #o35x |# #x0648 #x0649 #x064A #x064B #x064C #x064D #x064E #x064F
|
||||
#| #o36x |# #x0650 #x0651 #x0652 #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o37x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF)
|
||||
|
||||
(define-8-bit-charset :iso-8859-7
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x02BD #x02BC #x00A3 #xFFFF #xFFFF #x00A6 #x00A7
|
||||
#| #o25x |# #x00A8 #x00A9 #xFFFF #x00AB #x00AC #x00AD #xFFFF #x2015
|
||||
#| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x0384 #x0385 #x0386 #x00B7
|
||||
#| #o27x |# #x0388 #x0389 #x038A #x00BB #x038C #x00BD #x038E #x038F
|
||||
#| #o30x |# #x0390 #x0391 #x0392 #x0393 #x0394 #x0395 #x0396 #x0397
|
||||
#| #o31x |# #x0398 #x0399 #x039A #x039B #x039C #x039D #x039E #x039F
|
||||
#| #o32x |# #x03A0 #x03A1 #xFFFF #x03A3 #x03A4 #x03A5 #x03A6 #x03A7
|
||||
#| #o33x |# #x03A8 #x03A9 #x03AA #x03AB #x03AC #x03AD #x03AE #x03AF
|
||||
#| #o34x |# #x03B0 #x03B1 #x03B2 #x03B3 #x03B4 #x03B5 #x03B6 #x03B7
|
||||
#| #o35x |# #x03B8 #x03B9 #x03BA #x03BB #x03BC #x03BD #x03BE #x03BF
|
||||
#| #o36x |# #x03C0 #x03C1 #x03C2 #x03C3 #x03C4 #x03C5 #x03C6 #x03C7
|
||||
#| #o37x |# #x03C8 #x03C9 #x03CA #x03CB #x03CC #x03CD #x03CE #xFFFF)
|
||||
|
||||
(define-8-bit-charset :iso-8859-8
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #xFFFF #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7
|
||||
#| #o25x |# #x00A8 #x00A9 #x00D7 #x00AB #x00AC #x00AD #x00AE #x203E
|
||||
#| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7
|
||||
#| #o27x |# #x00B8 #x00B9 #x00F7 #x00BB #x00BC #x00BD #x00BE #xFFFF
|
||||
#| #o30x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o31x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o32x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o33x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #x2017
|
||||
#| #o34x |# #x05D0 #x05D1 #x05D2 #x05D3 #x05D4 #x05D5 #x05D6 #x05D7
|
||||
#| #o35x |# #x05D8 #x05D9 #x05DA #x05DB #x05DC #x05DD #x05DE #x05DF
|
||||
#| #o36x |# #x05E0 #x05E1 #x05E2 #x05E3 #x05E4 #x05E5 #x05E6 #x05E7
|
||||
#| #o37x |# #x05E8 #x05E9 #x05EA #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF)
|
||||
|
||||
(define-8-bit-charset :iso-8859-9
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7
|
||||
#| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF
|
||||
#| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7
|
||||
#| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF
|
||||
#| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7
|
||||
#| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF
|
||||
#| #o32x |# #x011E #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7
|
||||
#| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x0130 #x015E #x00DF
|
||||
#| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7
|
||||
#| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF
|
||||
#| #o36x |# #x011F #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7
|
||||
#| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x0131 #x015F #x00FF)
|
||||
|
||||
(define-8-bit-charset :iso-8859-14
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x1E02 #x1E03 #x00A3 #x010A #x010B #x1E0A #x00A7
|
||||
#| #o25x |# #x1E80 #x00A9 #x1E82 #x1E0B #x1EF2 #x00AD #x00AE #x0178
|
||||
#| #o26x |# #x1E1E #x1E1F #x0120 #x0121 #x1E40 #x1E41 #x00B6 #x1E56
|
||||
#| #o27x |# #x1E81 #x1E57 #x1E83 #x1E60 #x1EF3 #x1E84 #x1E85 #x1E61
|
||||
#| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7
|
||||
#| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF
|
||||
#| #o32x |# #x0174 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x1E6A
|
||||
#| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x0176 #x00DF
|
||||
#| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7
|
||||
#| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF
|
||||
#| #o36x |# #x0175 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x1E6B
|
||||
#| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x0177 #x00FF)
|
||||
|
||||
(define-8-bit-charset :iso-8859-15
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF
|
||||
#| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x20AC #x00A5 #x0160 #x00A7
|
||||
#| #o25x |# #x0161 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF
|
||||
#| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x017D #x00B5 #x00B6 #x00B7
|
||||
#| #o27x |# #x017E #x00B9 #x00BA #x00BB #x0152 #x0153 #x0178 #x00BF
|
||||
#| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7
|
||||
#| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF
|
||||
#| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7
|
||||
#| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF
|
||||
#| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7
|
||||
#| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF
|
||||
#| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7
|
||||
#| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF)
|
||||
|
||||
(define-8-bit-charset :koi8-r
|
||||
#| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007
|
||||
#| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F
|
||||
#| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017
|
||||
#| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F
|
||||
#| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027
|
||||
#| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F
|
||||
#| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037
|
||||
#| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F
|
||||
#| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047
|
||||
#| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F
|
||||
#| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057
|
||||
#| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F
|
||||
#| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067
|
||||
#| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F
|
||||
#| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077
|
||||
#| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F
|
||||
#| #o20x |# #x2500 #x2502 #x250C #x2510 #x2514 #x2518 #x251C #x2524
|
||||
#| #o21x |# #x252C #x2534 #x253C #x2580 #x2584 #x2588 #x258C #x2590
|
||||
#| #o22x |# #x2591 #x2592 #x2593 #x2320 #x25A0 #x2219 #x221A #x2248
|
||||
#| #o23x |# #x2264 #x2265 #x00A0 #x2321 #x00B0 #x00B2 #x00B7 #x00F7
|
||||
#| #o24x |# #x2550 #x2551 #x2552 #x0451 #x2553 #x2554 #x2555 #x2556
|
||||
#| #o25x |# #x2557 #x2558 #x2559 #x255A #x255B #x255C #x255D #x255E
|
||||
#| #o26x |# #x255F #x2560 #x2561 #x0401 #x2562 #x2563 #x2564 #x2565
|
||||
#| #o27x |# #x2566 #x2567 #x2568 #x2569 #x256A #x256B #x256C #x00A9
|
||||
#| #o30x |# #x044E #x0430 #x0431 #x0446 #x0434 #x0435 #x0444 #x0433
|
||||
#| #o31x |# #x0445 #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E
|
||||
#| #o32x |# #x043F #x044F #x0440 #x0441 #x0442 #x0443 #x0436 #x0432
|
||||
#| #o33x |# #x044C #x044B #x0437 #x0448 #x044D #x0449 #x0447 #x044A
|
||||
#| #o34x |# #x042E #x0410 #x0411 #x0426 #x0414 #x0415 #x0424 #x0413
|
||||
#| #o35x |# #x0425 #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E
|
||||
#| #o36x |# #x041F #x042F #x0420 #x0421 #x0422 #x0423 #x0416 #x0412
|
||||
#| #o37x |# #x042C #x042B #x0417 #x0428 #x042D #x0429 #x0427 #x042A)
|
||||
)
|
||||
|
||||
@ -1,396 +0,0 @@
|
||||
(in-package :runes-encoding)
|
||||
|
||||
(define-condition encoding-error (simple-error) ())
|
||||
|
||||
(defun xerror (fmt &rest args)
|
||||
(error 'encoding-error :format-control fmt :format-arguments args))
|
||||
|
||||
;;;; ---------------------------------------------------------------------------
|
||||
;;;; Encoding names
|
||||
;;;;
|
||||
|
||||
(defvar *names* (make-hash-table :test #'eq))
|
||||
|
||||
(defun canon-name (string)
|
||||
(with-output-to-string (bag)
|
||||
(map nil (lambda (ch)
|
||||
(cond ((char= ch #\_) (write-char #\- bag))
|
||||
(t (write-char (char-upcase ch) bag))))
|
||||
string)))
|
||||
|
||||
(defun canon-name-2 (string)
|
||||
(with-output-to-string (bag)
|
||||
(map nil (lambda (ch)
|
||||
(cond ((char= ch #\_))
|
||||
((char= ch #\-))
|
||||
(t (write-char (char-upcase ch) bag))))
|
||||
string)))
|
||||
|
||||
(defmethod encoding-names ((encoding symbol))
|
||||
(gethash encoding *names*))
|
||||
|
||||
(defmethod (setf encoding-names) (new-value (encoding symbol))
|
||||
(setf (gethash encoding *names*) new-value))
|
||||
|
||||
(defun add-name (encoding name)
|
||||
(pushnew (canon-name name) (encoding-names encoding) :test #'string=))
|
||||
|
||||
(defun resolve-name (string)
|
||||
(cond ((symbolp string)
|
||||
string)
|
||||
(t
|
||||
(setq string (canon-name string))
|
||||
(or
|
||||
(block nil
|
||||
(maphash (lambda (x y)
|
||||
(when (member string y :test #'string=)
|
||||
(return x)))
|
||||
*names*)
|
||||
nil)
|
||||
(block nil
|
||||
(maphash (lambda (x y)
|
||||
(when (member string y
|
||||
:test #'(lambda (x y)
|
||||
(string= (canon-name-2 x)
|
||||
(canon-name-2 y))))
|
||||
(return x)))
|
||||
*names*)
|
||||
nil)))))
|
||||
|
||||
;;;; ---------------------------------------------------------------------------
|
||||
;;;; Encodings
|
||||
;;;;
|
||||
|
||||
(defvar *encodings* (make-hash-table :test #'eq))
|
||||
|
||||
(defmacro define-encoding (name init-form)
|
||||
`(progn
|
||||
(setf (gethash ',name *encodings*)
|
||||
(list nil (lambda () ,init-form)))
|
||||
',name))
|
||||
|
||||
(defun find-encoding (name)
|
||||
(let ((x (gethash (resolve-name name) *encodings*)))
|
||||
(and x
|
||||
(or (first x)
|
||||
(setf (first x) (funcall (second x)))))))
|
||||
|
||||
(defclass encoding () ())
|
||||
|
||||
(defclass simple-8-bit-encoding (encoding)
|
||||
((table :initarg :table)))
|
||||
|
||||
(defun make-simple-8-bit-encoding (&key charset)
|
||||
(make-instance 'simple-8-bit-encoding
|
||||
:table (coerce (to-unicode-table charset) '(simple-array (unsigned-byte 16) (256)))))
|
||||
|
||||
;;;;;;;
|
||||
|
||||
(defmacro fx-op (op &rest xs)
|
||||
`(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
|
||||
(defmacro fx-pred (op &rest xs)
|
||||
`(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
|
||||
|
||||
(defmacro %+ (&rest xs) `(fx-op + ,@xs))
|
||||
(defmacro %- (&rest xs) `(fx-op - ,@xs))
|
||||
(defmacro %* (&rest xs) `(fx-op * ,@xs))
|
||||
(defmacro %/ (&rest xs) `(fx-op floor ,@xs))
|
||||
(defmacro %and (&rest xs) `(fx-op logand ,@xs))
|
||||
(defmacro %ior (&rest xs) `(fx-op logior ,@xs))
|
||||
(defmacro %xor (&rest xs) `(fx-op logxor ,@xs))
|
||||
(defmacro %ash (&rest xs) `(fx-op ash ,@xs))
|
||||
(defmacro %mod (&rest xs) `(fx-op mod ,@xs))
|
||||
|
||||
(defmacro %= (&rest xs) `(fx-pred = ,@xs))
|
||||
(defmacro %<= (&rest xs) `(fx-pred <= ,@xs))
|
||||
(defmacro %>= (&rest xs) `(fx-pred >= ,@xs))
|
||||
(defmacro %< (&rest xs) `(fx-pred < ,@xs))
|
||||
(defmacro %> (&rest xs) `(fx-pred > ,@xs))
|
||||
|
||||
;;; Decoders
|
||||
|
||||
;; The decoders share a common signature:
|
||||
;;
|
||||
;; DECODE input input-start input-end
|
||||
;; output output-start output-end
|
||||
;; eof-p
|
||||
;; -> first-not-written ; first-not-read
|
||||
;;
|
||||
;; These decode functions should decode as much characters off `input'
|
||||
;; into the `output' as possible and return the indexes to the first
|
||||
;; not read and first not written element of `input' and `output'
|
||||
;; respectively. If there are not enough bytes in `input' to decode a
|
||||
;; full character, decoding shold be abandomed; the caller has to
|
||||
;; ensure that the remaining bytes of `input' are passed to the
|
||||
;; decoder again with more bytes appended.
|
||||
;;
|
||||
;; `eof-p' now in turn indicates, if the given input sequence, is all
|
||||
;; the producer does have and might be used to produce error messages
|
||||
;; in case of incomplete codes or decided what to do.
|
||||
;;
|
||||
;; Decoders are expected to handle the various CR/NL conventions and
|
||||
;; canonicalize each end of line into a single NL rune (#xA) in good
|
||||
;; old Lisp tradition.
|
||||
;;
|
||||
|
||||
;; TODO: change this to an encoding class, which then might carry
|
||||
;; additional state. Stateless encodings could been represented by
|
||||
;; keywords. e.g.
|
||||
;;
|
||||
;; defmethod DECODE-SEQUENCE ((encoding (eql :utf-8)) ...)
|
||||
;;
|
||||
|
||||
(defmethod decode-sequence ((encoding (eql :utf-16-big-endian))
|
||||
in in-start in-end out out-start out-end eof?)
|
||||
;; -> new wptr, new rptr
|
||||
(let ((wptr out-start)
|
||||
(rptr in-start))
|
||||
(loop
|
||||
(when (%= wptr out-end)
|
||||
(return))
|
||||
(when (>= (%+ rptr 1) in-end)
|
||||
(return))
|
||||
(let ((hi (aref in rptr))
|
||||
(lo (aref in (%+ 1 rptr))))
|
||||
(setf rptr (%+ 2 rptr))
|
||||
;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
|
||||
;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
|
||||
;; Haelfte fehlt!
|
||||
(let ((x (logior (ash hi 8) lo)))
|
||||
(when (or (eql x #xFFFE) (eql x #xFFFF))
|
||||
(xerror "not a valid code point: #x~X" x))
|
||||
(setf (aref out wptr) x))
|
||||
(setf wptr (%+ 1 wptr))))
|
||||
(values wptr rptr)))
|
||||
|
||||
(defmethod decode-sequence ((encoding (eql :utf-16-little-endian))
|
||||
in in-start in-end out out-start out-end eof?)
|
||||
;; -> new wptr, new rptr
|
||||
(let ((wptr out-start)
|
||||
(rptr in-start))
|
||||
(loop
|
||||
(when (%= wptr out-end)
|
||||
(return))
|
||||
(when (>= (%+ rptr 1) in-end)
|
||||
(return))
|
||||
(let ((lo (aref in (%+ 0 rptr)))
|
||||
(hi (aref in (%+ 1 rptr))))
|
||||
(setf rptr (%+ 2 rptr))
|
||||
;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
|
||||
;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
|
||||
;; Haelfte fehlt!
|
||||
(let ((x (logior (ash hi 8) lo)))
|
||||
(when (or (eql x #xFFFE) (eql x #xFFFF))
|
||||
(xerror "not a valid code point: #x~X" x))
|
||||
(setf (aref out wptr) x))
|
||||
(setf wptr (%+ 1 wptr))))
|
||||
(values wptr rptr)))
|
||||
|
||||
(defmethod decode-sequence ((encoding (eql :utf-8))
|
||||
in in-start in-end out out-start out-end eof?)
|
||||
(declare (optimize (speed 3) (safety 0))
|
||||
(type (simple-array (unsigned-byte 8) (*)) in)
|
||||
(type (simple-array (unsigned-byte 16) (*)) out)
|
||||
(type fixnum in-start in-end out-start out-end))
|
||||
(let ((wptr out-start)
|
||||
(rptr in-start)
|
||||
byte0)
|
||||
(macrolet ((put (x)
|
||||
`((lambda (x)
|
||||
(when (or (<= #xD800 x #xDBFF)
|
||||
(<= #xDC00 x #xDFFF))
|
||||
(xerror "surrogate encoded in UTF-8: #x~X." x))
|
||||
(cond ((or (%> x #x10FFFF)
|
||||
(eql x #xFFFE)
|
||||
(eql x #xFFFF))
|
||||
(xerror "not a valid code point: #x~X" x))
|
||||
((%> x #xFFFF)
|
||||
(setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10))
|
||||
(aref out (%+ 1 wptr)) (%ior #xDC00 (%and x #x3FF)))
|
||||
(setf wptr (%+ wptr 2)))
|
||||
(t
|
||||
(setf (aref out wptr) x)
|
||||
(setf wptr (%+ wptr 1)))))
|
||||
,x))
|
||||
(put1 (x)
|
||||
`(progn
|
||||
(setf (aref out wptr) ,x)
|
||||
(setf wptr (%+ wptr 1)))))
|
||||
(loop
|
||||
(when (%= (+ wptr 1) out-end) (return))
|
||||
(when (%>= rptr in-end) (return))
|
||||
(setq byte0 (aref in rptr))
|
||||
(cond ((= byte0 #x0D)
|
||||
;; CR handling
|
||||
;; we need to know the following character
|
||||
(cond ((>= (%+ rptr 1) in-end)
|
||||
;; no characters in buffer
|
||||
(cond (eof?
|
||||
;; at EOF, pass it as NL
|
||||
(put #x0A)
|
||||
(setf rptr (%+ rptr 1)))
|
||||
(t
|
||||
;; demand more characters
|
||||
(return))))
|
||||
((= (aref in (%+ rptr 1)) #x0A)
|
||||
;; we see CR NL, so forget this CR and the next NL will be
|
||||
;; inserted literally
|
||||
(setf rptr (%+ rptr 1)))
|
||||
(t
|
||||
;; singleton CR, pass it as NL
|
||||
(put #x0A)
|
||||
(setf rptr (%+ rptr 1)))))
|
||||
|
||||
((%<= #|#b00000000|# byte0 #b01111111)
|
||||
(put1 byte0)
|
||||
(setf rptr (%+ rptr 1)))
|
||||
|
||||
((%<= #|#b10000000|# byte0 #b10111111)
|
||||
(xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)
|
||||
(setf rptr (%+ rptr 1)))
|
||||
|
||||
((%<= #|#b11000000|# byte0 #b11011111)
|
||||
(cond ((<= (%+ rptr 2) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 5 0) byte0) (byte 5 6)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0)
|
||||
0)))
|
||||
(setf rptr (%+ rptr 2)))
|
||||
(t
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11100000|# byte0 #b11101111)
|
||||
(cond ((<= (%+ rptr 3) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 4 0) byte0) (byte 4 12)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 0)
|
||||
0))))
|
||||
(setf rptr (%+ rptr 3)))
|
||||
(t
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11110000|# byte0 #b11110111)
|
||||
(cond ((<= (%+ rptr 4) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 3 0) byte0) (byte 3 18)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 6)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 0)
|
||||
0)))))
|
||||
(setf rptr (%+ rptr 4)))
|
||||
(t
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11111000|# byte0 #b11111011)
|
||||
(cond ((<= (%+ rptr 5) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 2 0) byte0) (byte 2 24)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 12)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 6)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 0)
|
||||
0))))))
|
||||
(setf rptr (%+ rptr 5)))
|
||||
(t
|
||||
(return))))
|
||||
|
||||
((%<= #|#b11111100|# byte0 #b11111101)
|
||||
(cond ((<= (%+ rptr 6) in-end)
|
||||
(put
|
||||
(dpb (ldb (byte 1 0) byte0) (byte 1 30)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 18)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 12)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 6)
|
||||
(dpb (ldb (byte 6 0) (aref in (%+ 5 rptr))) (byte 6 0)
|
||||
0)))))))
|
||||
(setf rptr (%+ rptr 6)))
|
||||
(t
|
||||
(return))))
|
||||
|
||||
(t
|
||||
(xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) ))
|
||||
(values wptr rptr)) )
|
||||
|
||||
(defmethod encoding-p ((object (eql :utf-16-little-endian))) t)
|
||||
(defmethod encoding-p ((object (eql :utf-16-big-endian))) t)
|
||||
(defmethod encoding-p ((object (eql :utf-8))) t)
|
||||
|
||||
(defmethod encoding-p ((object encoding)) t)
|
||||
|
||||
(defmethod decode-sequence ((encoding simple-8-bit-encoding)
|
||||
in in-start in-end
|
||||
out out-start out-end
|
||||
eof?)
|
||||
(declare (optimize (speed 3) (safety 0))
|
||||
(type (simple-array (unsigned-byte 8) (*)) in)
|
||||
(type (simple-array (unsigned-byte 16) (*)) out)
|
||||
(type fixnum in-start in-end out-start out-end))
|
||||
(let ((wptr out-start)
|
||||
(rptr in-start)
|
||||
(byte 0)
|
||||
(table (slot-value encoding 'table)))
|
||||
(declare (type fixnum wptr rptr)
|
||||
(type (unsigned-byte 8) byte)
|
||||
(type (simple-array (unsigned-byte 16) (*)) table))
|
||||
(loop
|
||||
(when (%= wptr out-end) (return))
|
||||
(when (%>= rptr in-end) (return))
|
||||
(setq byte (aref in rptr))
|
||||
(cond ((= byte #x0D)
|
||||
;; CR handling
|
||||
;; we need to know the following character
|
||||
(cond ((>= (%+ rptr 1) in-end)
|
||||
;; no characters in buffer
|
||||
(cond (eof?
|
||||
;; at EOF, pass it as NL
|
||||
(setf (aref out wptr) #x0A)
|
||||
(setf wptr (%+ wptr 1))
|
||||
(setf rptr (%+ rptr 1)))
|
||||
(t
|
||||
;; demand more characters
|
||||
(return))))
|
||||
((= (aref in (%+ rptr 1)) #x0A)
|
||||
;; we see CR NL, so forget this CR and the next NL will be
|
||||
;; inserted literally
|
||||
(setf rptr (%+ rptr 1)))
|
||||
(t
|
||||
;; singleton CR, pass it as NL
|
||||
(setf (aref out wptr) #x0A)
|
||||
(setf wptr (%+ wptr 1))
|
||||
(setf rptr (%+ rptr 1)))))
|
||||
|
||||
(t
|
||||
(setf (aref out wptr) (aref table byte))
|
||||
(setf wptr (%+ wptr 1))
|
||||
(setf rptr (%+ rptr 1))) ))
|
||||
(values wptr rptr)))
|
||||
|
||||
;;;; ---------------------------------------------------------------------------
|
||||
;;;; Character sets
|
||||
;;;;
|
||||
|
||||
(defvar *charsets* (make-hash-table :test #'eq))
|
||||
|
||||
(defclass 8-bit-charset ()
|
||||
((name :initarg :name)
|
||||
(to-unicode-table
|
||||
:initarg :to-unicode-table
|
||||
:reader to-unicode-table)))
|
||||
|
||||
(defmacro define-8-bit-charset (name &rest codes)
|
||||
(assert (= 256 (length codes)))
|
||||
`(progn
|
||||
(setf (gethash ',name *charsets*)
|
||||
(make-instance '8-bit-charset
|
||||
:name ',name
|
||||
:to-unicode-table
|
||||
',(make-array 256
|
||||
:element-type '(unsigned-byte 16)
|
||||
:initial-contents codes)))
|
||||
',name))
|
||||
|
||||
(defun find-charset (name)
|
||||
(or (gethash name *charsets*)
|
||||
(xerror "There is no character set named ~S." name)))
|
||||
@ -1,99 +0,0 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: Generating a sane DEFPACKAGE for RUNES
|
||||
;;; Created: 1999-05-25
|
||||
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; (c) copyright 1999,2000 by Gilbert Baumann
|
||||
|
||||
(in-package :cl-user)
|
||||
|
||||
(defpackage :runes
|
||||
(:use :cl #-scl :trivial-gray-streams)
|
||||
(:export #:definline
|
||||
|
||||
;; runes.lisp
|
||||
#:rune
|
||||
#:rod
|
||||
#:simple-rod
|
||||
#:%rune
|
||||
#:rod-capitalize
|
||||
#:code-rune
|
||||
#:rune-code
|
||||
#:rune-downcase
|
||||
#:rune-upcase
|
||||
#:rod-downcase
|
||||
#:rod-upcase
|
||||
#:white-space-rune-p
|
||||
#:digit-rune-p
|
||||
#:rune=
|
||||
#:rune<=
|
||||
#:rune>=
|
||||
#:rune-equal
|
||||
#:runep
|
||||
#:sloopy-rod-p
|
||||
#:rod=
|
||||
#:rod-equal
|
||||
#:make-rod
|
||||
#:char-rune
|
||||
#:rune-char
|
||||
#:rod-string
|
||||
#:string-rod
|
||||
#:rod-subseq
|
||||
#:rod<
|
||||
|
||||
;; xstream.lisp
|
||||
#:xstream
|
||||
#:make-xstream
|
||||
#:make-rod-xstream
|
||||
#:close-xstream
|
||||
#:xstream-p
|
||||
#:read-rune
|
||||
#:peek-rune
|
||||
#:fread-rune
|
||||
#:fpeek-rune
|
||||
#:consume-rune
|
||||
#:unread-rune
|
||||
#:xstream-position
|
||||
#:xstream-line-number
|
||||
#:xstream-column-number
|
||||
#:xstream-plist
|
||||
#:xstream-encoding
|
||||
#:set-to-full-speed
|
||||
#:xstream-name
|
||||
|
||||
;; ystream.lisp
|
||||
#:ystream
|
||||
#:close-ystream
|
||||
#:write-rune
|
||||
#:write-rod
|
||||
#:ystream-column
|
||||
#:make-octet-vector-ystream
|
||||
#:make-octet-stream-ystream
|
||||
#:make-rod-ystream
|
||||
#+rune-is-character #:make-character-stream-ystream
|
||||
;; These don't make too much sense on Unicode-enabled,
|
||||
;; implementations but for those applications using them anyway,
|
||||
;; I have commented out the reader conditionals now:
|
||||
;; #+rune-is-integer
|
||||
#:make-string-ystream/utf8
|
||||
;; #+rune-is-integer
|
||||
#:make-character-stream-ystream/utf8
|
||||
#:runes-to-utf8/adjustable-string
|
||||
|
||||
#:rod-to-utf8-string
|
||||
#:utf8-string-to-rod
|
||||
#:make-octet-input-stream))
|
||||
|
||||
(defpackage :utf8-runes
|
||||
(:use :cl)
|
||||
(:export *utf8-runes-readtable*
|
||||
#:rune #:rod #:simple-rod #:rod-string #:rod= #:make-rod
|
||||
#:string-rod))
|
||||
|
||||
(defpackage :runes-encoding
|
||||
(:use :cl :runes)
|
||||
(:export
|
||||
#:encoding-error
|
||||
#:find-encoding
|
||||
#:decode-sequence))
|
||||
230
runes/runes.lisp
230
runes/runes.lisp
@ -1,230 +0,0 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: Unicode strings (called RODs)
|
||||
;;; Created: 1999-05-25 22:29
|
||||
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; (c) copyright 1998,1999 by Gilbert Baumann
|
||||
|
||||
;;; This code is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the version 2.1 of the GNU Lesser General Public
|
||||
;;; License as published by the Free Software Foundation, as clarified
|
||||
;;; by the "Preamble to the Gnu Lesser General Public License" found in
|
||||
;;; the file COPYING.
|
||||
;;;
|
||||
;;; This code is distributed in the hope that it will be useful,
|
||||
;;; but without any warranty; without even the implied warranty of
|
||||
;;; merchantability or fitness for a particular purpose. See the GNU
|
||||
;;; Lesser General Public License for more details.
|
||||
;;;
|
||||
;;; Version 2.1 of the GNU Lesser General Public License is in the file
|
||||
;;; COPYING that was distributed with this file. If it is not present,
|
||||
;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until
|
||||
;;; superseded by a newer version) or write to the Free Software
|
||||
;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
;; Changes
|
||||
;;
|
||||
;; When Who What
|
||||
;; ----------------------------------------------------------------------------
|
||||
;; 1999-08-15 GB - ROD=, ROD-EQUAL
|
||||
;; RUNE<=, RUNE>=
|
||||
;; MAKE-ROD, ROD-SUBSEQ
|
||||
;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD
|
||||
;; new functions
|
||||
;; - Added rune reader
|
||||
;;
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
(deftype rune () '(unsigned-byte 16))
|
||||
(deftype rod () '(array rune (*)))
|
||||
(deftype simple-rod () '(simple-array rune (*)))
|
||||
|
||||
(definline rune (rod index)
|
||||
(aref rod index))
|
||||
|
||||
(defun (setf rune) (new rod index)
|
||||
(setf (aref rod index) new))
|
||||
|
||||
(definline %rune (rod index)
|
||||
(aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index)))
|
||||
|
||||
(definline (setf %rune) (new rod index)
|
||||
(setf (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index)) new))
|
||||
|
||||
(defun rod-capitalize (rod)
|
||||
(warn "~S is not implemented." 'rod-capitalize)
|
||||
rod)
|
||||
|
||||
(definline code-rune (x) x)
|
||||
(definline rune-code (x) x)
|
||||
|
||||
(definline rune= (x y)
|
||||
(= x y))
|
||||
|
||||
(defun rune-downcase (rune)
|
||||
(cond ((<= #x0041 rune #x005a) (+ rune #x20))
|
||||
((= rune #x00d7) rune)
|
||||
((<= #x00c0 rune #x00de) (+ rune #x20))
|
||||
(t rune)))
|
||||
|
||||
(definline rune-upcase (rune)
|
||||
(cond ((<= #x0061 rune #x007a) (- rune #x20))
|
||||
((= rune #x00f7) rune)
|
||||
((<= #x00e0 rune #x00fe) (- rune #x20))
|
||||
(t rune)))
|
||||
|
||||
(defun rune-upper-case-letter-p (rune)
|
||||
(or (<= #x0041 rune #x005a) (<= #x00c0 rune #x00de)))
|
||||
|
||||
(defun rune-lower-case-letter-p (rune)
|
||||
(or (<= #x0061 rune #x007a) (<= #x00e0 rune #x00fe)
|
||||
(= rune #x00d7)))
|
||||
|
||||
|
||||
(defun rune-equal (x y)
|
||||
(rune= (rune-upcase x) (rune-upcase y)))
|
||||
|
||||
(defun rod-downcase (rod)
|
||||
;; FIXME
|
||||
(map '(simple-array (unsigned-byte 16) (*)) #'rune-downcase rod))
|
||||
|
||||
(defun rod-upcase (rod)
|
||||
;; FIXME
|
||||
(map '(simple-array (unsigned-byte 16) (*)) #'rune-upcase rod))
|
||||
|
||||
(definline white-space-rune-p (char)
|
||||
(or (= char 9) ;TAB
|
||||
(= char 10) ;Linefeed
|
||||
(= char 13) ;Carriage Return
|
||||
(= char 32))) ;Space
|
||||
|
||||
(definline digit-rune-p (char &optional (radix 10))
|
||||
(cond ((<= #.(char-code #\0) char #.(char-code #\9))
|
||||
(and (< (- char #.(char-code #\0)) radix)
|
||||
(- char #.(char-code #\0))))
|
||||
((<= #.(char-code #\A) char #.(char-code #\Z))
|
||||
(and (< (- char #.(char-code #\A) -10) radix)
|
||||
(- char #.(char-code #\A) -10)))
|
||||
((<= #.(char-code #\a) char #.(char-code #\z))
|
||||
(and (< (- char #.(char-code #\a) -10) radix)
|
||||
(- char #.(char-code #\a) -10))) ))
|
||||
|
||||
(defun rod (x)
|
||||
(cond ((stringp x) (map 'rod #'char-code x))
|
||||
((symbolp x) (rod (string x)))
|
||||
((characterp x) (rod (string x)))
|
||||
((vectorp x) (coerce x 'rod))
|
||||
((integerp x) (map 'rod #'identity (list x)))
|
||||
(t (error "Cannot convert ~S to a ~S" x 'rod))))
|
||||
|
||||
(defun runep (x)
|
||||
(and (integerp x)
|
||||
(<= 0 x #xFFFF)))
|
||||
|
||||
(defun sloopy-rod-p (x)
|
||||
(and (not (stringp x))
|
||||
(vectorp x)
|
||||
(every #'runep x)))
|
||||
|
||||
(defun rod= (x y)
|
||||
(and (= (length x) (length y))
|
||||
(dotimes (i (length x) t)
|
||||
(unless (rune= (rune x i) (rune y i))
|
||||
(return nil)))))
|
||||
|
||||
(defun rod-equal (x y)
|
||||
(and (= (length x) (length y))
|
||||
(dotimes (i (length x) t)
|
||||
(unless (rune-equal (rune x i) (rune y i))
|
||||
(return nil)))))
|
||||
|
||||
(definline make-rod (size)
|
||||
(make-array size :element-type 'rune))
|
||||
|
||||
(defun char-rune (char)
|
||||
(code-rune (char-code char)))
|
||||
|
||||
(defparameter *invalid-rune* nil ;;#\?
|
||||
"Rune to use as a replacement in RUNE-CHAR and ROD-STRING for runes not
|
||||
representable as characters. If NIL, an error is signalled instead.")
|
||||
|
||||
(defun rune-char (rune &optional (default *invalid-rune*))
|
||||
(or (if (>= rune char-code-limit)
|
||||
default
|
||||
(or (code-char rune) default))
|
||||
(error "rune cannot be represented as a character: ~A" rune)))
|
||||
|
||||
(defun rod-string (rod &optional (default-char *invalid-rune*))
|
||||
(map 'string (lambda (x) (rune-char x default-char)) rod))
|
||||
|
||||
(defun string-rod (string)
|
||||
(let* ((n (length string))
|
||||
(res (make-rod n)))
|
||||
(dotimes (i n)
|
||||
(setf (%rune res i) (char-rune (char string i))))
|
||||
res))
|
||||
|
||||
;;;;
|
||||
|
||||
(defun rune<= (rune &rest more-runes)
|
||||
(apply #'<= rune more-runes))
|
||||
|
||||
(defun rune>= (rune &rest more-runes)
|
||||
(apply #'>= rune more-runes))
|
||||
|
||||
(defun rodp (object)
|
||||
(typep object 'rod))
|
||||
|
||||
(defun rod-subseq (source start &optional (end (length source)))
|
||||
(unless (rodp source)
|
||||
(error "~S is not of type ~S." source 'rod))
|
||||
(unless (and (typep start 'fixnum) (>= start 0))
|
||||
(error "~S is not a non-negative fixnum." start))
|
||||
(unless (and (typep end 'fixnum) (>= end start))
|
||||
(error "END argument, ~S, is not a fixnum no less than START, ~S." end start))
|
||||
(when (> start (length source))
|
||||
(error "START argument, ~S, should be no greater than length of rod." start))
|
||||
(when (> end (length source))
|
||||
(error "END argument, ~S, should be no greater than length of rod." end))
|
||||
(locally
|
||||
(declare (type rod source)
|
||||
(type fixnum start end))
|
||||
(let ((res (make-rod (- end start))))
|
||||
(declare (type rod res))
|
||||
(do ((i (- (- end start) 1) (the fixnum (- i 1))))
|
||||
((< i 0) res)
|
||||
(declare (type fixnum i))
|
||||
(setf (%rune res i) (%rune source (the fixnum (+ i start))))))))
|
||||
|
||||
(defun rod-subseq* (source start &optional (end (length source)))
|
||||
(unless (and (typep start 'fixnum) (>= start 0))
|
||||
(error "~S is not a non-negative fixnum." start))
|
||||
(unless (and (typep end 'fixnum) (>= end start))
|
||||
(error "END argument, ~S, is not a fixnum no less than START, ~S." end start))
|
||||
(when (> start (length source))
|
||||
(error "START argument, ~S, should be no greater than length of rod." start))
|
||||
(when (> end (length source))
|
||||
(error "END argument, ~S, should be no greater than length of rod." end))
|
||||
(locally
|
||||
(declare (type fixnum start end))
|
||||
(let ((res (make-rod (- end start))))
|
||||
(declare (type rod res))
|
||||
(do ((i (- (- end start) 1) (the fixnum (- i 1))))
|
||||
((< i 0) res)
|
||||
(declare (type fixnum i))
|
||||
(setf (%rune res i) (aref source (the fixnum (+ i start))))))))
|
||||
|
||||
(defun rod< (rod1 rod2)
|
||||
(do ((i 0 (+ i 1)))
|
||||
(nil)
|
||||
(cond ((= i (length rod1))
|
||||
(return t))
|
||||
((= i (length rod2))
|
||||
(return nil))
|
||||
((< (aref rod1 i) (aref rod2 i))
|
||||
(return t))
|
||||
((> (aref rod1 i) (aref rod2 i))
|
||||
(return nil)))))
|
||||
@ -1,253 +0,0 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: Fast streams
|
||||
;;; Created: 1999-07-17
|
||||
;;; Author: Douglas Crosher
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; (c) copyright 2007 by Douglas Crosher
|
||||
|
||||
;;; This library is free software; you can redistribute it and/or
|
||||
;;; modify it under the terms of the GNU Library General Public
|
||||
;;; License as published by the Free Software Foundation; either
|
||||
;;; version 2 of the License, or (at your option) any later version.
|
||||
;;;
|
||||
;;; This library is distributed in the hope that it will be useful,
|
||||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;; Library General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU Library General Public
|
||||
;;; License along with this library; if not, write to the
|
||||
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
;;; Boston, MA 02111-1307 USA.
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(defparameter *fast* '(optimize (speed 3) (safety 3))))
|
||||
|
||||
(deftype runes-encoding:encoding-error ()
|
||||
'ext:character-conversion-error)
|
||||
|
||||
|
||||
;;; xstream
|
||||
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
|
||||
(defclass xstream (ext:character-stream)
|
||||
((name :initarg :name :initform nil
|
||||
:accessor xstream-name)
|
||||
(column :initarg :column :initform 0)
|
||||
(line :initarg :line :initform 1)
|
||||
(unread-column :initarg :unread-column :initform 0)))
|
||||
|
||||
(defclass eol-conversion-xstream (lisp::eol-conversion-input-stream xstream)
|
||||
())
|
||||
|
||||
) ; eval-when
|
||||
|
||||
(defun make-eol-conversion-xstream (source-stream)
|
||||
"Returns a character stream that conversion CR-LF pairs and lone CR
|
||||
characters into single linefeed character."
|
||||
(declare (type stream source-stream))
|
||||
(let ((stream (ext:make-eol-conversion-stream source-stream
|
||||
:input t
|
||||
:close-stream-p t)))
|
||||
(change-class stream 'eol-conversion-xstream)))
|
||||
|
||||
(definline xstream-p (stream)
|
||||
(typep stream 'xstream))
|
||||
|
||||
(defun close-xstream (input)
|
||||
(close input))
|
||||
|
||||
(definline read-rune (input)
|
||||
(declare (type stream input)
|
||||
(inline read-char)
|
||||
#.*fast*)
|
||||
(let ((char (read-char input nil :eof)))
|
||||
(cond ((member char '(#\UFFFE #\UFFFF))
|
||||
;; These characters are illegal within XML documents.
|
||||
(simple-error 'ext:character-conversion-error
|
||||
"~@<Illegal XML document character: ~S~:@>" char))
|
||||
((eql char #\linefeed)
|
||||
(setf (slot-value input 'unread-column) (slot-value input 'column))
|
||||
(setf (slot-value input 'column) 0)
|
||||
(incf (the kernel:index (slot-value input 'line))))
|
||||
(t
|
||||
(incf (the kernel:index (slot-value input 'column)))))
|
||||
char))
|
||||
|
||||
(definline peek-rune (input)
|
||||
(declare (type stream input)
|
||||
(inline peek-char)
|
||||
#.*fast*)
|
||||
(peek-char nil input nil :eof))
|
||||
|
||||
(definline consume-rune (input)
|
||||
(declare (type stream input)
|
||||
(inline read-rune)
|
||||
#.*fast*)
|
||||
(read-rune input)
|
||||
nil)
|
||||
|
||||
(definline unread-rune (rune input)
|
||||
(declare (type stream input)
|
||||
(inline unread-char)
|
||||
#.*fast*)
|
||||
(unread-char rune input)
|
||||
(cond ((eql rune #\linefeed)
|
||||
(setf (slot-value input 'column) (slot-value input 'unread-column))
|
||||
(setf (slot-value input 'unread-column) 0)
|
||||
(decf (the kernel:index (slot-value input 'line))))
|
||||
(t
|
||||
(decf (the kernel:index (slot-value input 'column)))))
|
||||
nil)
|
||||
|
||||
(defun fread-rune (input)
|
||||
(read-rune input))
|
||||
|
||||
(defun fpeek-rune (input)
|
||||
(peek-rune input))
|
||||
|
||||
(defun xstream-position (input)
|
||||
(file-position input))
|
||||
|
||||
(defun runes-encoding:find-encoding (encoding)
|
||||
encoding)
|
||||
|
||||
(defun make-xstream (os-stream &key name
|
||||
(speed 8192)
|
||||
(initial-speed 1)
|
||||
(initial-encoding :guess))
|
||||
(declare (ignore speed))
|
||||
(assert (eql initial-speed 1))
|
||||
(assert (eq initial-encoding :guess))
|
||||
(let* ((stream (ext:make-xml-character-conversion-stream os-stream
|
||||
:input t
|
||||
:close-stream-p t))
|
||||
(xstream (make-eol-conversion-xstream stream)))
|
||||
(setf (xstream-name xstream) name)
|
||||
xstream))
|
||||
|
||||
|
||||
(defclass xstream-string-input-stream (lisp::string-input-stream xstream)
|
||||
())
|
||||
|
||||
(defun make-rod-xstream (string &key name)
|
||||
(declare (type string string))
|
||||
(let ((stream (make-string-input-stream string)))
|
||||
(change-class stream 'xstream-string-input-stream :name name)))
|
||||
|
||||
;;; already at 'full speed' so just return the buffer size.
|
||||
(defun set-to-full-speed (stream)
|
||||
(length (ext:stream-in-buffer stream)))
|
||||
|
||||
(defun xstream-speed (stream)
|
||||
(length (ext:stream-in-buffer stream)))
|
||||
|
||||
(defun xstream-line-number (stream)
|
||||
(slot-value stream 'line))
|
||||
|
||||
(defun xstream-column-number (stream)
|
||||
(slot-value stream 'column))
|
||||
|
||||
(defun xstream-encoding (stream)
|
||||
(stream-external-format stream))
|
||||
|
||||
;;; the encoding will have already been detected, but it is checked against the
|
||||
;;; declared encoding here.
|
||||
(defun (setf xstream-encoding) (declared-encoding stream)
|
||||
(let* ((initial-encoding (xstream-encoding stream))
|
||||
(canonical-encoding
|
||||
(cond ((and (eq initial-encoding :utf-16le)
|
||||
(member declared-encoding '(:utf-16 :utf16 :utf-16le :utf16le)
|
||||
:test 'string-equal))
|
||||
:utf-16le)
|
||||
((and (eq initial-encoding :utf-16be)
|
||||
(member declared-encoding '(:utf-16 :utf16 :utf-16be :utf16be)
|
||||
:test 'string-equal))
|
||||
:utf-16be)
|
||||
((and (eq initial-encoding :ucs-4be)
|
||||
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4be :ucs4be)
|
||||
:test 'string-equal))
|
||||
:ucs4-be)
|
||||
((and (eq initial-encoding :ucs-4le)
|
||||
(member declared-encoding '(:ucs-4 :ucs4 :ucs-4le :ucs4le)
|
||||
:test 'string-equal))
|
||||
:ucs4-le)
|
||||
(t
|
||||
declared-encoding))))
|
||||
(unless (string-equal initial-encoding canonical-encoding)
|
||||
(warn "Unable to change xstream encoding from ~S to ~S (~S)~%"
|
||||
initial-encoding declared-encoding canonical-encoding))
|
||||
declared-encoding))
|
||||
|
||||
|
||||
;;; ystream - a run output stream.
|
||||
|
||||
(deftype ystream () 'stream)
|
||||
|
||||
(defun ystream-column (stream)
|
||||
(ext:line-column stream))
|
||||
|
||||
(definline write-rune (rune stream)
|
||||
(declare (inline write-char))
|
||||
(write-char rune stream))
|
||||
|
||||
(defun write-rod (rod stream)
|
||||
(declare (type rod rod)
|
||||
(type stream stream))
|
||||
(write-string rod stream))
|
||||
|
||||
(defun make-rod-ystream ()
|
||||
(make-string-output-stream))
|
||||
|
||||
(defun close-ystream (stream)
|
||||
(etypecase stream
|
||||
(ext:string-output-stream
|
||||
(get-output-stream-string stream))
|
||||
(ext:character-conversion-output-stream
|
||||
(let ((target (slot-value stream 'stream)))
|
||||
(close stream)
|
||||
(if (typep target 'ext:byte-output-stream)
|
||||
(ext:get-output-stream-bytes target)
|
||||
stream)))))
|
||||
|
||||
;;;; CHARACTER-STREAM-YSTREAM
|
||||
|
||||
(defun make-character-stream-ystream (target-stream)
|
||||
target-stream)
|
||||
|
||||
|
||||
;;;; OCTET-VECTOR-YSTREAM
|
||||
|
||||
(defun make-octet-vector-ystream ()
|
||||
(let ((target (ext:make-byte-output-stream)))
|
||||
(ext:make-character-conversion-stream target :output t
|
||||
:external-format :utf-8
|
||||
:close-stream-p t)))
|
||||
|
||||
;;;; OCTET-STREAM-YSTREAM
|
||||
|
||||
(defun make-octet-stream-ystream (os-stream)
|
||||
(ext:make-character-conversion-stream os-stream :output t
|
||||
:external-format :utf-8
|
||||
:close-stream-p t))
|
||||
|
||||
|
||||
;;;; helper functions
|
||||
|
||||
(defun rod-to-utf8-string (rod)
|
||||
(ext:make-string-from-bytes (ext:make-bytes-from-string rod :utf8)
|
||||
:iso-8859-1))
|
||||
|
||||
(defun utf8-string-to-rod (str)
|
||||
(let ((bytes (map '(vector (unsigned-byte 8)) #'char-code str)))
|
||||
(ext:make-string-from-bytes bytes :utf-8)))
|
||||
|
||||
(defun make-octet-input-stream (octets)
|
||||
(ext:make-byte-input-stream octets))
|
||||
|
||||
|
||||
@ -1,181 +0,0 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: Unicode strings (called RODs)
|
||||
;;; Created: 1999-05-25 22:29
|
||||
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; (c) copyright 1998,1999 by Gilbert Baumann
|
||||
|
||||
;;; This code is free software; you can redistribute it and/or modify it
|
||||
;;; under the terms of the version 2.1 of the GNU Lesser General Public
|
||||
;;; License as published by the Free Software Foundation, as clarified
|
||||
;;; by the "Preamble to the Gnu Lesser General Public License" found in
|
||||
;;; the file COPYING.
|
||||
;;;
|
||||
;;; This code is distributed in the hope that it will be useful,
|
||||
;;; but without any warranty; without even the implied warranty of
|
||||
;;; merchantability or fitness for a particular purpose. See the GNU
|
||||
;;; Lesser General Public License for more details.
|
||||
;;;
|
||||
;;; Version 2.1 of the GNU Lesser General Public License is in the file
|
||||
;;; COPYING that was distributed with this file. If it is not present,
|
||||
;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until
|
||||
;;; superseded by a newer version) or write to the Free Software
|
||||
;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
;; Changes
|
||||
;;
|
||||
;; When Who What
|
||||
;; ----------------------------------------------------------------------------
|
||||
;; 1999-08-15 GB - ROD=, ROD-EQUAL
|
||||
;; RUNE<=, RUNE>=
|
||||
;; MAKE-ROD, ROD-SUBSEQ
|
||||
;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD
|
||||
;; new functions
|
||||
;; - Added rune reader
|
||||
;;
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
;;;;
|
||||
;;;; RUNE Reader
|
||||
;;;;
|
||||
|
||||
;; Portable implementation of WHITE-SPACE-P with regard to the current
|
||||
;; read table -- this is bit tricky.
|
||||
|
||||
(defun rt-white-space-p (char)
|
||||
(let ((stream (make-string-input-stream (string char))))
|
||||
(eq :eof (peek-char t stream nil :eof))))
|
||||
|
||||
(defun read-rune-name (input)
|
||||
;; the first char is unconditionally read
|
||||
(let ((char0 (read-char input t nil t)))
|
||||
(when (char= char0 #\\)
|
||||
(setf char0 (read-char input t nil t)))
|
||||
(with-output-to-string (res)
|
||||
(write-char char0 res)
|
||||
(do ((ch (peek-char nil input nil :eof t) (peek-char nil input nil :eof t)))
|
||||
((or (eq ch :eof)
|
||||
(rt-white-space-p ch)
|
||||
(multiple-value-bind (function non-terminating-p) (get-macro-character ch)
|
||||
(and function (not non-terminating-p)))))
|
||||
(write-char ch res)
|
||||
(read-char input))))) ;consume this character
|
||||
|
||||
(defun iso-10646-char-code (char)
|
||||
(char-code char))
|
||||
|
||||
(defvar *rune-names* (make-hash-table :test #'equal)
|
||||
"Hashtable, which maps all known rune names to rune codes;
|
||||
Names are stored in uppercase.")
|
||||
|
||||
(defun define-rune-name (name code)
|
||||
(setf (gethash (string-upcase name) *rune-names*) code)
|
||||
name)
|
||||
|
||||
(defun lookup-rune-name (name)
|
||||
(gethash (string-upcase name) *rune-names*))
|
||||
|
||||
(define-rune-name "null" #x0000)
|
||||
(define-rune-name "space" #x0020)
|
||||
(define-rune-name "newline" #x000A)
|
||||
(define-rune-name "return" #x000D)
|
||||
(define-rune-name "tab" #x0009)
|
||||
(define-rune-name "page" #x000C)
|
||||
|
||||
;; and just for fun:
|
||||
(define-rune-name "euro" #x20AC)
|
||||
|
||||
;; ASCII control characters
|
||||
(define-rune-name "nul" #x0000) ;null
|
||||
(define-rune-name "soh" #x0001) ;start of header
|
||||
(define-rune-name "stx" #x0002) ;start of text
|
||||
(define-rune-name "etx" #x0003) ;end of text
|
||||
(define-rune-name "eot" #x0004) ;end of transmission
|
||||
(define-rune-name "enq" #x0005) ;
|
||||
(define-rune-name "ack" #x0006) ;acknowledge
|
||||
(define-rune-name "bel" #x0007) ;bell
|
||||
(define-rune-name "bs" #x0008) ;backspace
|
||||
(define-rune-name "ht" #x0009) ;horizontal tab
|
||||
(define-rune-name "lf" #X000A) ;line feed, new line
|
||||
(define-rune-name "vt" #X000B) ;vertical tab
|
||||
(define-rune-name "ff" #x000C) ;form feed
|
||||
(define-rune-name "cr" #x000D) ;carriage return
|
||||
(define-rune-name "so" #x000E) ;shift out
|
||||
(define-rune-name "si" #x000F) ;shift in
|
||||
(define-rune-name "dle" #x0010) ;device latch enable ?
|
||||
(define-rune-name "dc1" #x0011) ;device control 1
|
||||
(define-rune-name "dc2" #x0012) ;device control 2
|
||||
(define-rune-name "dc3" #x0013) ;device control 3
|
||||
(define-rune-name "dc4" #x0014) ;device control 4
|
||||
(define-rune-name "nak" #x0015) ;negative acknowledge
|
||||
(define-rune-name "syn" #x0016) ;
|
||||
(define-rune-name "etb" #x0017) ;
|
||||
(define-rune-name "can" #x0018) ;
|
||||
(define-rune-name "em" #x0019) ;end of message
|
||||
(define-rune-name "sub" #x001A) ;
|
||||
(define-rune-name "esc" #x001B) ;escape
|
||||
(define-rune-name "fs" #x001C) ;field separator ?
|
||||
(define-rune-name "gs" #x001D) ;group separator
|
||||
(define-rune-name "rs" #x001E) ;
|
||||
(define-rune-name "us" #x001F) ;
|
||||
|
||||
(define-rune-name "del" #x007F) ;delete
|
||||
|
||||
;; iso-latin
|
||||
(define-rune-name "nbsp" #x00A0) ;non breakable space
|
||||
(define-rune-name "shy" #x00AD) ;soft hyphen
|
||||
|
||||
(defun rune-from-read-name (name)
|
||||
(code-rune
|
||||
(cond ((= (length name) 1)
|
||||
(iso-10646-char-code (char name 0)))
|
||||
((and (= (length name) 2)
|
||||
(char= (char name 0) #\\))
|
||||
(iso-10646-char-code (char name 1)))
|
||||
((and (>= (length name) 3)
|
||||
(char-equal (char name 0) #\u)
|
||||
(char-equal (char name 1) #\+)
|
||||
(every (lambda (x) (digit-char-p x 16)) (subseq name 2)))
|
||||
(parse-integer name :start 2 :radix 16))
|
||||
((lookup-rune-name name))
|
||||
(t
|
||||
(error "Meaningless rune name ~S." name)))))
|
||||
|
||||
(defun rune-reader (stream subchar arg)
|
||||
subchar arg
|
||||
(values (rune-from-read-name (read-rune-name stream))))
|
||||
|
||||
(set-dispatch-macro-character #\# #\/ 'rune-reader)
|
||||
|
||||
;;; ROD ext syntax
|
||||
|
||||
(defun rod-reader (stream subchar arg)
|
||||
(declare (ignore arg))
|
||||
(rod
|
||||
(with-output-to-string (bag)
|
||||
(do ((c (read-char stream t nil t)
|
||||
(read-char stream t nil t)))
|
||||
((char= c subchar))
|
||||
(cond ((char= c #\\)
|
||||
(setf c (read-char stream t nil t))))
|
||||
(princ c bag)))))
|
||||
|
||||
#-rune-is-character
|
||||
(defun rod-printer (stream rod)
|
||||
(princ #\# stream)
|
||||
(princ #\" stream)
|
||||
(loop for x across rod do
|
||||
(cond ((or (rune= x #.(char-rune #\\))
|
||||
(rune= x #.(char-rune #\")))
|
||||
(princ #\\ stream)
|
||||
(princ (code-char x) stream))
|
||||
((< x char-code-limit)
|
||||
(princ (code-char x) stream))
|
||||
(t
|
||||
(format stream "\\u~4,'0X" x))))
|
||||
(princ #\" stream))
|
||||
|
||||
(set-dispatch-macro-character #\# #\" 'rod-reader)
|
||||
@ -1,36 +0,0 @@
|
||||
;;; copyright (c) 2005 David Lichteblau <david@lichteblau.com>
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;;
|
||||
;;; Rune emulation for the UTF-8-compatible DOM implementation.
|
||||
;;; Used only with 8 bit characters on non-unicode Lisps.
|
||||
|
||||
(in-package :utf8-runes)
|
||||
|
||||
(deftype rune () 'character)
|
||||
(deftype rod () '(vector rune))
|
||||
(deftype simple-rod () '(simple-array rune))
|
||||
|
||||
(defun rod= (r s)
|
||||
(string= r s))
|
||||
|
||||
(defun rod-string (rod &optional default)
|
||||
(declare (ignore default))
|
||||
rod)
|
||||
|
||||
(defun string-rod (string)
|
||||
string)
|
||||
|
||||
(defun make-rod (size)
|
||||
(make-string size :element-type 'rune))
|
||||
|
||||
(defun rune-reader (stream subchar arg)
|
||||
(runes::rune-char (runes::rune-reader stream subchar arg)))
|
||||
|
||||
(defun rod-reader (stream subchar arg)
|
||||
(runes::rod-string (runes::rod-reader stream subchar arg)))
|
||||
|
||||
(setf runes-system:*utf8-runes-readtable*
|
||||
(let ((rt (copy-readtable)))
|
||||
(set-dispatch-macro-character #\# #\/ 'rune-reader rt)
|
||||
(set-dispatch-macro-character #\# #\" 'rod-reader rt)
|
||||
rt))
|
||||
@ -1,409 +0,0 @@
|
||||
;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; Title: Fast streams
|
||||
;;; Created: 1999-07-17
|
||||
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;; ---------------------------------------------------------------------------
|
||||
;;; (c) copyright 1999 by Gilbert Baumann
|
||||
|
||||
;;; This library is free software; you can redistribute it and/or
|
||||
;;; modify it under the terms of the GNU Library General Public
|
||||
;;; License as published by the Free Software Foundation; either
|
||||
;;; version 2 of the License, or (at your option) any later version.
|
||||
;;;
|
||||
;;; This library is distributed in the hope that it will be useful,
|
||||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;; Library General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU Library General Public
|
||||
;;; License along with this library; if not, write to the
|
||||
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
;;; Boston, MA 02111-1307 USA.
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
;;; API
|
||||
;;
|
||||
;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
|
||||
;; [function]
|
||||
;; MAKE-ROD-XSTREAM rod &key name [function]
|
||||
;; CLOSE-XSTREAM xstream [function]
|
||||
;; XSTREAM-P object [function]
|
||||
;;
|
||||
;; READ-RUNE xstream [macro]
|
||||
;; PEEK-RUNE xstream [macro]
|
||||
;; FREAD-RUNE xstream [function]
|
||||
;; FPEEK-RUNE xstream [function]
|
||||
;; CONSUME-RUNE xstream [macro]
|
||||
;; UNREAD-RUNE rune xstream [function]
|
||||
;;
|
||||
;; XSTREAM-NAME xstream [accessor]
|
||||
;; XSTREAM-POSITION xstream [function]
|
||||
;; XSTREAM-LINE-NUMBER xstream [function]
|
||||
;; XSTREAM-COLUMN-NUMBER xstream [function]
|
||||
;; XSTREAM-PLIST xstream [accessor]
|
||||
;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
|
||||
;; SET-TO-FULL-SPEED xstream [function]
|
||||
|
||||
;; [*] switching the encoding on the fly is only possible when the
|
||||
;; stream's buffer is empty; therefore to be able to switch the
|
||||
;; encoding, while some runes are already read, set the stream's speed
|
||||
;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
|
||||
;; and later set it to full speed. (The encoding of the runes
|
||||
;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
|
||||
;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
|
||||
;; speed up to a full buffer length.
|
||||
|
||||
;; An encoding is simply something, which provides the DECODE-SEQUENCE
|
||||
;; method.
|
||||
|
||||
;;; Controller protocol
|
||||
;;
|
||||
;; READ-OCTECTS sequence os-stream start end -> first-non-written
|
||||
;; XSTREAM/CLOSE os-stream
|
||||
;;
|
||||
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(defparameter *fast* '(optimize (speed 3) (safety 0))))
|
||||
|
||||
;; Let us first define fast fixnum arithmetric get rid of type
|
||||
;; checks. (After all we know what we do here).
|
||||
|
||||
(defmacro fx-op (op &rest xs)
|
||||
`(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
|
||||
(defmacro fx-pred (op &rest xs)
|
||||
`(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
|
||||
|
||||
(defmacro %+ (&rest xs) `(fx-op + ,@xs))
|
||||
(defmacro %= (&rest xs) `(fx-pred = ,@xs))
|
||||
|
||||
(deftype buffer-index ()
|
||||
`(unsigned-byte ,(integer-length array-total-size-limit)))
|
||||
|
||||
(deftype buffer-byte ()
|
||||
`(unsigned-byte 16))
|
||||
|
||||
(deftype octet ()
|
||||
`(unsigned-byte 8))
|
||||
|
||||
;; The usage of a special marker for EOF is experimental and
|
||||
;; considered unhygenic.
|
||||
|
||||
(defconstant +end+ #xFFFF
|
||||
"Special marker inserted into stream buffers to indicate end of buffered data.")
|
||||
|
||||
(defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte))
|
||||
(defvar +null-octet-buffer+ (make-array 0 :element-type 'octet))
|
||||
|
||||
(defstruct (xstream
|
||||
(:constructor make-xstream/low)
|
||||
(:copier nil)
|
||||
(:print-function print-xstream))
|
||||
|
||||
;;; Read buffer
|
||||
|
||||
;; the buffer itself
|
||||
(buffer +null-buffer+
|
||||
:type (simple-array buffer-byte (*)))
|
||||
;; points to the next element of `buffer' containing the next rune
|
||||
;; about to be read.
|
||||
(read-ptr 0 :type buffer-index)
|
||||
;; points to the first element of `buffer' not containing a rune to
|
||||
;; be read.
|
||||
(fill-ptr 0 :type buffer-index)
|
||||
|
||||
;;; OS buffer
|
||||
|
||||
;; a scratch pad for READ-SEQUENCE
|
||||
(os-buffer +null-octet-buffer+
|
||||
:type (simple-array octet (*)))
|
||||
|
||||
;; `os-left-start', `os-left-end' designate a region of os-buffer,
|
||||
;; which still contains some undecoded data. This is needed because
|
||||
;; of the DECODE-SEQUENCE protocol
|
||||
(os-left-start 0 :type buffer-index)
|
||||
(os-left-end 0 :type buffer-index)
|
||||
|
||||
;; How much to read each time
|
||||
(speed 0 :type buffer-index)
|
||||
(full-speed 0 :type buffer-index)
|
||||
|
||||
;; Some stream object obeying to a certain protcol
|
||||
os-stream
|
||||
|
||||
;; The external format
|
||||
;; (some object offering the ENCODING protocol)
|
||||
(encoding :utf-8)
|
||||
|
||||
;;A STREAM-NAME object
|
||||
(name nil)
|
||||
|
||||
;; a plist a struct keeps the hack away
|
||||
(plist nil)
|
||||
|
||||
;; Stream Position
|
||||
(line-number 1 :type integer) ;current line number
|
||||
(line-start 0 :type integer) ;stream position the current line starts at
|
||||
(buffer-start 0 :type integer) ;stream position the current buffer starts at
|
||||
|
||||
;; There is no need to maintain a column counter for each character
|
||||
;; read, since we can easily compute it from `line-start' and
|
||||
;; `buffer-start'.
|
||||
)
|
||||
|
||||
(defun print-xstream (self sink depth)
|
||||
(declare (ignore depth))
|
||||
(format sink "#<~S ~S>" (type-of self) (xstream-name self)))
|
||||
|
||||
(defmacro read-rune (input)
|
||||
"Read a single rune off the xstream `input'. In case of end of file :EOF
|
||||
is returned."
|
||||
`((lambda (input)
|
||||
(declare (type xstream input)
|
||||
#.*fast*)
|
||||
(let ((rp (xstream-read-ptr input)))
|
||||
(declare (type buffer-index rp))
|
||||
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
|
||||
rp)))
|
||||
(declare (type buffer-byte ch))
|
||||
(setf (xstream-read-ptr input) (%+ rp 1))
|
||||
(cond ((%= ch +end+)
|
||||
(the (or (member :eof) rune)
|
||||
(xstream-underflow input)))
|
||||
((%= ch #x000A) ;line break
|
||||
(account-for-line-break input)
|
||||
(code-rune ch))
|
||||
(t
|
||||
(code-rune ch))))))
|
||||
,input))
|
||||
|
||||
(defmacro peek-rune (input)
|
||||
"Peek a single rune off the xstream `input'. In case of end of file :EOF
|
||||
is returned."
|
||||
`((lambda (input)
|
||||
(declare (type xstream input)
|
||||
#.*fast*)
|
||||
(let ((rp (xstream-read-ptr input)))
|
||||
(declare (type buffer-index rp))
|
||||
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
|
||||
rp)))
|
||||
(declare (type buffer-byte ch))
|
||||
(cond ((%= ch +end+)
|
||||
(prog1
|
||||
(the (or (member :eof) rune) (xstream-underflow input))
|
||||
(setf (xstream-read-ptr input) 0)))
|
||||
(t
|
||||
(code-rune ch))))))
|
||||
,input))
|
||||
|
||||
(defmacro consume-rune (input)
|
||||
"Like READ-RUNE, but does not actually return the read rune."
|
||||
`((lambda (input)
|
||||
(declare (type xstream input)
|
||||
#.*fast*)
|
||||
(let ((rp (xstream-read-ptr input)))
|
||||
(declare (type buffer-index rp))
|
||||
(let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
|
||||
rp)))
|
||||
(declare (type buffer-byte ch))
|
||||
(setf (xstream-read-ptr input) (%+ rp 1))
|
||||
(when (%= ch +end+)
|
||||
(xstream-underflow input))
|
||||
(when (%= ch #x000A) ;line break
|
||||
(account-for-line-break input) )))
|
||||
nil)
|
||||
,input))
|
||||
|
||||
(definline unread-rune (rune input)
|
||||
"Unread the last recently read rune; if there wasn't such a rune, you
|
||||
deserve to lose."
|
||||
(declare (ignore rune))
|
||||
(decf (xstream-read-ptr input))
|
||||
(when (rune= (peek-rune input) #/u+000A) ;was it a line break?
|
||||
(unaccount-for-line-break input)))
|
||||
|
||||
(defun fread-rune (input)
|
||||
(read-rune input))
|
||||
|
||||
(defun fpeek-rune (input)
|
||||
(peek-rune input))
|
||||
|
||||
;;; Line counting
|
||||
|
||||
(defun account-for-line-break (input)
|
||||
(declare (type xstream input))
|
||||
(incf (xstream-line-number input))
|
||||
(setf (xstream-line-start input)
|
||||
(+ (xstream-buffer-start input) (xstream-read-ptr input))))
|
||||
|
||||
(defun unaccount-for-line-break (input)
|
||||
;; incomplete!
|
||||
;; We better use a traditional lookahead technique or forbid unread-rune.
|
||||
(decf (xstream-line-number input)))
|
||||
|
||||
;; User API:
|
||||
|
||||
(defun xstream-position (input)
|
||||
(+ (xstream-buffer-start input) (xstream-read-ptr input)))
|
||||
|
||||
;; xstream-line-number is structure accessor
|
||||
|
||||
(defun xstream-column-number (input)
|
||||
(+ (- (xstream-position input)
|
||||
(xstream-line-start input))
|
||||
1))
|
||||
|
||||
;;; Underflow
|
||||
|
||||
(defconstant +default-buffer-size+ 100)
|
||||
|
||||
(defmethod xstream-underflow ((input xstream))
|
||||
(declare (type xstream input))
|
||||
;; we are about to fill new data into the buffer, so we need to
|
||||
;; adjust buffer-start.
|
||||
(incf (xstream-buffer-start input)
|
||||
(- (xstream-fill-ptr input) 0))
|
||||
(let (n m)
|
||||
;; when there is something left in the os-buffer, we move it to
|
||||
;; the start of the buffer.
|
||||
(setf m (- (xstream-os-left-end input) (xstream-os-left-start input)))
|
||||
(unless (zerop m)
|
||||
(replace (xstream-os-buffer input) (xstream-os-buffer input)
|
||||
:start1 0 :end1 m
|
||||
:start2 (xstream-os-left-start input)
|
||||
:end2 (xstream-os-left-end input))
|
||||
;; then we take care that the buffer is large enough to carry at
|
||||
;; least 100 bytes (a random number)
|
||||
;;
|
||||
;; David: My understanding is that any number of octets large enough
|
||||
;; to record the longest UTF-8 sequence or UTF-16 sequence is okay,
|
||||
;; so 100 is plenty for this purpose.
|
||||
(unless (>= (length (xstream-os-buffer input))
|
||||
+default-buffer-size+)
|
||||
(error "You lost")))
|
||||
(setf n
|
||||
(read-octets (xstream-os-buffer input) (xstream-os-stream input)
|
||||
m (min (1- (length (xstream-os-buffer input)))
|
||||
(+ m (xstream-speed input)))))
|
||||
(cond ((%= n 0)
|
||||
(setf (xstream-read-ptr input) 0
|
||||
(xstream-fill-ptr input) n)
|
||||
(setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
|
||||
:eof)
|
||||
(t
|
||||
(multiple-value-bind (fnw fnr)
|
||||
(runes-encoding:decode-sequence
|
||||
(xstream-encoding input)
|
||||
(xstream-os-buffer input) 0 n
|
||||
(xstream-buffer input) 0 (1- (length (xstream-buffer input)))
|
||||
(= n m))
|
||||
(setf (xstream-os-left-start input) fnr
|
||||
(xstream-os-left-end input) n
|
||||
(xstream-read-ptr input) 0
|
||||
(xstream-fill-ptr input) fnw)
|
||||
(setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
|
||||
(read-rune input))))))
|
||||
|
||||
;;; constructor
|
||||
|
||||
(defun make-xstream (os-stream &key name
|
||||
(speed 8192)
|
||||
(initial-speed 1)
|
||||
(initial-encoding :guess))
|
||||
;; XXX if initial-speed isn't 1, encoding will me munged up
|
||||
(assert (eql initial-speed 1))
|
||||
(multiple-value-bind (encoding preread)
|
||||
(if (eq initial-encoding :guess)
|
||||
(figure-encoding os-stream)
|
||||
(values initial-encoding nil))
|
||||
(let* ((bufsize (max speed +default-buffer-size+))
|
||||
(osbuf (make-array bufsize :element-type '(unsigned-byte 8))))
|
||||
(replace osbuf preread)
|
||||
(make-xstream/low
|
||||
:buffer (let ((r (make-array bufsize :element-type 'buffer-byte)))
|
||||
(setf (elt r 0) #xFFFF)
|
||||
r)
|
||||
:read-ptr 0
|
||||
:fill-ptr 0
|
||||
:os-buffer osbuf
|
||||
:speed initial-speed
|
||||
:full-speed speed
|
||||
:os-stream os-stream
|
||||
:os-left-start 0
|
||||
:os-left-end (length preread)
|
||||
:encoding encoding
|
||||
:name name))))
|
||||
|
||||
(defun make-rod-xstream (string &key name)
|
||||
;; XXX encoding is mis-handled by this kind of stream
|
||||
(let ((n (length string)))
|
||||
(let ((buffer (make-array (1+ n) :element-type 'buffer-byte)))
|
||||
(declare (type (simple-array buffer-byte (*)) buffer))
|
||||
;; copy the rod
|
||||
(do ((i (1- n) (- i 1)))
|
||||
((< i 0))
|
||||
(declare (type fixnum i))
|
||||
(setf (aref buffer i) (rune-code (%rune string i))))
|
||||
(setf (aref buffer n) +end+)
|
||||
;;
|
||||
(make-xstream/low :buffer buffer
|
||||
:read-ptr 0
|
||||
:fill-ptr n
|
||||
;; :os-buffer nil
|
||||
:speed 1
|
||||
:os-stream nil
|
||||
:name name))))
|
||||
|
||||
(defmethod figure-encoding ((stream null))
|
||||
(values :utf-8 nil))
|
||||
|
||||
(defmethod figure-encoding ((stream stream))
|
||||
(let ((c0 (read-byte stream nil :eof)))
|
||||
(cond ((eq c0 :eof)
|
||||
(values :utf-8 nil))
|
||||
(t
|
||||
(let ((c1 (read-byte stream nil :eof)))
|
||||
(cond ((eq c1 :eof)
|
||||
(values :utf-8 (list c0)))
|
||||
(t
|
||||
(cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil))
|
||||
((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil))
|
||||
(t
|
||||
(values :utf-8 (list c0 c1)))))))))))
|
||||
|
||||
;;; misc
|
||||
|
||||
(defun close-xstream (input)
|
||||
(xstream/close (xstream-os-stream input)))
|
||||
|
||||
(defun set-to-full-speed (xstream)
|
||||
(setf (xstream-speed xstream) (xstream-full-speed xstream)))
|
||||
|
||||
;;; controller implementations
|
||||
|
||||
(defmethod read-octets (sequence (stream stream) start end)
|
||||
(#+CLISP ext:read-byte-sequence
|
||||
#-CLISP read-sequence
|
||||
sequence stream :start start :end end))
|
||||
|
||||
#+cmu
|
||||
(defmethod read-octets :around (sequence (stream stream) start end)
|
||||
;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2)
|
||||
;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages
|
||||
;; in SEQUENCE and make sure no GC happens between that and the read(2).
|
||||
(ext::without-gcing
|
||||
(loop for i from start below end
|
||||
do (setf (elt sequence i) (elt sequence i)))
|
||||
(call-next-method)))
|
||||
|
||||
(defmethod read-octets (sequence (stream null) start end)
|
||||
(declare (ignore sequence start end))
|
||||
0)
|
||||
|
||||
(defmethod xstream/close ((stream stream))
|
||||
(close stream))
|
||||
|
||||
(defmethod xstream/close ((stream null))
|
||||
nil)
|
||||
@ -1,297 +0,0 @@
|
||||
;;; (c) 2005 David Lichteblau <david@lichteblau.com>
|
||||
;;; License: Lisp-LGPL (See file COPYING for details).
|
||||
;;;
|
||||
;;; ystream (for lack of a better name): a rune output "stream"
|
||||
|
||||
(in-package :runes)
|
||||
|
||||
(defconstant +ystream-bufsize+ 1024)
|
||||
|
||||
(defun make-ub8-array (n)
|
||||
(make-array n :element-type '(unsigned-byte 8)))
|
||||
|
||||
(defun make-ub16-array (n)
|
||||
(make-array n :element-type '(unsigned-byte 16)))
|
||||
|
||||
(defun make-buffer (&key (element-type '(unsigned-byte 8)))
|
||||
(make-array 1
|
||||
:element-type element-type
|
||||
:adjustable t
|
||||
:fill-pointer 0))
|
||||
|
||||
(defmacro while (test &body body)
|
||||
`(until (not ,test) ,@body))
|
||||
|
||||
(defmacro until (test &body body)
|
||||
`(do () (,test) ,@body))
|
||||
|
||||
;;; ystream
|
||||
;;; +- utf8-ystream
|
||||
;;; | +- octet-vector-ystream
|
||||
;;; | \- %stream-ystream
|
||||
;;; | +- octet-stream-ystream
|
||||
;;; | \- character-stream-ystream/utf8
|
||||
;;; | \- string-ystream/utf8
|
||||
;;; +- rod-ystream
|
||||
;;; \-- character-stream-ystream
|
||||
|
||||
(defstruct ystream
|
||||
(column 0 :type integer)
|
||||
(in-ptr 0 :type fixnum)
|
||||
(in-buffer (make-rod +ystream-bufsize+) :type simple-rod))
|
||||
|
||||
(defstruct (utf8-ystream
|
||||
(:include ystream)
|
||||
(:conc-name "YSTREAM-"))
|
||||
(out-buffer (make-ub8-array (* 6 +ystream-bufsize+))
|
||||
:type (simple-array (unsigned-byte 8) (*))))
|
||||
|
||||
(defstruct (%stream-ystream (:include utf8-ystream) (:conc-name "YSTREAM-"))
|
||||
(os-stream nil))
|
||||
|
||||
(definline write-rune (rune ystream)
|
||||
(let ((in (ystream-in-buffer ystream)))
|
||||
(when (eql (ystream-in-ptr ystream) (length in))
|
||||
(flush-ystream ystream)
|
||||
(setf in (ystream-in-buffer ystream)))
|
||||
(setf (elt in (ystream-in-ptr ystream)) rune)
|
||||
(incf (ystream-in-ptr ystream))
|
||||
(setf (ystream-column ystream)
|
||||
(if (eql rune #/U+0010) 0 (1+ (ystream-column ystream))))
|
||||
rune))
|
||||
|
||||
(defmethod close-ystream :before ((ystream ystream))
|
||||
(flush-ystream ystream))
|
||||
|
||||
|
||||
;;;; UTF8-YSTREAM (abstract)
|
||||
|
||||
(defmethod close-ystream ((ystream %stream-ystream))
|
||||
(ystream-os-stream ystream))
|
||||
|
||||
(defgeneric ystream-device-write (ystream buf nbytes))
|
||||
|
||||
(defmethod flush-ystream ((ystream utf8-ystream))
|
||||
(let ((ptr (ystream-in-ptr ystream)))
|
||||
(when (plusp ptr)
|
||||
(let* ((in (ystream-in-buffer ystream))
|
||||
(out (ystream-out-buffer ystream))
|
||||
(surrogatep (<= #xD800 (rune-code (elt in (1- ptr))) #xDBFF))
|
||||
n)
|
||||
(when surrogatep
|
||||
(decf ptr))
|
||||
(when (plusp ptr)
|
||||
(setf n (runes-to-utf8 out in ptr))
|
||||
(ystream-device-write ystream out n)
|
||||
(cond
|
||||
(surrogatep
|
||||
(setf (elt in 0) (elt in (1- ptr)))
|
||||
(setf (ystream-in-ptr ystream) 1))
|
||||
(t
|
||||
(setf (ystream-in-ptr ystream) 0))))))))
|
||||
|
||||
(defun write-rod (rod sink)
|
||||
(loop for rune across rod do (write-rune rune sink)))
|
||||
|
||||
(defun fast-push (new-element vector)
|
||||
(vector-push-extend new-element vector (max 1 (array-dimension vector 0))))
|
||||
|
||||
(macrolet ((define-utf8-writer (name (byte &rest aux) result &body body)
|
||||
`(defun ,name (out in n)
|
||||
(let ((high-surrogate nil)
|
||||
,@aux)
|
||||
(labels
|
||||
((write0 (,byte)
|
||||
,@body)
|
||||
(write1 (r)
|
||||
(cond
|
||||
((<= #x00000000 r #x0000007F)
|
||||
(write0 r))
|
||||
((<= #x00000080 r #x000007FF)
|
||||
(write0 (logior #b11000000 (ldb (byte 5 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x00000800 r #x0000FFFF)
|
||||
(write0 (logior #b11100000 (ldb (byte 4 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x00010000 r #x001FFFFF)
|
||||
(write0 (logior #b11110000 (ldb (byte 3 18) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x00200000 r #x03FFFFFF)
|
||||
(write0 (logior #b11111000 (ldb (byte 2 24) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 18) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))
|
||||
((<= #x04000000 r #x7FFFFFFF)
|
||||
(write0 (logior #b11111100 (ldb (byte 1 30) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 24) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 18) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 12) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 6) r)))
|
||||
(write0 (logior #b10000000 (ldb (byte 6 0) r))))))
|
||||
(write2 (r)
|
||||
(cond
|
||||
((<= #xD800 r #xDBFF)
|
||||
(setf high-surrogate r))
|
||||
((<= #xDC00 r #xDFFF)
|
||||
(let ((q (logior (ash (- high-surrogate #xD7C0) 10)
|
||||
(- r #xDC00))))
|
||||
(write1 q))
|
||||
(setf high-surrogate nil))
|
||||
(t
|
||||
(write1 r)))))
|
||||
(dotimes (j n)
|
||||
(write2 (rune-code (elt in j)))))
|
||||
,result))))
|
||||
(define-utf8-writer runes-to-utf8 (x (i 0))
|
||||
i
|
||||
(setf (elt out i) x)
|
||||
(incf i))
|
||||
(define-utf8-writer runes-to-utf8/adjustable-string (x)
|
||||
nil
|
||||
(fast-push (code-char x) out)))
|
||||
|
||||
|
||||
;;;; ROD-YSTREAM
|
||||
|
||||
(defstruct (rod-ystream (:include ystream)))
|
||||
|
||||
(defmethod flush-ystream ((ystream rod-ystream))
|
||||
(let* ((old (ystream-in-buffer ystream))
|
||||
(new (make-rod (* 2 (length old)))))
|
||||
(replace new old)
|
||||
(setf (ystream-in-buffer ystream) new)))
|
||||
|
||||
(defmethod close-ystream ((ystream rod-ystream))
|
||||
(subseq (ystream-in-buffer ystream) 0 (ystream-in-ptr ystream)))
|
||||
|
||||
|
||||
;;;; CHARACTER-STREAM-YSTREAM
|
||||
|
||||
#+rune-is-character
|
||||
(progn
|
||||
(defstruct (character-stream-ystream
|
||||
(:constructor make-character-stream-ystream (target-stream))
|
||||
(:include ystream)
|
||||
(:conc-name "YSTREAM-"))
|
||||
(target-stream nil))
|
||||
|
||||
(defmethod flush-ystream ((ystream character-stream-ystream))
|
||||
(write-string (ystream-in-buffer ystream)
|
||||
(ystream-target-stream ystream)
|
||||
:end (ystream-in-ptr ystream))
|
||||
(setf (ystream-in-ptr ystream) 0))
|
||||
|
||||
(defmethod close-ystream ((ystream character-stream-ystream))
|
||||
(ystream-target-stream ystream)))
|
||||
|
||||
|
||||
;;;; OCTET-VECTOR-YSTREAM
|
||||
|
||||
(defstruct (octet-vector-ystream
|
||||
(:include utf8-ystream)
|
||||
(:conc-name "YSTREAM-"))
|
||||
(result (make-buffer)))
|
||||
|
||||
(defmethod ystream-device-write ((ystream octet-vector-ystream) buf nbytes)
|
||||
(let* ((result (ystream-result ystream))
|
||||
(start (length result))
|
||||
(size (array-dimension result 0)))
|
||||
(while (> (+ start nbytes) size)
|
||||
(setf size (* 2 size)))
|
||||
(adjust-array result size :fill-pointer (+ start nbytes))
|
||||
(replace result buf :start1 start :end2 nbytes)))
|
||||
|
||||
(defmethod close-ystream ((ystream octet-vector-ystream))
|
||||
(ystream-result ystream))
|
||||
|
||||
|
||||
;;;; OCTET-STREAM-YSTREAM
|
||||
|
||||
(defstruct (octet-stream-ystream
|
||||
(:include %stream-ystream)
|
||||
(:constructor make-octet-stream-ystream (os-stream))
|
||||
(:conc-name "YSTREAM-")))
|
||||
|
||||
(defmethod ystream-device-write ((ystream octet-stream-ystream) buf nbytes)
|
||||
(write-sequence buf (ystream-os-stream ystream) :end nbytes))
|
||||
|
||||
|
||||
;;;; CHARACTER-STREAM-YSTREAM/UTF8
|
||||
|
||||
;; #+rune-is-integer
|
||||
(progn
|
||||
(defstruct (character-stream-ystream/utf8
|
||||
(:constructor make-character-stream-ystream/utf8 (os-stream))
|
||||
(:include %stream-ystream)
|
||||
(:conc-name "YSTREAM-")))
|
||||
|
||||
(defmethod ystream-device-write
|
||||
((ystream character-stream-ystream/utf8) buf nbytes)
|
||||
(declare (type (simple-array (unsigned-byte 8) (*)) buf))
|
||||
(let ((out (ystream-os-stream ystream)))
|
||||
(dotimes (x nbytes)
|
||||
(write-char (code-char (elt buf x)) out)))))
|
||||
|
||||
|
||||
;;;; STRING-YSTREAM/UTF8
|
||||
|
||||
;; #+rune-is-integer
|
||||
(progn
|
||||
(defstruct (string-ystream/utf8
|
||||
(:include character-stream-ystream/utf8
|
||||
(os-stream (make-string-output-stream)))
|
||||
(:conc-name "YSTREAM-")))
|
||||
|
||||
(defmethod close-ystream ((ystream string-ystream/utf8))
|
||||
(get-output-stream-string (ystream-os-stream ystream))))
|
||||
|
||||
|
||||
;;;; helper functions
|
||||
|
||||
(defun rod-to-utf8-string (rod)
|
||||
(let ((out (make-buffer :element-type 'character)))
|
||||
(runes-to-utf8/adjustable-string out rod (length rod))
|
||||
out))
|
||||
|
||||
(defun utf8-string-to-rod (str)
|
||||
(let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
|
||||
(buffer (make-array (length bytes) :element-type '(unsigned-byte 16)))
|
||||
(n (runes-encoding:decode-sequence
|
||||
:utf-8 bytes 0 (length bytes) buffer 0 0 nil))
|
||||
(result (make-array n :element-type 'rune)))
|
||||
(map-into result #'code-rune buffer)
|
||||
result))
|
||||
|
||||
(defclass octet-input-stream
|
||||
(trivial-gray-stream-mixin fundamental-binary-input-stream)
|
||||
((octets :initarg :octets)
|
||||
(pos :initform 0)))
|
||||
|
||||
(defmethod close ((stream octet-input-stream) &key abort)
|
||||
(declare (ignore abort))
|
||||
(open-stream-p stream))
|
||||
|
||||
(defmethod stream-read-byte ((stream octet-input-stream))
|
||||
(with-slots (octets pos) stream
|
||||
(if (>= pos (length octets))
|
||||
:eof
|
||||
(prog1
|
||||
(elt octets pos)
|
||||
(incf pos)))))
|
||||
|
||||
(defmethod stream-read-sequence
|
||||
((stream octet-input-stream) sequence start end &key &allow-other-keys)
|
||||
(with-slots (octets pos) stream
|
||||
(let* ((length (min (- end start) (- (length octets) pos)))
|
||||
(end1 (+ start length))
|
||||
(end2 (+ pos length)))
|
||||
(replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
|
||||
(setf pos end2)
|
||||
end1)))
|
||||
|
||||
(defun make-octet-input-stream (octets)
|
||||
(make-instance 'octet-input-stream :octets octets))
|
||||
Reference in New Issue
Block a user