From 0a7fe86e1c0c05a39d8ab26e79218e3480b7320d Mon Sep 17 00:00:00 2001 From: david Date: Sun, 13 Mar 2005 18:02:27 +0000 Subject: [PATCH] Initial revision --- COPYING | 521 ++++++++++++++++++++++++++++++++++++++++ characters.lisp | 149 ++++++++++++ dep-acl.lisp | 42 ++++ dep-acl5.lisp | 59 +++++ dep-clisp.lisp | 59 +++++ dep-cmucl-dtc.lisp | 30 +++ dep-cmucl.lisp | 30 +++ dep-openmcl.lisp | 16 ++ dep-sbcl.lisp | 30 +++ encodings-data.lisp | 568 ++++++++++++++++++++++++++++++++++++++++++++ encodings.lisp | 347 +++++++++++++++++++++++++++ package.lisp | 50 ++++ runes.lisp | 273 +++++++++++++++++++++ syntax.lisp | 196 +++++++++++++++ util.lisp | 73 ++++++ xstream.lisp | 391 ++++++++++++++++++++++++++++++ 16 files changed, 2834 insertions(+) create mode 100644 COPYING create mode 100644 characters.lisp create mode 100644 dep-acl.lisp create mode 100644 dep-acl5.lisp create mode 100644 dep-clisp.lisp create mode 100644 dep-cmucl-dtc.lisp create mode 100644 dep-cmucl.lisp create mode 100644 dep-openmcl.lisp create mode 100644 dep-sbcl.lisp create mode 100644 encodings-data.lisp create mode 100644 encodings.lisp create mode 100644 package.lisp create mode 100644 runes.lisp create mode 100644 syntax.lisp create mode 100644 util.lisp create mode 100644 xstream.lisp diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..243648d --- /dev/null +++ b/COPYING @@ -0,0 +1,521 @@ +Preamble to the Gnu Lesser General Public License + +Copyright (c) 2000 Franz Incorporated, Berkeley, CA 94704 + +The concept of the GNU Lesser General Public License version 2.1 +("LGPL") has been adopted to govern the use and distribution of +above-mentioned application. However, the LGPL uses terminology that is +more appropriate for a program written in C than one written in +Lisp. Nevertheless, the LGPL can still be applied to a Lisp program if +certain clarifications are made. This document details those +clarifications. Accordingly, the license for the open-source Lisp +applications consists of this document plus the LGPL. Wherever there is +a conflict between this document and the LGPL, this document takes +precedence over the LGPL. + +A "Library" in Lisp is a collection of Lisp functions, data and foreign +modules. The form of the Library can be Lisp source code (for processing +by an interpreter) or object code (usually the result of compilation of +source code or built with some other mechanisms). Foreign modules are +object code in a form that can be linked into a Lisp executable. When we +speak of functions we do so in the most general way to include, in +addition, methods and unnamed functions. Lisp "data" is also a general +term that includes the data structures resulting from defining Lisp +classes. A Lisp application may include the same set of Lisp objects as +does a Library, but this does not mean that the application is +necessarily a "work based on the Library" it contains. + +The Library consists of everything in the distribution file set before +any modifications are made to the files. If any of the functions or +classes in the Library are redefined in other files, then those +redefinitions ARE considered a work based on the Library. If additional +methods are added to generic functions in the Library, those additional +methods are NOT considered a work based on the Library. If Library +classes are subclassed, these subclasses are NOT considered a work based +on the Library. If the Library is modified to explicitly call other +functions that are neither part of Lisp itself nor an available add-on +module to Lisp, then the functions called by the modified Library ARE +considered a work based on the Library. The goal is to ensure that the +Library will compile and run without getting undefined function errors. + +It is permitted to add proprietary source code to the Library, but it +must be done in a way such that the Library will still run without that +proprietary code present. Section 5 of the LGPL distinguishes between +the case of a library being dynamically linked at runtime and one being +statically linked at build time. Section 5 of the LGPL states that the +former results in an executable that is a "work that uses the Library." +Section 5 of the LGPL states that the latter results in one that is a +"derivative of the Library", which is therefore covered by the +LGPL. Since Lisp only offers one choice, which is to link the Library +into an executable at build time, we declare that, for the purpose +applying the LGPL to the Library, an executable that results from +linking a "work that uses the Library" with the Library is considered a +"work that uses the Library" and is therefore NOT covered by the LGPL. + +Because of this declaration, section 6 of LGPL is not applicable to the +Library. However, in connection with each distribution of this +executable, you must also deliver, in accordance with the terms and +conditions of the LGPL, the source code of Library (or your derivative +thereof) that is incorporated into this executable. + +End of Document +------------------------------------------------------------------------ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + diff --git a/characters.lisp b/characters.lisp new file mode 100644 index 0000000..a8fa7e9 --- /dev/null +++ b/characters.lisp @@ -0,0 +1,149 @@ +;;; copyright (c) 2004 knowledgeTools Int. GmbH +;;; Author of this version: David Lichteblau +;;; +;;; derived from runes.lisp, (c) copyright 1998,1999 by Gilbert Baumann +;;; +;;; License: LLGPL (See file COPYING for details). +;;; +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(in-package :runes) + +(deftype rune () 'base-char) +(deftype rod () 'base-string) +(deftype simple-rod () 'simple-string) + +(defsubst rune (rod index) + (char rod index)) + +(defun (setf rune) (new rod index) + (setf (char rod index) new)) + +(defsubst %rune (rod index) + (aref (the simple-string rod) (the fixnum index))) + +(defsubst (setf %rune) (new rod index) + (setf (aref (the simple-string rod) (the fixnum index)) new)) + +(defun rod-capitalize (rod) + (string-upcase rod)) + +(defsubst code-rune (x) (code-char x)) +(defsubst rune-code (x) (char-code x)) + +(defsubst rune= (x y) + (char= x y)) + +(defun rune-downcase (rune) + (char-downcase rune)) + +(defsubst rune-upcase (rune) + (char-upcase rune)) + +(defun rune-upper-case-letter-p (rune) + (upper-case-p rune)) + +(defun rune-lower-case-letter-p (rune) + (lower-case-p rune)) + +(defun rune-equal (x y) + (char-equal x y)) + +(defun rod-downcase (rod) + (string-downcase rod)) + +(defun rod-upcase (rod) + (string-upcase rod)) + +(defsubst white-space-rune-p (char) + (or (char= char #\tab) + (char= char #.(code-char 10)) ;Linefeed + (char= char #.(code-char 13)) ;Carriage Return + (char= char #\space))) + +(defsubst digit-rune-p (char &optional (radix 10)) + (digit-char-p char radix)) + +(defun rod (x) + (cond + ((stringp x) x) + ((symbolp x) (string x)) + ((characterp x) (string x)) + ((vectorp x) (coerce x 'string)) + ((integerp x) (string (code-char x))) + (t (error "Cannot convert ~S to a ~S" x 'rod)))) + +(defun runep (x) + (characterp x)) + +(defun sloopy-rod-p (x) + (stringp x)) + +(defun rod= (x y) + (string= x y)) + +(defun rod-equal (x y) + (string-equal x y)) + +(defsubst make-rod (size) + (make-string size)) + +(defun char-rune (char) + char) + +(defun rune-char (rune &optional default) + (declare (ignore default)) + rune) + +(defun rod-string (rod &optional (default-char #\?)) + (declare (ignore default-char)) + rod) + +(defun string-rod (string) + string) + +;;;; + +(defun rune<= (rune &rest more-runes) + (loop + for (a b) on (cons rune more-runes) + while b + always (char<= a b))) + +(defun rune>= (rune &rest more-runes) + (loop + for (a b) on (cons rune more-runes) + while b + always (char>= a b))) + +(defun rodp (object) + (stringp object)) + +(defun really-rod-p (object) + (stringp object)) + +(defun rod-subseq (source start &optional (end (length source))) + (unless (stringp source) + (error "~S is not of type ~S." source 'rod)) + (subseq source start end)) + +(defun rod-subseq* (source start &optional (end (length source))) + (rod-subseq source start end)) + +(defun rod< (rod1 rod2) + (string< rod1 rod2)) diff --git a/dep-acl.lisp b/dep-acl.lisp new file mode 100644 index 0000000..5bbda45 --- /dev/null +++ b/dep-acl.lisp @@ -0,0 +1,42 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: ACL-4.3 dependent stuff + fixups +;;; Created: 1999-05-25 22:33 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; ACL is incapable to define compiler macros on (setf foo) +;; Unfortunately it is also incapable to declaim such functions inline. +;; So we revoke the DEFUN hack from dep-gcl here. + +(defmacro runes::defsubst (fun args &body body) + (if (and (consp fun) (eq (car fun) 'setf)) + (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")") + (symbol-package (cadr fun))))) + `(progn + (defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap)) + (runes::defsubst ,fnam ,args .,body))) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args .,body) + .args.))))) diff --git a/dep-acl5.lisp b/dep-acl5.lisp new file mode 100644 index 0000000..64534d9 --- /dev/null +++ b/dep-acl5.lisp @@ -0,0 +1,59 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: ACL-5.0 dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;;; Changes +;;; ======= + +;;; When Who What +;;; --------------------------------------------------------------------------- +;;; 2002-01-04 GB spend BLOCK for DEFSUBST +;;; 1999-08-31 SES Stig Erik Sandø +;;; +;;; Changed #+allegro-v5.0 to +;;; #+(and allegro-version>= (version>= 5)) +;;; + +;; ACL is incapable to define compiler macros on (setf foo) +;; Unfortunately it is also incapable to declaim such functions inline. +;; So we revoke the DEFUN hack from dep-gcl here. + +(defmacro runes::defsubst (fun args &body body) + (if (and (consp fun) (eq (car fun) 'setf)) + (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")") + (symbol-package (cadr fun))))) + `(progn + (defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap)) + (runes::defsubst ,fnam ,args .,body))) + (labels ((declp (x) + (and (consp x) (eq (car x) 'declare)))) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args + ,@(remove-if-not #'declp body) + (block ,fun + ,@(remove-if #'declp body))) + .args.)))))) diff --git a/dep-clisp.lisp b/dep-clisp.lisp new file mode 100644 index 0000000..2d9216b --- /dev/null +++ b/dep-clisp.lisp @@ -0,0 +1,59 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CLISP dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(in-package :CL-USER) + +(eval-when (compile load eval) + (if (fboundp 'cl::define-compiler-macro) + (pushnew 'define-compiler-macro *features*))) + +(setq lisp:*load-paths* '(#P"./")) + +#+DEFINE-COMPILER-MACRO +(cl:define-compiler-macro ldb (bytespec value &whole whole) + (let (pos size) + (cond ((and (consp bytespec) + (= (length bytespec) 3) + (eq (car bytespec) 'byte) + (constantp (setq size (second bytespec))) + (constantp (setq pos (third bytespec)))) + `(logand ,(if (eql pos 0) value `(ash ,value (- ,pos))) + (1- (ash 1 ,size)))) + (t + whole)))) + +#-DEFINE-COMPILER-MACRO +(progn + (export 'runes::define-compiler-macro :runes) + (defmacro runes::define-compiler-macro (name args &body body) + (declare (ignore args body)) + `(progn + ',name))) + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/dep-cmucl-dtc.lisp b/dep-cmucl-dtc.lisp new file mode 100644 index 0000000..2e080c3 --- /dev/null +++ b/dep-cmucl-dtc.lisp @@ -0,0 +1,30 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CMUCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/dep-cmucl.lisp b/dep-cmucl.lisp new file mode 100644 index 0000000..2e080c3 --- /dev/null +++ b/dep-cmucl.lisp @@ -0,0 +1,30 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CMUCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/dep-openmcl.lisp b/dep-openmcl.lisp new file mode 100644 index 0000000..3ff2c6f --- /dev/null +++ b/dep-openmcl.lisp @@ -0,0 +1,16 @@ +;;;; dep-openmcl.lisp +;;;; +;;;; This file is part of the CXML parser, released under (L)LGPL. +;;;; See file COPYING for details. +;;;; +;;;; (c) copyright 1999 by Gilbert Baumann + +(defmacro runes::defsubst (fun args &body body) + (if (consp fun) + `(defun ,fun ,args + ,@body) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args .,body) + .args.))))) diff --git a/dep-sbcl.lisp b/dep-sbcl.lisp new file mode 100644 index 0000000..9431fb3 --- /dev/null +++ b/dep-sbcl.lisp @@ -0,0 +1,30 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: SBCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/encodings-data.lisp b/encodings-data.lisp new file mode 100644 index 0000000..e29a683 --- /dev/null +++ b/encodings-data.lisp @@ -0,0 +1,568 @@ +(in-package :encoding) + +(progn + (add-name :us-ascii "ANSI_X3.4-1968") + (add-name :us-ascii "iso-ir-6") + (add-name :us-ascii "ANSI_X3.4-1986") + (add-name :us-ascii "ISO_646.irv:1991") + (add-name :us-ascii "ASCII") + (add-name :us-ascii "ISO646-US") + (add-name :us-ascii "US-ASCII") + (add-name :us-ascii "us") + (add-name :us-ascii "IBM367") + (add-name :us-ascii "cp367") + (add-name :us-ascii "csASCII") + + (add-name :iso-8859-1 "ISO_8859-1:1987") + (add-name :iso-8859-1 "iso-ir-100") + (add-name :iso-8859-1 "ISO_8859-1") + (add-name :iso-8859-1 "ISO-8859-1") + (add-name :iso-8859-1 "latin1") + (add-name :iso-8859-1 "l1") + (add-name :iso-8859-1 "IBM819") + (add-name :iso-8859-1 "CP819") + (add-name :iso-8859-1 "csISOLatin1") + + (add-name :iso-8859-2 "ISO_8859-2:1987") + (add-name :iso-8859-2 "iso-ir-101") + (add-name :iso-8859-2 "ISO_8859-2") + (add-name :iso-8859-2 "ISO-8859-2") + (add-name :iso-8859-2 "latin2") + (add-name :iso-8859-2 "l2") + (add-name :iso-8859-2 "csISOLatin2") + + (add-name :iso-8859-3 "ISO_8859-3:1988") + (add-name :iso-8859-3 "iso-ir-109") + (add-name :iso-8859-3 "ISO_8859-3") + (add-name :iso-8859-3 "ISO-8859-3") + (add-name :iso-8859-3 "latin3") + (add-name :iso-8859-3 "l3") + (add-name :iso-8859-3 "csISOLatin3") + + (add-name :iso-8859-4 "ISO_8859-4:1988") + (add-name :iso-8859-4 "iso-ir-110") + (add-name :iso-8859-4 "ISO_8859-4") + (add-name :iso-8859-4 "ISO-8859-4") + (add-name :iso-8859-4 "latin4") + (add-name :iso-8859-4 "l4") + (add-name :iso-8859-4 "csISOLatin4") + + (add-name :iso-8859-6 "ISO_8859-6:1987") + (add-name :iso-8859-6 "iso-ir-127") + (add-name :iso-8859-6 "ISO_8859-6") + (add-name :iso-8859-6 "ISO-8859-6") + (add-name :iso-8859-6 "ECMA-114") + (add-name :iso-8859-6 "ASMO-708") + (add-name :iso-8859-6 "arabic") + (add-name :iso-8859-6 "csISOLatinArabic") + + (add-name :iso-8859-7 "ISO_8859-7:1987") + (add-name :iso-8859-7 "iso-ir-126") + (add-name :iso-8859-7 "ISO_8859-7") + (add-name :iso-8859-7 "ISO-8859-7") + (add-name :iso-8859-7 "ELOT_928") + (add-name :iso-8859-7 "ECMA-118") + (add-name :iso-8859-7 "greek") + (add-name :iso-8859-7 "greek8") + (add-name :iso-8859-7 "csISOLatinGreek") + + (add-name :iso-8859-8 "ISO_8859-8:1988") + (add-name :iso-8859-8 "iso-ir-138") + (add-name :iso-8859-8 "ISO_8859-8") + (add-name :iso-8859-8 "ISO-8859-8") + (add-name :iso-8859-8 "hebrew") + (add-name :iso-8859-8 "csISOLatinHebrew") + + (add-name :iso-8859-5 "ISO_8859-5:1988") + (add-name :iso-8859-5 "iso-ir-144") + (add-name :iso-8859-5 "ISO_8859-5") + (add-name :iso-8859-5 "ISO-8859-5") + (add-name :iso-8859-5 "cyrillic") + (add-name :iso-8859-5 "csISOLatinCyrillic") + + (add-name :iso-8859-9 "ISO_8859-9:1989") + (add-name :iso-8859-9 "iso-ir-148") + (add-name :iso-8859-9 "ISO_8859-9") + (add-name :iso-8859-9 "ISO-8859-9") + (add-name :iso-8859-9 "latin5") + (add-name :iso-8859-9 "l5") + (add-name :iso-8859-9 "csISOLatin5") + + (add-name :iso-8859-15 "ISO_8859-15") + (add-name :iso-8859-15 "ISO-8859-15") + + (add-name :iso-8859-14 "ISO_8859-14") + (add-name :iso-8859-14 "ISO-8859-14") + + (add-name :koi8-r "KOI8-R") + (add-name :koi8-r "csKOI8R") + + (add-name :utf-8 "UTF-8") + + (add-name :utf-16 "UTF-16") + + (add-name :ucs-4 "ISO-10646-UCS-4") + (add-name :ucs-4 "UCS-4") + + (add-name :ucs-2 "ISO-10646-UCS-2") + (add-name :ucs-2 "UCS-2") ) + + +(progn + (define-encoding :iso-8859-1 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-1))) + + (define-encoding :iso-8859-2 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-2))) + + (define-encoding :iso-8859-3 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-3))) + + (define-encoding :iso-8859-4 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-4))) + + (define-encoding :iso-8859-5 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-5))) + + (define-encoding :iso-8859-6 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-6))) + + (define-encoding :iso-8859-7 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-7))) + + (define-encoding :iso-8859-8 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-8))) + + (define-encoding :iso-8859-14 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-14))) + + (define-encoding :iso-8859-15 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-15))) + + (define-encoding :koi8-r + (make-simple-8-bit-encoding + :charset (find-charset :koi8-r))) + + (define-encoding :utf-8 :utf-8) + ) + +(progn + (define-8-bit-charset :iso-8859-1 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) + + (define-8-bit-charset :iso-8859-2 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0104 #x02D8 #x0141 #x00A4 #x013D #x015A #x00A7 + #| #o25x |# #x00A8 #x0160 #x015E #x0164 #x0179 #x00AD #x017D #x017B + #| #o26x |# #x00B0 #x0105 #x02DB #x0142 #x00B4 #x013E #x015B #x02C7 + #| #o27x |# #x00B8 #x0161 #x015F #x0165 #x017A #x02DD #x017E #x017C + #| #o30x |# #x0154 #x00C1 #x00C2 #x0102 #x00C4 #x0139 #x0106 #x00C7 + #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x011A #x00CD #x00CE #x010E + #| #o32x |# #x0110 #x0143 #x0147 #x00D3 #x00D4 #x0150 #x00D6 #x00D7 + #| #o33x |# #x0158 #x016E #x00DA #x0170 #x00DC #x00DD #x0162 #x00DF + #| #o34x |# #x0155 #x00E1 #x00E2 #x0103 #x00E4 #x013A #x0107 #x00E7 + #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x011B #x00ED #x00EE #x010F + #| #o36x |# #x0111 #x0144 #x0148 #x00F3 #x00F4 #x0151 #x00F6 #x00F7 + #| #o37x |# #x0159 #x016F #x00FA #x0171 #x00FC #x00FD #x0163 #x02D9) + + (define-8-bit-charset :iso-8859-3 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0126 #x02D8 #x00A3 #x00A4 #xFFFF #x0124 #x00A7 + #| #o25x |# #x00A8 #x0130 #x015E #x011E #x0134 #x00AD #xFFFF #x017B + #| #o26x |# #x00B0 #x0127 #x00B2 #x00B3 #x00B4 #x00B5 #x0125 #x00B7 + #| #o27x |# #x00B8 #x0131 #x015F #x011F #x0135 #x00BD #xFFFF #x017C + #| #o30x |# #x00C0 #x00C1 #x00C2 #xFFFF #x00C4 #x010A #x0108 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #xFFFF #x00D1 #x00D2 #x00D3 #x00D4 #x0120 #x00D6 #x00D7 + #| #o33x |# #x011C #x00D9 #x00DA #x00DB #x00DC #x016C #x015C #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #xFFFF #x00E4 #x010B #x0109 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #xFFFF #x00F1 #x00F2 #x00F3 #x00F4 #x0121 #x00F6 #x00F7 + #| #o37x |# #x011D #x00F9 #x00FA #x00FB #x00FC #x016D #x015D #x02D9) + + (define-8-bit-charset :iso-8859-4 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0104 #x0138 #x0156 #x00A4 #x0128 #x013B #x00A7 + #| #o25x |# #x00A8 #x0160 #x0112 #x0122 #x0166 #x00AD #x017D #x00AF + #| #o26x |# #x00B0 #x0105 #x02DB #x0157 #x00B4 #x0129 #x013C #x02C7 + #| #o27x |# #x00B8 #x0161 #x0113 #x0123 #x0167 #x014A #x017E #x014B + #| #o30x |# #x0100 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x012E + #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x0116 #x00CD #x00CE #x012A + #| #o32x |# #x0110 #x0145 #x014C #x0136 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x0172 #x00DA #x00DB #x00DC #x0168 #x016A #x00DF + #| #o34x |# #x0101 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x012F + #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x0117 #x00ED #x00EE #x012B + #| #o36x |# #x0111 #x0146 #x014D #x0137 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x0173 #x00FA #x00FB #x00FC #x0169 #x016B #x02D9) + + (define-8-bit-charset :iso-8859-5 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0401 #x0402 #x0403 #x0404 #x0405 #x0406 #x0407 + #| #o25x |# #x0408 #x0409 #x040A #x040B #x040C #x00AD #x040E #x040F + #| #o26x |# #x0410 #x0411 #x0412 #x0413 #x0414 #x0415 #x0416 #x0417 + #| #o27x |# #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E #x041F + #| #o30x |# #x0420 #x0421 #x0422 #x0423 #x0424 #x0425 #x0426 #x0427 + #| #o31x |# #x0428 #x0429 #x042A #x042B #x042C #x042D #x042E #x042F + #| #o32x |# #x0430 #x0431 #x0432 #x0433 #x0434 #x0435 #x0436 #x0437 + #| #o33x |# #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E #x043F + #| #o34x |# #x0440 #x0441 #x0442 #x0443 #x0444 #x0445 #x0446 #x0447 + #| #o35x |# #x0448 #x0449 #x044A #x044B #x044C #x044D #x044E #x044F + #| #o36x |# #x2116 #x0451 #x0452 #x0453 #x0454 #x0455 #x0456 #x0457 + #| #o37x |# #x0458 #x0459 #x045A #x045B #x045C #x00A7 #x045E #x045F) + + (define-8-bit-charset :iso-8859-6 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0660 #x0661 #x0662 #x0663 #x0664 #x0665 #x0666 #x0667 + #| #o07x |# #x0668 #x0669 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #xFFFF #xFFFF #xFFFF #x00A4 #xFFFF #xFFFF #xFFFF + #| #o25x |# #xFFFF #xFFFF #xFFFF #xFFFF #x060C #x00AD #xFFFF #xFFFF + #| #o26x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o27x |# #xFFFF #xFFFF #xFFFF #x061B #xFFFF #xFFFF #xFFFF #x061F + #| #o30x |# #xFFFF #x0621 #x0622 #x0623 #x0624 #x0625 #x0626 #x0627 + #| #o31x |# #x0628 #x0629 #x062A #x062B #x062C #x062D #x062E #x062F + #| #o32x |# #x0630 #x0631 #x0632 #x0633 #x0634 #x0635 #x0636 #x0637 + #| #o33x |# #x0638 #x0639 #x063A #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o34x |# #x0640 #x0641 #x0642 #x0643 #x0644 #x0645 #x0646 #x0647 + #| #o35x |# #x0648 #x0649 #x064A #x064B #x064C #x064D #x064E #x064F + #| #o36x |# #x0650 #x0651 #x0652 #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o37x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) + + (define-8-bit-charset :iso-8859-7 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x02BD #x02BC #x00A3 #xFFFF #xFFFF #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #xFFFF #x00AB #x00AC #x00AD #xFFFF #x2015 + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x0384 #x0385 #x0386 #x00B7 + #| #o27x |# #x0388 #x0389 #x038A #x00BB #x038C #x00BD #x038E #x038F + #| #o30x |# #x0390 #x0391 #x0392 #x0393 #x0394 #x0395 #x0396 #x0397 + #| #o31x |# #x0398 #x0399 #x039A #x039B #x039C #x039D #x039E #x039F + #| #o32x |# #x03A0 #x03A1 #xFFFF #x03A3 #x03A4 #x03A5 #x03A6 #x03A7 + #| #o33x |# #x03A8 #x03A9 #x03AA #x03AB #x03AC #x03AD #x03AE #x03AF + #| #o34x |# #x03B0 #x03B1 #x03B2 #x03B3 #x03B4 #x03B5 #x03B6 #x03B7 + #| #o35x |# #x03B8 #x03B9 #x03BA #x03BB #x03BC #x03BD #x03BE #x03BF + #| #o36x |# #x03C0 #x03C1 #x03C2 #x03C3 #x03C4 #x03C5 #x03C6 #x03C7 + #| #o37x |# #x03C8 #x03C9 #x03CA #x03CB #x03CC #x03CD #x03CE #xFFFF) + + (define-8-bit-charset :iso-8859-8 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #xFFFF #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00D7 #x00AB #x00AC #x00AD #x00AE #x203E + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00F7 #x00BB #x00BC #x00BD #x00BE #xFFFF + #| #o30x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o31x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o32x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o33x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #x2017 + #| #o34x |# #x05D0 #x05D1 #x05D2 #x05D3 #x05D4 #x05D5 #x05D6 #x05D7 + #| #o35x |# #x05D8 #x05D9 #x05DA #x05DB #x05DC #x05DD #x05DE #x05DF + #| #o36x |# #x05E0 #x05E1 #x05E2 #x05E3 #x05E4 #x05E5 #x05E6 #x05E7 + #| #o37x |# #x05E8 #x05E9 #x05EA #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) + + (define-8-bit-charset :iso-8859-9 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x011E #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x0130 #x015E #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x011F #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x0131 #x015F #x00FF) + + (define-8-bit-charset :iso-8859-14 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x1E02 #x1E03 #x00A3 #x010A #x010B #x1E0A #x00A7 + #| #o25x |# #x1E80 #x00A9 #x1E82 #x1E0B #x1EF2 #x00AD #x00AE #x0178 + #| #o26x |# #x1E1E #x1E1F #x0120 #x0121 #x1E40 #x1E41 #x00B6 #x1E56 + #| #o27x |# #x1E81 #x1E57 #x1E83 #x1E60 #x1EF3 #x1E84 #x1E85 #x1E61 + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x0174 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x1E6A + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x0176 #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x0175 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x1E6B + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x0177 #x00FF) + + (define-8-bit-charset :iso-8859-15 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x20AC #x00A5 #x0160 #x00A7 + #| #o25x |# #x0161 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x017D #x00B5 #x00B6 #x00B7 + #| #o27x |# #x017E #x00B9 #x00BA #x00BB #x0152 #x0153 #x0178 #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) + + (define-8-bit-charset :koi8-r + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #x2500 #x2502 #x250C #x2510 #x2514 #x2518 #x251C #x2524 + #| #o21x |# #x252C #x2534 #x253C #x2580 #x2584 #x2588 #x258C #x2590 + #| #o22x |# #x2591 #x2592 #x2593 #x2320 #x25A0 #x2219 #x221A #x2248 + #| #o23x |# #x2264 #x2265 #x00A0 #x2321 #x00B0 #x00B2 #x00B7 #x00F7 + #| #o24x |# #x2550 #x2551 #x2552 #x0451 #x2553 #x2554 #x2555 #x2556 + #| #o25x |# #x2557 #x2558 #x2559 #x255A #x255B #x255C #x255D #x255E + #| #o26x |# #x255F #x2560 #x2561 #x0401 #x2562 #x2563 #x2564 #x2565 + #| #o27x |# #x2566 #x2567 #x2568 #x2569 #x256A #x256B #x256C #x00A9 + #| #o30x |# #x044E #x0430 #x0431 #x0446 #x0434 #x0435 #x0444 #x0433 + #| #o31x |# #x0445 #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E + #| #o32x |# #x043F #x044F #x0440 #x0441 #x0442 #x0443 #x0436 #x0432 + #| #o33x |# #x044C #x044B #x0437 #x0448 #x044D #x0449 #x0447 #x044A + #| #o34x |# #x042E #x0410 #x0411 #x0426 #x0414 #x0415 #x0424 #x0413 + #| #o35x |# #x0425 #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E + #| #o36x |# #x041F #x042F #x0420 #x0421 #x0422 #x0423 #x0416 #x0412 + #| #o37x |# #x042C #x042B #x0417 #x0428 #x042D #x0429 #x0427 #x042A) + ) + diff --git a/encodings.lisp b/encodings.lisp new file mode 100644 index 0000000..0982caa --- /dev/null +++ b/encodings.lisp @@ -0,0 +1,347 @@ +(in-package :encoding) + +;;;; --------------------------------------------------------------------------- +;;;; Encoding names +;;;; + +(defvar *names* (make-hash-table :test #'eq)) + +(defun canon-name (string) + (with-output-to-string (bag) + (map nil (lambda (ch) + (cond ((char= ch #\_) (write-char #\- bag)) + (t (write-char (char-upcase ch) bag)))) + string))) + +(defun canon-name-2 (string) + (with-output-to-string (bag) + (map nil (lambda (ch) + (cond ((char= ch #\_)) + ((char= ch #\-)) + (t (write-char (char-upcase ch) bag)))) + string))) + +(defmethod encoding-names ((encoding symbol)) + (gethash encoding *names*)) + +(defmethod (setf encoding-names) (new-value (encoding symbol)) + (setf (gethash encoding *names*) new-value)) + +(defun add-name (encoding name) + (pushnew (canon-name name) (encoding-names encoding) :test #'string=)) + +(defun resolve-name (string) + (cond ((symbolp string) + string) + (t + (setq string (canon-name string)) + (or + (block nil + (maphash (lambda (x y) + (when (member string y :test #'string=) + (return x))) + *names*) + nil) + (block nil + (maphash (lambda (x y) + (when (member string y + :test #'(lambda (x y) + (string= (canon-name-2 x) + (canon-name-2 y)))) + (return x))) + *names*) + nil))))) + +;;;; --------------------------------------------------------------------------- +;;;; Encodings +;;;; + +(defvar *encodings* (make-hash-table :test #'eq)) + +(defmacro define-encoding (name init-form) + `(progn + (setf (gethash ',name *encodings*) + (list nil (lambda () ,init-form))) + ',name)) + +(defun find-encoding (name) + (let ((x (gethash (resolve-name name) *encodings*))) + (and x + (or (first x) + (setf (first x) (funcall (second x))))))) + +(defclass encoding () ()) + +(defclass simple-8-bit-encoding (encoding) + ((table :initarg :table))) + +(defun make-simple-8-bit-encoding (&key charset) + (make-instance 'simple-8-bit-encoding + :table (coerce (to-unicode-table charset) '(simple-array (unsigned-byte 16) (256))))) + +;;;;;;; + +(defmacro fx-op (op &rest xs) + `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) +(defmacro fx-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) + +(defmacro %+ (&rest xs) `(fx-op + ,@xs)) +(defmacro %- (&rest xs) `(fx-op - ,@xs)) +(defmacro %* (&rest xs) `(fx-op * ,@xs)) +(defmacro %/ (&rest xs) `(fx-op floor ,@xs)) +(defmacro %and (&rest xs) `(fx-op logand ,@xs)) +(defmacro %ior (&rest xs) `(fx-op logior ,@xs)) +(defmacro %xor (&rest xs) `(fx-op logxor ,@xs)) +(defmacro %ash (&rest xs) `(fx-op ash ,@xs)) +(defmacro %mod (&rest xs) `(fx-op mod ,@xs)) + +(defmacro %= (&rest xs) `(fx-pred = ,@xs)) +(defmacro %<= (&rest xs) `(fx-pred <= ,@xs)) +(defmacro %>= (&rest xs) `(fx-pred >= ,@xs)) +(defmacro %< (&rest xs) `(fx-pred < ,@xs)) +(defmacro %> (&rest xs) `(fx-pred > ,@xs)) + +(defmethod decode-sequence ((encoding (eql :utf-16-big-endian)) + in in-start in-end out out-start out-end eof?) + ;; -> new wptr, new rptr + (let ((wptr out-start) + (rptr in-start)) + (loop + (when (%= wptr out-end) + (return)) + (when (>= (%+ rptr 1) in-end) + (return)) + (let ((hi (aref in rptr)) + (lo (aref in (%+ 1 rptr)))) + (setf rptr (%+ 2 rptr)) + (setf (aref out wptr) (logior (ash hi 8) lo)) + (setf wptr (%+ 1 wptr)))) + (values wptr rptr))) + +(defmethod decode-sequence ((encoding (eql :utf-16-little-endian)) + in in-start in-end out out-start out-end eof?) + ;; -> new wptr, new rptr + (let ((wptr out-start) + (rptr in-start)) + (loop + (when (%= wptr out-end) + (return)) + (when (>= (%+ rptr 1) in-end) + (return)) + (let ((lo (aref in (%+ 0 rptr))) + (hi (aref in (%+ 1 rptr)))) + (setf rptr (%+ 2 rptr)) + (setf (aref out wptr) (logior (ash hi 8) lo)) + (setf wptr (%+ 1 wptr)))) + (values wptr rptr))) + +(defmethod decode-sequence ((encoding (eql :utf-8)) + in in-start in-end out out-start out-end eof?) + (declare (optimize (speed 3) (safety 0)) + (type (simple-array (unsigned-byte 8) (*)) in) + (type (simple-array (unsigned-byte 16) (*)) out) + (type fixnum in-start in-end out-start out-end)) + (let ((wptr out-start) + (rptr in-start) + byte0) + (macrolet ((put (x) + `((lambda (x) + (cond ((or (<= #xD800 x #xDBFF) + (<= #xDC00 x #xDFFF)) + (error "Encoding UTF-16 in UTF-8? : #x~x." x))) + '(unless (data-char-p x) + (error "#x~x is not a data character." x)) + ;;(fresh-line) + ;;(prin1 x) (princ "-> ") + (cond ((%> x #xFFFF) + (setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10)) + (aref out (%+ 1 wptr)) (%ior #xDC00 (%and x #x3FF))) + (setf wptr (%+ wptr 2))) + (t + (setf (aref out wptr) x) + (setf wptr (%+ wptr 1))))) + ,x)) + (put1 (x) + `(progn + (setf (aref out wptr) ,x) + (setf wptr (%+ wptr 1))))) + (loop + (when (%= (+ wptr 1) out-end) (return)) + (when (%>= rptr in-end) (return)) + (setq byte0 (aref in rptr)) + (cond ((= byte0 #x0D) + ;; CR handling + ;; we need to know the following character + (cond ((>= (%+ rptr 1) in-end) + ;; no characters in buffer + (cond (eof? + ;; at EOF, pass it as NL + (put #x0A) + (setf rptr (%+ rptr 1))) + (t + ;; demand more characters + (return)))) + ((= (aref in (%+ rptr 1)) #x0A) + ;; we see CR NL, so forget this CR and the next NL will be + ;; inserted literally + (setf rptr (%+ rptr 1))) + (t + ;; singleton CR, pass it as NL + (put #x0A) + (setf rptr (%+ rptr 1))))) + + ((%<= #|#b00000000|# byte0 #b01111111) + (put1 byte0) + (setf rptr (%+ rptr 1))) + + ((%<= #|#b10000000|# byte0 #b10111111) + (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0) + (setf rptr (%+ rptr 1))) + + ((%<= #|#b11000000|# byte0 #b11011111) + (cond ((< (%+ rptr 2) in-end) + (put + (dpb (ldb (byte 5 0) byte0) (byte 5 6) + (dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0) + 0))) + (setf rptr (%+ rptr 2))) + (t + (return)))) + + ((%<= #|#b11100000|# byte0 #b11101111) + (cond ((< (%+ rptr 3) in-end) + (put + (dpb (ldb (byte 4 0) byte0) (byte 4 12) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 0) + 0)))) + (setf rptr (%+ rptr 3))) + (t + (return)))) + + ((%<= #|#b11110000|# byte0 #b11110111) + (cond ((< (%+ rptr 4) in-end) + (put + (dpb (ldb (byte 3 0) byte0) (byte 3 18) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 0) + 0))))) + (setf rptr (%+ rptr 4))) + (t + (return)))) + + ((%<= #|#b11111000|# byte0 #b11111011) + (cond ((< (%+ rptr 5) in-end) + (put + (dpb (ldb (byte 2 0) byte0) (byte 2 24) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 0) + 0)))))) + (setf rptr (%+ rptr 5))) + (t + (return)))) + + ((%<= #|#b11111100|# byte0 #b11111101) + (cond ((< (%+ rptr 6) in-end) + (put + (dpb (ldb (byte 1 0) byte0) (byte 1 30) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 18) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 5 rptr))) (byte 6 0) + 0))))))) + (setf rptr (%+ rptr 6))) + (t + (return)))) + + (t + (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) + (values wptr rptr)) ) + +(defmethod encoding-p ((object (eql :utf-16-little-endian))) t) +(defmethod encoding-p ((object (eql :utf-16-big-endian))) t) +(defmethod encoding-p ((object (eql :utf-8))) t) + +(defmethod encoding-p ((object encoding)) t) + +(defmethod decode-sequence ((encoding simple-8-bit-encoding) + in in-start in-end + out out-start out-end + eof?) + (declare (optimize (speed 3) (safety 0)) + (type (simple-array (unsigned-byte 8) (*)) in) + (type (simple-array (unsigned-byte 16) (*)) out) + (type fixnum in-start in-end out-start out-end)) + (let ((wptr out-start) + (rptr in-start) + (byte 0) + (table (slot-value encoding 'table))) + (declare (type fixnum wptr rptr) + (type (unsigned-byte 8) byte) + (type (simple-array (unsigned-byte 16) (*)) table)) + (loop + (when (%= wptr out-end) (return)) + (when (%>= rptr in-end) (return)) + (setq byte (aref in rptr)) + (cond ((= byte #x0D) + ;; CR handling + ;; we need to know the following character + (cond ((>= (%+ rptr 1) in-end) + ;; no characters in buffer + (cond (eof? + ;; at EOF, pass it as NL + (setf (aref out wptr) #x0A) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))) + (t + ;; demand more characters + (return)))) + ((= (aref in (%+ rptr 1)) #x0A) + ;; we see CR NL, so forget this CR and the next NL will be + ;; inserted literally + (setf rptr (%+ rptr 1))) + (t + ;; singleton CR, pass it as NL + (setf (aref out wptr) #x0A) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))))) + + (t + (setf (aref out wptr) (aref table byte)) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))) )) + (values wptr rptr))) + +;;;; --------------------------------------------------------------------------- +;;;; Character sets +;;;; + +(defvar *charsets* (make-hash-table :test #'eq)) + +(defclass 8-bit-charset () + ((name :initarg :name) + (to-unicode-table + :initarg :to-unicode-table + :reader to-unicode-table))) + +(defmacro define-8-bit-charset (name &rest codes) + (assert (= 256 (length codes))) + `(progn + (setf (gethash ',name *charsets*) + (make-instance '8-bit-charset + :name ',name + :to-unicode-table + ',(make-array 256 + :element-type '(unsigned-byte 16) + :initial-contents codes))) + ',name)) + +(defun find-charset (name) + (or (gethash name *charsets*) + (error "There is no character set named ~S." name))) + diff --git a/package.lisp b/package.lisp new file mode 100644 index 0000000..921c457 --- /dev/null +++ b/package.lisp @@ -0,0 +1,50 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Generating a sane DEFPACKAGE for RUNES +;;; Created: 1999-05-25 +;;; Author: Gilbert Baumann +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999,2000 by Gilbert Baumann + +(in-package :cl-user) + +(defpackage :runes + (:use :cl) + (:export #:defsubst + + ;; util.lisp : + #:compose + #:curry + #:rcurry + #:until + #:while + + ;; runes.lisp + #:rune + #:rod + #:simple-rod + #:%rune + #:rod-capitalize + #:code-rune + #:rune-code + #:rune-downcase + #:rune-upcase + #:rod-downcase + #:rod-upcase + #:white-space-rune-p + #:digit-rune-p + #:rune= + #:rune<= + #:rune>= + #:rune-equal + #:runep + #:sloopy-rod-p + #:rod= + #:rod-equal + #:make-rod + #:char-rune + #:rune-char + #:rod-string + #:string-rod + #:rod-subseq + #:rod<)) diff --git a/runes.lisp b/runes.lisp new file mode 100644 index 0000000..7aed6d0 --- /dev/null +++ b/runes.lisp @@ -0,0 +1,273 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unicode strings (called RODs) +;;; Created: 1999-05-25 22:29 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-15 GB - ROD=, ROD-EQUAL +;; RUNE<=, RUNE>= +;; MAKE-ROD, ROD-SUBSEQ +;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD +;; new functions +;; - Added rune reader +;; + +(in-package :runes) + +(deftype rune () '(unsigned-byte 16)) +(deftype rod () '(array rune (*))) +(deftype simple-rod () '(simple-array rune (*))) + +(defsubst rune (rod index) + (aref rod index)) + +(defun (setf rune) (new rod index) + (setf (aref rod index) new)) + +(defsubst %rune (rod index) + (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index))) + +(defsubst (setf %rune) (new rod index) + (setf (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index)) new)) + +(defun rod-capitalize (rod) + (warn "~S is not implemented." 'rod-capitalize) + rod) + +(defsubst code-rune (x) x) +(defsubst rune-code (x) x) + +(defsubst rune= (x y) + (= x y)) + +(defun rune-downcase (rune) + (cond ((<= #x0041 rune #x005a) (+ rune #x20)) + ((= rune #x00d7) rune) + ((<= #x00c0 rune #x00de) (+ rune #x20)) + (t rune))) + +(defsubst rune-upcase (rune) + (cond ((<= #x0061 rune #x007a) (- rune #x20)) + ((= rune #x00f7) rune) + ((<= #x00e0 rune #x00fe) (- rune #x20)) + (t rune))) + +(defun rune-upper-case-letter-p (rune) + (or (<= #x0041 rune #x005a) (<= #x00c0 rune #x00de))) + +(defun rune-lower-case-letter-p (rune) + (or (<= #x0061 rune #x007a) (<= #x00e0 rune #x00fe) + (= rune #x00d7))) + + +(defun rune-equal (x y) + (rune= (rune-upcase x) (rune-upcase y))) + +(defun rod-downcase (rod) + ;; FIXME + (register-rod + (map '(simple-array (unsigned-byte 16) (*)) #'rune-downcase rod))) + +(defun rod-upcase (rod) + ;; FIXME + (register-rod + (map '(simple-array (unsigned-byte 16) (*)) #'rune-upcase rod))) + +(defsubst white-space-rune-p (char) + (or (= char 9) ;TAB + (= char 10) ;Linefeed + (= char 13) ;Carriage Return + (= char 32))) ;Space + +(defsubst digit-rune-p (char &optional (radix 10)) + (cond ((<= #.(char-code #\0) char #.(char-code #\9)) + (and (< (- char #.(char-code #\0)) radix) + (- char #.(char-code #\0)))) + ((<= #.(char-code #\A) char #.(char-code #\Z)) + (and (< (- char #.(char-code #\A) -10) radix) + (- char #.(char-code #\A) -10))) + ((<= #.(char-code #\a) char #.(char-code #\z)) + (and (< (- char #.(char-code #\a) -10) radix) + (- char #.(char-code #\a) -10))) )) + +(defun rod (x) + (cond ((stringp x) (register-rod (map 'rod #'char-code x))) + ((symbolp x) (rod (string x))) + ((characterp x) (rod (string x))) + ((vectorp x) (register-rod (coerce x 'rod))) + ((integerp x) (register-rod (map 'rod #'identity (list x)))) + (t (error "Cannot convert ~S to a ~S" x 'rod)))) + +(defun runep (x) + (and (integerp x) + (<= 0 x #xFFFF))) + +(defun sloopy-rod-p (x) + (and (not (stringp x)) + (vectorp x) + (every #'runep x))) + +(defun rod= (x y) + (and (= (length x) (length y)) + (dotimes (i (length x) t) + (unless (rune= (rune x i) (rune y i)) + (return nil))))) + +(defun rod-equal (x y) + (and (= (length x) (length y)) + (dotimes (i (length x) t) + (unless (rune-equal (rune x i) (rune y i)) + (return nil))))) + +(defsubst make-rod (size) + (let ((res (make-array size :element-type 'rune))) + (register-rod res) + res)) + +(defun char-rune (char) + (code-rune (char-code char))) + +(defun rune-char (rune &optional (default #\?)) + #+CMU + (if (< rune 256) (code-char rune) default) + #-CMU + (or (code-char rune) default)) + +(defun rod-string (rod &optional (default-char #\?)) + (map 'string (lambda (x) (rune-char x default-char)) rod)) + +(defun string-rod (string) + (let* ((n (length string)) + (res (make-rod n))) + (dotimes (i n) + (setf (%rune res i) (char-rune (char string i)))) + res)) + +;;;; + +(defun rune<= (rune &rest more-runes) + (apply #'<= rune more-runes)) + +(defun rune>= (rune &rest more-runes) + (apply #'>= rune more-runes)) + +(defun rodp (object) + (typep object 'rod)) + +(defun really-rod-p (object) + (and (typep object 'rod) + (really-really-rod-p object))) + +(defun rod-subseq (source start &optional (end (length source))) + (unless (rodp source) + (error "~S is not of type ~S." source 'rod)) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type rod source) + (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (%rune source (the fixnum (+ i start)))))))) + +(defun rod-subseq* (source start &optional (end (length source))) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (aref source (the fixnum (+ i start)))))))) + +;;; Support for telling ROD and arrays apart: + +#+CMU +(progn + (defvar *rod-hash-table* + (make-array 5003 :initial-element nil))) + +(defun register-rod (rod) + #+CMU + (unless (really-really-rod-p rod) + (push (ext:make-weak-pointer rod) + (aref *rod-hash-table* (mod (cl::pointer-hash rod) + (length *rod-hash-table*))))) + rod) + +(defun really-really-rod-p (rod) + #+CMU + (find rod (aref *rod-hash-table* (mod (cl::pointer-hash rod) + (length *rod-hash-table*))) + :key #'ext:weak-pointer-value)) + +#+CMU +(progn + (defun rod-hash-table-rehash () + (let* ((n 5003) + (new (make-array n :initial-element nil))) + (loop for bucket across *rod-hash-table* do + (loop for item in bucket do + (let ((v (ext:weak-pointer-value item))) + (when v + (push item (aref new (mod (cl::pointer-hash v) n))))))) + (setf *rod-hash-table* new))) + + (defun rod-hash-after-gc-hook () + ;; hmm interesting question: should we rehash? + (rod-hash-table-rehash)) + + (pushnew 'rod-hash-after-gc-hook extensions:*after-gc-hooks*) ) + +(defun rod< (rod1 rod2) + (do ((i 0 (+ i 1))) + (nil) + (cond ((= i (length rod1)) + (return t)) + ((= i (length rod2)) + (return nil)) + ((< (aref rod1 i) (aref rod2 i)) + (return t)) + ((> (aref rod1 i) (aref rod2 i)) + (return nil))))) diff --git a/syntax.lisp b/syntax.lisp new file mode 100644 index 0000000..1c35251 --- /dev/null +++ b/syntax.lisp @@ -0,0 +1,196 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unicode strings (called RODs) +;;; Created: 1999-05-25 22:29 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-15 GB - ROD=, ROD-EQUAL +;; RUNE<=, RUNE>= +;; MAKE-ROD, ROD-SUBSEQ +;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD +;; new functions +;; - Added rune reader +;; + +(in-package :runes) + +;;;; +;;;; RUNE Reader +;;;; + +;; Portable implementation of WHITE-SPACE-P with regard to the current +;; read table -- this is bit tricky. + +(defun rt-white-space-p (char) + (let ((stream (make-string-input-stream (string char)))) + (eq :eof (peek-char t stream nil :eof)))) + +(defun read-rune-name (input) + ;; the first char is unconditionally read + (let ((char0 (read-char input t nil t))) + (when (char= char0 #\\) + (setf char0 (read-char input t nil t))) + (with-output-to-string (res) + (write-char char0 res) + (do ((ch (peek-char nil input nil :eof t) (peek-char nil input nil :eof t))) + ((or (eq ch :eof) + (rt-white-space-p ch) + (multiple-value-bind (function non-terminating-p) (get-macro-character ch) + (and function (not non-terminating-p))))) + (write-char ch res) + (read-char input))))) ;consume this character + +(defun iso-10646-char-code (char) + (char-code char)) + +(defvar *rune-names* (make-hash-table :test #'equal) + "Hashtable, which maps all known rune names to rune codes; + Names are stored in uppercase.") + +(defun define-rune-name (name code) + (setf (gethash (string-upcase name) *rune-names*) code) + name) + +(defun lookup-rune-name (name) + (gethash (string-upcase name) *rune-names*)) + +(define-rune-name "null" #x0000) +(define-rune-name "space" #x0020) +(define-rune-name "newline" #x000A) +(define-rune-name "return" #x000D) +(define-rune-name "tab" #x0009) +(define-rune-name "page" #x000C) + +;; and just for fun: +(define-rune-name "euro" #x20AC) + +;; ASCII control characters +(define-rune-name "nul" #x0000) ;null +(define-rune-name "soh" #x0001) ;start of header +(define-rune-name "stx" #x0002) ;start of text +(define-rune-name "etx" #x0003) ;end of text +(define-rune-name "eot" #x0004) ;end of transmission +(define-rune-name "enq" #x0005) ; +(define-rune-name "ack" #x0006) ;acknowledge +(define-rune-name "bel" #x0007) ;bell +(define-rune-name "bs" #x0008) ;backspace +(define-rune-name "ht" #x0009) ;horizontal tab +(define-rune-name "lf" #X000A) ;line feed, new line +(define-rune-name "vt" #X000B) ;vertical tab +(define-rune-name "ff" #x000C) ;form feed +(define-rune-name "cr" #x000D) ;carriage return +(define-rune-name "so" #x000E) ;shift out +(define-rune-name "si" #x000F) ;shift in +(define-rune-name "dle" #x0010) ;device latch enable ? +(define-rune-name "dc1" #x0011) ;device control 1 +(define-rune-name "dc2" #x0012) ;device control 2 +(define-rune-name "dc3" #x0013) ;device control 3 +(define-rune-name "dc4" #x0014) ;device control 4 +(define-rune-name "nak" #x0015) ;negative acknowledge +(define-rune-name "syn" #x0016) ; +(define-rune-name "etb" #x0017) ; +(define-rune-name "can" #x0018) ; +(define-rune-name "em" #x0019) ;end of message +(define-rune-name "sub" #x001A) ; +(define-rune-name "esc" #x001B) ;escape +(define-rune-name "fs" #x001C) ;field separator ? +(define-rune-name "gs" #x001D) ;group separator +(define-rune-name "rs" #x001E) ; +(define-rune-name "us" #x001F) ; + +(define-rune-name "del" #x007F) ;delete + +;; iso-latin +(define-rune-name "nbsp" #x00A0) ;non breakable space +(define-rune-name "shy" #x00AD) ;soft hyphen + +(defun rune-from-read-name (name) + (code-rune + (cond ((= (length name) 1) + (iso-10646-char-code (char name 0))) + ((and (= (length name) 2) + (char= (char name 0) #\\)) + (iso-10646-char-code (char name 1))) + ((and (>= (length name) 3) + (char-equal (char name 0) #\u) + (char-equal (char name 1) #\+) + (every (lambda (x) (digit-char-p x 16)) (subseq name 2))) + (parse-integer name :start 2 :radix 16)) + ((lookup-rune-name name)) + (t + (error "Meaningless rune name ~S." name))))) + +(defun rune-reader (stream subchar arg) + subchar arg + (values (rune-from-read-name (read-rune-name stream)))) + +(set-dispatch-macro-character #\# #\/ 'rune-reader) + +;;; ROD ext syntax + +(defun rod-reader (stream subchar arg) + (declare (ignore arg)) + (rod + (with-output-to-string (bag) + (do ((c (read-char stream t nil t) + (read-char stream t nil t))) + ((char= c subchar)) + (cond ((char= c #\\) + (setf c (read-char stream t nil t)))) + (princ c bag))))) + +#-rune-is-character +(defun rod-printer (stream rod) + (princ #\# stream) + (princ #\" stream) + (loop for x across rod do + (cond ((or (rune= x #.(char-rune #\\)) + (rune= x #.(char-rune #\"))) + (princ #\\ stream) + (princ (code-char x) stream)) + ((< x char-code-limit) + (princ (code-char x) stream)) + (t + (format stream "\\u~4,'0X" x)))) + (princ #\" stream)) + +#-rune-is-character +(set-pprint-dispatch '(satisfies really-rod-p) #'rod-printer) + +(set-dispatch-macro-character #\# #\" 'rod-reader) + +#|| +(defun longish-array-p (arr) + (and (arrayp arr) + (> (array-total-size arr) 10))) + +(set-pprint-dispatch '(satisfies longish-array-p) + #'(lambda (stream object) + (let ((*print-array* nil) + (*print-pretty* nil)) + (prin1 object stream)))) +||# diff --git a/util.lisp b/util.lisp new file mode 100644 index 0000000..60cd74c --- /dev/null +++ b/util.lisp @@ -0,0 +1,73 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Some common utilities for the Closure browser +;;; Created: 1997-12-27 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1997-1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-24 GB = fixed MULTIPLE-VALUE-OR it now takes any number of +;; subforms +;; + +(in-package :runes) + +;;; -------------------------------------------------------------------------------- +;;; Meta functions + +(defun curry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append args more)))) + +(defun rcurry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append more args)))) + +(defun compose (f g) + #'(lambda (&rest args) + (funcall f (apply g args)))) + +;;; -------------------------------------------------------------------------------- +;;; while and until + +(defmacro while (test &body body) + `(until (not ,test) ,@body)) + +(defmacro until (test &body body) + `(do () (,test) ,@body)) + +;; prime numbers + +(defun primep (n) + "Returns true, iff `n' is prime." + (and (> n 2) + (do ((i 2 (+ i 1))) + ((> (* i i) n) t) + (cond ((zerop (mod n i)) (return nil)))))) + +(defun nearest-greater-prime (n) + "Returns the smallest prime number no less than `n'." + (cond ((primep n) n) + ((nearest-greater-prime (+ n 1))))) diff --git a/xstream.lisp b/xstream.lisp new file mode 100644 index 0000000..9032a7b --- /dev/null +++ b/xstream.lisp @@ -0,0 +1,391 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: runes; readtable: runes; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Fast streams +;;; Created: 1999-07-17 +;;; Author: Gilbert Baumann +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :runes) + +;;; API +;; +;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding +;; [function] +;; MAKE-ROD-XSTREAM rod &key name [function] +;; CLOSE-XSTREAM xstream [function] +;; XSTREAM-P object [function] +;; +;; READ-RUNE xstream [macro] +;; PEEK-RUNE xstream [macro] +;; FREAD-RUNE xstream [function] +;; FPEEK-RUNE xstream [function] +;; CONSUME-RUNE xstream [macro] +;; UNREAD-RUNE rune xstream [function] +;; +;; XSTREAM-NAME xstream [accessor] +;; XSTREAM-POSITION xstream [function] +;; XSTREAM-LINE-NUMBER xstream [function] +;; XSTREAM-COLUMN-NUMBER xstream [function] +;; XSTREAM-PLIST xstream [accessor] +;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*] +;; SET-TO-FULL-SPEED xstream [function] + +;; [*] switching the encoding on the fly is only possible when the +;; stream's buffer is empty; therefore to be able to switch the +;; encoding, while some runes are already read, set the stream's speed +;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM) +;; and later set it to full speed. (The encoding of the runes +;; sequence, you fetch off with READ-RUNE is always UTF-16 though). +;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the +;; speed up to a full buffer length. + +;; An encoding is simply something, which provides the DECODE-SEQUENCE +;; method. + +;;; Controller protocol +;; +;; READ-OCTECTS sequence os-stream start end -> first-non-written +;; XSTREAM/CLOSE os-stream +;; + +(eval-when (eval compile load) + (defparameter *fast* '(optimize (speed 3) (safety 0))) + ;;(defparameter *fast* '(optimize (speed 2) (safety 3))) + ) + +;; Let us first define fast fixnum arithmetric get rid of type +;; checks. (After all we know what we do here). + +(defmacro fx-op (op &rest xs) + `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) +(defmacro fx-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) + +(defmacro %+ (&rest xs) `(fx-op + ,@xs)) +(defmacro %= (&rest xs) `(fx-pred = ,@xs)) + +(deftype buffer-index () + `(unsigned-byte ,(integer-length array-total-size-limit))) + +(deftype buffer-byte () + `(unsigned-byte 16)) + +(deftype octet () + `(unsigned-byte 8)) + +;; The usage of a special marker for EOF is experimental and +;; considered unhygenic. + +(defconstant +end+ #xFFFF + "Special marker inserted into stream buffers to indicate end of buffered data.") + +(defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte)) +(defvar +null-octet-buffer+ (make-array 0 :element-type 'octet)) + +(defstruct (xstream + (:constructor make-xstream/low) + (:copier nil) + (:print-function print-xstream)) + + ;;; Read buffer + + ;; the buffer itself + (buffer +null-buffer+ + :type (simple-array buffer-byte (*))) + ;; points to the next element of `buffer' containing the next rune + ;; about to be read. + (read-ptr 0 :type buffer-index) + ;; points to the first element of `buffer' not containing a rune to + ;; be read. + (fill-ptr 0 :type buffer-index) + + ;;; OS buffer + + ;; a scratch pad for READ-SEQUENCE + (os-buffer +null-octet-buffer+ + :type (simple-array octet (*))) + + ;; `os-left-start', `os-left-end' designate a region of os-buffer, + ;; which still contains some undecoded data. This is needed because + ;; of the DECODE-SEQUENCE protocol + (os-left-start 0 :type buffer-index) + (os-left-end 0 :type buffer-index) + + ;; How much to read each time + (speed 0 :type buffer-index) + + ;; Some stream object obeying to a certain protcol + os-stream + + ;; The external format + ;; (some object offering the ENCODING protocol) + (encoding :utf-8) + + ;;A STREAM-NAME object + (name nil) + + ;; a plist a struct keeps the hack away + (plist nil) + + ;; Stream Position + (line-number 1 :type integer) ;current line number + (line-start 0 :type integer) ;stream position the current line starts at + (buffer-start 0 :type integer) ;stream position the current buffer starts at + + ;; There is no need to maintain a column counter for each character + ;; read, since we can easily compute it from `line-start' and + ;; `buffer-start'. + ) + +(defmacro read-rune (input) + "Read a single rune off the xstream `input'. In case of end of file :EOF + is returned." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (setf (xstream-read-ptr input) (%+ rp 1)) + (cond ((%= ch +end+) + (the (or (member :eof) rune) + (xstream-underflow input))) + ((%= ch #x000A) ;line break + (account-for-line-break input) + (code-rune ch)) + (t + (code-rune ch)))))) + ,input)) + +(defmacro peek-rune (input) + "Peek a single rune off the xstream `input'. In case of end of file :EOF + is returned." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (cond ((%= ch +end+) + (prog1 + (the (or (member :eof) rune) (xstream-underflow input)) + (setf (xstream-read-ptr input) 0))) + (t + (code-rune ch)))))) + ,input)) + +(defmacro consume-rune (input) + "Like READ-RUNE, but does not actually return the read rune." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (setf (xstream-read-ptr input) (%+ rp 1)) + (when (%= ch +end+) + (xstream-underflow input)) + (when (%= ch #x000A) ;line break + (account-for-line-break input) ))) + nil) + ,input)) + +(defsubst unread-rune (rune input) + "Unread the last recently read rune; if there wasn't such a rune, you + deserve to lose." + (declare (ignore rune)) + (decf (xstream-read-ptr input)) + (when (rune= (peek-rune input) #/u+000A) ;was it a line break? + (unaccount-for-line-break input))) + +(defun fread-rune (input) + (read-rune input)) + +(defun fpeek-rune (input) + (peek-rune input)) + +;;; Line counting + +(defun account-for-line-break (input) + (declare (type xstream input)) + (incf (xstream-line-number input)) + (setf (xstream-line-start input) + (+ (xstream-buffer-start input) (xstream-read-ptr input)))) + +(defun unaccount-for-line-break (input) + ;; incomplete! + ;; We better use a traditional lookahead technique or forbid unread-rune. + (decf (xstream-line-number input))) + +;; User API: + +(defun xstream-position (input) + (+ (xstream-buffer-start input) (xstream-read-ptr input))) + +;; xstream-line-number is structure accessor + +(defun xstream-column-number (input) + (+ (- (xstream-position input) + (xstream-line-start input)) + 1)) + +;;; Underflow + +;;(defun read-runes (sequence input)) + +(defun xstream-underflow (input) + (declare (type xstream input)) + ;; we are about to fill new data into the buffer, so we need to + ;; adjust buffer-start. + (incf (xstream-buffer-start input) + (- (xstream-fill-ptr input) 0)) + (let (n m) + ;; when there is something left in the os-buffer, we move it to + ;; the start of the buffer. + (setf m (- (xstream-os-left-end input) (xstream-os-left-start input))) + (unless (zerop m) + (replace (xstream-os-buffer input) (xstream-os-buffer input) + :start1 0 :end1 m + :start2 (xstream-os-left-start input) + :end2 (xstream-os-left-end input)) + ;; then we take care that the buffer is large enough to carry at + ;; least 100 bytes (a random number) + (unless (>= (length (xstream-os-buffer input)) 100) + (error "You lost") + ;; todo: enlarge buffer + )) + (setf n + (read-octets (xstream-os-buffer input) (xstream-os-stream input) + m (min (1- (length (xstream-os-buffer input))) + (+ m (xstream-speed input))))) + (cond ((%= n 0) + (setf (xstream-read-ptr input) 0 + (xstream-fill-ptr input) n) + (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) + :eof) + (t + (multiple-value-bind (fnw fnr) + (encoding:decode-sequence + (xstream-encoding input) + (xstream-os-buffer input) 0 n + (xstream-buffer input) 0 (1- (length (xstream-buffer input))) + (= n m)) + (setf (xstream-os-left-start input) fnr + (xstream-os-left-end input) n + (xstream-read-ptr input) 0 + (xstream-fill-ptr input) fnw) + (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) + (read-rune input)))))) + +;;; constructor + +(defun make-xstream (os-stream &key name + (speed 8192) + (initial-speed 1) + (initial-encoding :guess)) + ;; XXX if initial-speed isn't 1, encoding will me munged up + (assert (eql initial-speed 1)) + (multiple-value-bind (encoding preread) + (if (eq initial-encoding :guess) + (figure-encoding os-stream) + (values initial-encoding nil)) + (let ((osbuf (make-array speed :element-type '(unsigned-byte 8)))) + (replace osbuf preread) + (make-xstream/low + :buffer (let ((r (make-array speed :element-type 'buffer-byte))) + (setf (elt r 0) #xFFFF) + r) + :read-ptr 0 + :fill-ptr 0 + :os-buffer osbuf + :speed initial-speed + :os-stream os-stream + :os-left-start 0 + :os-left-end (length preread) + :encoding encoding + :name name)))) + +(defun make-rod-xstream (string &key name) + ;; XXX encoding is mis-handled by this kind of stream + (let ((n (length string))) + (let ((buffer (make-array (1+ n) :element-type 'buffer-byte))) + (declare (type (simple-array buffer-byte (*)) buffer)) + ;; copy the rod + (do ((i (1- n) (- i 1))) + ((< i 0)) + (declare (type fixnum i)) + (setf (aref buffer i) (rune-code (%rune string i)))) + (setf (aref buffer n) +end+) + ;; + (make-xstream/low :buffer buffer + :read-ptr 0 + :fill-ptr n + ;; :os-buffer nil + :speed 1 + :os-stream nil + :name name)))) + +(defmethod figure-encoding ((stream null)) + (values :utf-8 nil)) + +(defmethod figure-encoding ((stream stream)) + (let ((c0 (read-byte stream nil :eof))) + (cond ((eq c0 :eof) + (values :utf-8 nil)) + (t + (let ((c1 (read-byte stream nil :eof))) + (cond ((eq c1 :eof) + (values :utf-8 (list c0))) + (t + (cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil)) + ((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil)) + (t + (values :utf-8 (list c0 c1))))))))))) + +;;; misc + +(defun close-xstream (input) + (xstream/close (xstream-os-stream input))) + +(defun set-to-full-speed (xstream) + (setf (xstream-speed xstream) (length (xstream-os-buffer xstream)))) + +;;; controller implementations + +(defmethod read-octets (sequence (stream stream) start end) + (#+CLISP lisp:read-byte-sequence + #-CLISP read-sequence + sequence stream :start start :end end)) + +(defmethod read-octets (sequence (stream null) start end) + (declare (ignore sequence start end)) + 0) + +(defmethod xstream/close ((stream stream)) + (close stream)) + +(defmethod xstream/close ((stream null)) + nil)