From d6ca7664f402786f5212c28317689cfbbe509f97 Mon Sep 17 00:00:00 2001 From: david Date: Sun, 13 Mar 2005 18:02:10 +0000 Subject: [PATCH] Initial revision --- CLISP.diff | 17 + GNUmakefile | 8 + NEWS | 151 + OLDNEWS | 272 ++ README.html | 100 + TIMES | 41 + XMLS-SYMBOLS.diff | 98 + catalog.dtd | 149 + contrib/xhtmlgen.lisp | 352 +++ cxml.asd | 58 + documentation.css | 0 dom/COPYING | 459 +++ dom/dom-builder.lisp | 139 + dom/dom-impl.lisp | 983 +++++++ dom/dom-sax.lisp | 60 + dom/package.lisp | 111 + dom/simple-dom.lisp | 46 + dom/string-dom.lisp | 66 + dom/unparse.lisp | 9 + dom/xml-canonic.lisp | 161 ++ domtest.lisp | 433 +++ glisp/COPYING | 521 ++++ glisp/characters.lisp | 132 + glisp/dep-acl.lisp | 127 + glisp/dep-acl5.lisp | 162 ++ glisp/dep-clisp.lisp | 176 ++ glisp/dep-cmucl-dtc.lisp | 212 ++ glisp/dep-cmucl.lisp | 241 ++ glisp/dep-gcl-2.lisp | 93 + glisp/dep-gcl.lisp | 344 +++ glisp/dep-sbcl.lisp | 141 + glisp/gendep.lisp | 427 +++ glisp/match.lisp | 207 ++ glisp/package.lisp | 406 +++ glisp/runes.lisp | 412 +++ glisp/syntax.lisp | 190 ++ glisp/util.lisp | 1113 ++++++++ mlisp-patch.diff | 68 + runes/COPYING | 521 ++++ runes/characters.lisp | 149 + runes/dep-acl.lisp | 42 + runes/dep-acl5.lisp | 59 + runes/dep-clisp.lisp | 59 + runes/dep-cmucl-dtc.lisp | 30 + runes/dep-cmucl.lisp | 30 + runes/dep-openmcl.lisp | 16 + runes/dep-sbcl.lisp | 30 + runes/encodings-data.lisp | 568 ++++ runes/encodings.lisp | 347 +++ runes/package.lisp | 50 + runes/runes.lisp | 273 ++ runes/syntax.lisp | 196 ++ runes/util.lisp | 73 + runes/xstream.lisp | 391 +++ test/domtest.lisp | 634 +++++ test/xmlconf-base.diff | 53 + test/xmlconf.lisp | 104 + xml/COPYING | 459 +++ xml/catalog.lisp | 161 ++ xml/characters.lisp | 127 + xml/dom-builder.lisp | 46 + xml/dom-impl.lisp | 512 ++++ xml/dompack.lisp | 102 + xml/encodings-data.lisp | 568 ++++ xml/encodings.lisp | 347 +++ xml/package.lisp | 41 + xml/recoder.lisp | 110 + xml/sax-handler.lisp | 227 ++ xml/sax-proxy.lisp | 38 + xml/sax-tests/event-collecting-handler.lisp | 37 + xml/sax-tests/package.lisp | 4 + xml/sax-tests/tests.lisp | 332 +++ xml/split-sequence.lisp | 44 + xml/string-dom.lisp | 35 + xml/unparse.lisp | 438 +++ xml/xml-canonic.lisp | 172 ++ xml/xml-name-rune-p.lisp | 218 ++ xml/xml-parse.lisp | 2824 +++++++++++++++++++ xml/xml-stream.lisp | 370 +++ xml/xmls-compat.lisp | 118 + xmlconf.lisp | 23 + 81 files changed, 19663 insertions(+) create mode 100644 CLISP.diff create mode 100644 GNUmakefile create mode 100644 NEWS create mode 100644 OLDNEWS create mode 100644 README.html create mode 100644 TIMES create mode 100644 XMLS-SYMBOLS.diff create mode 100644 catalog.dtd create mode 100644 contrib/xhtmlgen.lisp create mode 100644 cxml.asd create mode 100644 documentation.css create mode 100644 dom/COPYING create mode 100644 dom/dom-builder.lisp create mode 100644 dom/dom-impl.lisp create mode 100644 dom/dom-sax.lisp create mode 100644 dom/package.lisp create mode 100644 dom/simple-dom.lisp create mode 100644 dom/string-dom.lisp create mode 100644 dom/unparse.lisp create mode 100644 dom/xml-canonic.lisp create mode 100644 domtest.lisp create mode 100644 glisp/COPYING create mode 100644 glisp/characters.lisp create mode 100644 glisp/dep-acl.lisp create mode 100644 glisp/dep-acl5.lisp create mode 100644 glisp/dep-clisp.lisp create mode 100644 glisp/dep-cmucl-dtc.lisp create mode 100644 glisp/dep-cmucl.lisp create mode 100644 glisp/dep-gcl-2.lisp create mode 100644 glisp/dep-gcl.lisp create mode 100644 glisp/dep-sbcl.lisp create mode 100644 glisp/gendep.lisp create mode 100644 glisp/match.lisp create mode 100644 glisp/package.lisp create mode 100644 glisp/runes.lisp create mode 100644 glisp/syntax.lisp create mode 100644 glisp/util.lisp create mode 100644 mlisp-patch.diff create mode 100644 runes/COPYING create mode 100644 runes/characters.lisp create mode 100644 runes/dep-acl.lisp create mode 100644 runes/dep-acl5.lisp create mode 100644 runes/dep-clisp.lisp create mode 100644 runes/dep-cmucl-dtc.lisp create mode 100644 runes/dep-cmucl.lisp create mode 100644 runes/dep-openmcl.lisp create mode 100644 runes/dep-sbcl.lisp create mode 100644 runes/encodings-data.lisp create mode 100644 runes/encodings.lisp create mode 100644 runes/package.lisp create mode 100644 runes/runes.lisp create mode 100644 runes/syntax.lisp create mode 100644 runes/util.lisp create mode 100644 runes/xstream.lisp create mode 100644 test/domtest.lisp create mode 100644 test/xmlconf-base.diff create mode 100644 test/xmlconf.lisp create mode 100644 xml/COPYING create mode 100644 xml/catalog.lisp create mode 100644 xml/characters.lisp create mode 100644 xml/dom-builder.lisp create mode 100644 xml/dom-impl.lisp create mode 100644 xml/dompack.lisp create mode 100644 xml/encodings-data.lisp create mode 100644 xml/encodings.lisp create mode 100644 xml/package.lisp create mode 100644 xml/recoder.lisp create mode 100644 xml/sax-handler.lisp create mode 100644 xml/sax-proxy.lisp create mode 100644 xml/sax-tests/event-collecting-handler.lisp create mode 100644 xml/sax-tests/package.lisp create mode 100644 xml/sax-tests/tests.lisp create mode 100644 xml/split-sequence.lisp create mode 100644 xml/string-dom.lisp create mode 100644 xml/unparse.lisp create mode 100644 xml/xml-canonic.lisp create mode 100644 xml/xml-name-rune-p.lisp create mode 100644 xml/xml-parse.lisp create mode 100644 xml/xml-stream.lisp create mode 100644 xml/xmls-compat.lisp create mode 100644 xmlconf.lisp diff --git a/CLISP.diff b/CLISP.diff new file mode 100644 index 0000000..e78ae79 --- /dev/null +++ b/CLISP.diff @@ -0,0 +1,17 @@ +--- orig/xml/xml-parse.lisp ++++ mod/xml/xml-parse.lisp +@@ -525,7 +525,10 @@ + (declaim (type (simple-array rune (*)) + *scratch-pad* *scratch-pad-2* *scratch-pad-3* *scratch-pad-4*)) + +-(defmacro with-scratch-pads (() &body body) ++(defmacro with-scratch-pads (#-clisp () ++ ;; clisp does not allow () as macro argument list ++ #+clisp (&key &allow-other-keys) ++ &body body) + `(let ((*scratch-pad* (make-array 1024 :element-type 'rune)) + (*scratch-pad-2* (make-array 1024 :element-type 'rune)) + (*scratch-pad-3* (make-array 1024 :element-type 'rune)) + + + diff --git a/GNUmakefile b/GNUmakefile new file mode 100644 index 0000000..d9c60d6 --- /dev/null +++ b/GNUmakefile @@ -0,0 +1,8 @@ +all: + @echo no such target + @exit 1 + +.PHONY: clean +clean: + touch dummy.fasl + find . \( -name \*.fasl -o -name \*.x86f \) -print0 | xargs -0 rm diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..9045bce --- /dev/null +++ b/NEWS @@ -0,0 +1,151 @@ +Changes to Gilbert Baumann's Code +======================================== +(Stand dieser Liste: patch-54) + +base-0 + Import of Closure's src/xml and src/glisp + + +Build system +---------------- +patch-14 + dom-builder.lsp braucht package.lisp +patch-17 + xml-parse braucht dom-impl +patch-18 + xml-parse braucht encodings +patch-19 + xml-parse.lisp needs xml-stream.lisp + + +Interface changes +---------------- +patch-2 + export UNPARSE-DOCUMENT + + +Portability issues +---------------- +patch-1 + ACL port aktualisiert +patch-8 + fixed more mlisp breakage + + +DOM fixes +---------------- +patch-3 + add dom:remove-child, dom:import-node +patch-6 + fixed dom:remove-child +patch-7 + strings->rods in set-attribute, too +patch-21 + dom:item und dom:length fuer NodeList implementiert +patch-22 + s/remove-atttribute/remove-attribute +patch-23 + dom:remove-attribute-node korrigiert +patch-24 + neu: dom:remove-attribute +patch-25 + dom:normalize implementiert +patch-26 + get-elements-by-tag-name fuer Element implementiert +patch-32 + s/data/value/ fuer CHARACTER-DATA +patch-33 + Aufruf von Setter-Methoden +patch-34 + (setf value) nachgetragen +patch-35 + (DOM:NODE-VALUE ATTRIBUTE) korrigiert +patch-36 + writer fuer DOM:DATA +patch-37 + (setf dom:node-value) implementiert +patch-43 + hack: implemented CHILD-NODES for ENTITY-REFERENCE +patch-44 + ENTITY-REFERENCE-Kinder als read-only markieren +patch-45 + DOM-EXCEPTION implementiert +patch-46 + fixed special cases in delete-data and replace-data +patch-47 + delete-data: Arraytyp korrigiert +patch-48 + DOM:INSERT-DATA implementiert +patch-49 + bugfix: replace-data for count != (length arg) +patch-50 + patch-46 nachgebessert: offset == length ist OK +patch-51 + fixed special cases in dom:substring-data +patch-52 + fixed patch-36, my (setf dom:data) implementation was bogus + + +xml-parse.lisp changes +---------------- +patch-5 + (assert (eql initial-speed 1)) in make-xstream +patch-20 + added a forward declaration for *namespace-bindings* +patch-39 + fix for thread safety in p/document +patch-41 + Warnung ueber (nicht) redefinierte Attribute abschalten koennen +patch-54 + call sax:comment; create comment nodes + + +String-Handling +---------------- +patch-4 + renamed dom to cdom, added string-dom +patch-38 + diverse setter nachgetragen + + +Misc. +---------------- +patch-9 + print elements with their tag-name +patch-11 + print attributes with name and value + +patch-10 + (reverted by patch-10) +patch-12 + REVERT patch-10 + + +domtest.cl +---------------- +patch-27 + alle DOM Level 1 CORE Tests uebersetzen koennen (mehr schlecht als recht) +patch-28 + so, jetzt kompilieren die DOM-Tests auch (wenngleich zwei drittel noch fehlschlagen) +patch-29 + einzelnen Test ausfuehren koennen +patch-30 + workaround +patch-31 + fuer nicht-Strings +patch-40 + ? +patch-42 + implementationAttribute-Probleme zwar ausgeben, aber kein WARN machen +patch-53 + domtest fixes fuer und + + +xmlconf.cl +---------------- +patch-13 + Testfunktion fuer XML Conformance Test Suite +patch-15 + run only tests for namespace-aware XML-1.0 parsers +patch-16 + mit korrektem OUTPUT abgleichen diff --git a/OLDNEWS b/OLDNEWS new file mode 100644 index 0000000..f7bf54a --- /dev/null +++ b/OLDNEWS @@ -0,0 +1,272 @@ +Changes to Gilbert Baumann's Code +======================================== +(Stand dieser Liste: patch-190) + +base-0 + Import of Closure's src/xml and src/glisp + + +Build system +---------------- +patch-14 + dom-builder.lsp braucht package.lisp +patch-17 + xml-parse braucht dom-impl +patch-18 + xml-parse braucht encodings +patch-19 + xml-parse.lisp needs xml-stream.lisp +patch-157 + DOM in eigenes Verzeichnis und System verschoben +patch-158 + COPYING auch im DOM +patch-160 + tests in eigenes Verzeichnis verschoben +patch-184 + commented out most of dep-clisp for now +patch-185 + CLISP fixes + + +glisp durch runes ersetzt +---------------- +patch-139 patch-140 patch-141 patch-142 patch-143 + unbenutzte Funktionen aus glisp entfernt + GLISP keine COMMON-LISP-Symbole mehr exportieren lassen + glisp defpackage weiter vereinfacht +patch-148 + runes.lisp aufgeteilt in runes.lisp und syntax.lisp +patch-149 + CHARACTER-basierte Runen-Implementation +patch-150 + removed support for oldish gcl +patch-151 + removed dep-gcl-2.lisp +patch-152 + clarified glisp license as LLGPL as per Gilbert Baumann +patch-155 + GLISP in RUNES umbenannt +patch-156 + xstream (und encoding) nach runes verschoben +patch-178 +patch-180 + really fixed rune-char + + +DOM fixes +---------------- +patch-3 + add dom:remove-child, dom:import-node +patch-6 + fixed dom:remove-child +patch-7 + strings->rods in set-attribute, too +patch-21 + dom:item und dom:length fuer NodeList implementiert +patch-22 + s/remove-atttribute/remove-attribute +patch-23 + dom:remove-attribute-node korrigiert +patch-24 + neu: dom:remove-attribute +patch-25 + dom:normalize implementiert +patch-26 + get-elements-by-tag-name fuer Element implementiert +patch-32 + s/data/value/ fuer CHARACTER-DATA +patch-33 + Aufruf von Setter-Methoden +patch-34 + (setf value) nachgetragen +patch-35 + (DOM:NODE-VALUE ATTRIBUTE) korrigiert +patch-36 + writer fuer DOM:DATA +patch-37 + (setf dom:node-value) implementiert +patch-43 + hack: implemented CHILD-NODES for ENTITY-REFERENCE +patch-44 + ENTITY-REFERENCE-Kinder als read-only markieren +patch-45 + DOM-EXCEPTION implementiert +patch-46 + fixed special cases in delete-data and replace-data +patch-47 + delete-data: Arraytyp korrigiert +patch-48 + DOM:INSERT-DATA implementiert +patch-49 + bugfix: replace-data for count != (length arg) +patch-50 + patch-46 nachgebessert: offset == length ist OK +patch-51 + fixed special cases in dom:substring-data +patch-52 + fixed patch-36, my (setf dom:data) implementation was bogus +patch-55 + temporary fix: attributes are created with value "" +patch-58 + START-DTD, END-DTD, DOCUMENT-TYPE initialisation +patch-60 + neu: CLONE-NODE +patch-65, patch-66 + verify attribute name syntax in createAttribute +patch-67 + more NAME syntax checks: CREATE-ELEMENT, SET-ATTRIBUTE +patch-68 + CREATE-ATTRIBUTE: set SPECIFIED to true +patch-69, patch-70 + INUSE_ATTRIBUTE_ERR +patch-71 + hacked my resolve-entity function to return NIL for undefined entities +patch-72 + INVALID_CHARACTER_ERR in create-entity-reference, too +patch-73 + Implement no-op methods on (setf node-value) where required... +patch-74 + fixed get-elements-by-tag-name not to include the argument itself +patch-76, patch-77 + implemented DOM:SPLIT-TEXT +patch-80 + noch unfertig: initialisiere dom:enitities richtig, erzeuge Entity-Knoten +patch-82 + dom:notations fuellen +patch-85 + WRONG_DOCUMENT_ERR auch in set-attribute-node +patch-86 + WRONG_DOCUMENT_ERR nicht nur in set-attribute-node, sondern prinzipiell in set-named-item +patch-91 + :NOT_FOUND_ERR in remove-named-item +patch-94 + can-adopt-p implementiert +patch-95 + ENSURE-VALID-INSERTION-REQUEST korrigiert +patch-96 + normalize korrigiert: cdata-section nicht beruehren +patch-98 + DOCUMENTs have owner NIL +patch-101 + (setf dom:data) fuer PI korrigiert +patch-102 + NOT_FOUND_ERR in REMOVE-CHILD +patch-104 + oops, split-text korrigiert +patch-106 + NOT_FOUND_ERROR in removeAttributeNode sucht das Objekt, nicht seinen Namen +patch-107, patch-113 + Defaultwert fuer fehlende Attribute ist der leere Rod-String, nicht NIL +patch-118 + entity und notation maps sind read-only +patch-119 + dom:item liefert NIL bei ungueltigem index +patch-120, patch-122, patch-124 + NodeList reimplementiert +patch-121 + NAMED-NODE-MAP muss auch auf HIERARCHY_REQUEST_ERR pruefen... +patch-128 + ATTRIBUTE hat jetzt Kinder +patch-129 + auch Attribute normalisieren +patch-130 + (setf dom:value) auf einem Attribut darf ein etwaiges Kinderobjekt nicht wiederverwenden +patch-131 + replace-child fuer document-fragment implementiert +patch-132 + CAN-ADOPT-P fuer Parent ATTRIBUTE und Kind CDATA-SECTION korrigiert +patch-133 + DOCUMENT darf nur jeweils ein ELEMENT- und DOCTYPE-Kind haben +patch-137 + neu: map-node-list, do-node-list. ensure-valid-insertion-request korrigiert +patch-165 + ANSI conformance fix in MOVE +patch-181 + ignore fill-pointers in MOVE + +xml-parse.lisp changes +---------------- +patch-5 + (assert (eql initial-speed 1)) in make-xstream +patch-20 + added a forward declaration for *namespace-bindings* +patch-39 + fix for thread safety in p/document +patch-41 + Warnung ueber (nicht) redefinierte Attribute abschalten koennen +patch-54 + call sax:comment; create comment nodes +patch-89 + public-id und system-id der Entities uebergeben +patch-100 + Die XML Deklaration ist keine Processing Instruction. +patch-146 + SAX-Aufrufe korrigiert fuer DTD ohne ID; Entitydeklaration mit SYSTEM ID +patch-166 + added missing format argument in internal-entity-expansion +patch-172 + fixed rod type in appenddata +patch-174 + reordered definitions to avoid forward references +patch-177 + more SBCL warnings removed +patch-188 + new function parse-octets +(See also: patch-58, patch-80, patch-82) + + +DOM-Builder und SAX-Interface +---------------- +patch-57 + Warnungen beseitigt ("undefined variable") +patch-75 + fixed PARENT slot initialization and added a rant about the current implementation +patch-97 + CDATA sections bauen +patch-136 + normalisierte Elemente bauen + +(See also: patch-58, patch-80, patch-82, patch-86, patch-118, patch-120) + + +unparse +---------------- +patch-2 + export UNPARSE-DOCUMENT +patch-144 + Kommentare verstehen (und nicht ausgeben) +patch-189 + new function UNPARSE-DOCUMENT-TO-OCTETS + + +Misc. +---------------- +patch-9 + print elements with their tag-name +patch-11 + print attributes with name and value + +patch-138 + workaround, need to revert this later + +patch-10 reverted by patch-12 +patch-114 reverted by patch-115 +patch-63 reverted by patch-134 +patch-4 patch-38 patch-87 patch-90 patch-103 reverted by patch-154 +patch-154 STRING-DOM nicht mehr verwenden. File ist aber noch da. + + +domtest.cl +---------------- +patch-27 patch-28 patch-29 patch-30 patch-31 patch-40 patch-42 patch-53 +patch-59 patch-61 patch-62 patch-64 patch-78 patch-79 patch-83 patch-84 +patch-88 patch-92 patch-93 patch-99 patch-105 patch-108 patch-111 +patch-116 patch-117 patch-123 patch-153 patch-182 + DOM tests + + +xmlconf.cl +---------------- +patch-13 patch-15 patch-16 patch-147 patch-186 + Testfunktion fuer XML Conformance Test Suite +(need to merge this with Gilbert's work) diff --git a/README.html b/README.html new file mode 100644 index 0000000..462e65a --- /dev/null +++ b/README.html @@ -0,0 +1,100 @@ + + +

Closure XML Parser

+ +

An XML parser written in Common Lisp.

+ +

+ Closure XML was written by Gilbert Baumann + (unk6 at rz.uni-karlsruhe.de) as part of the Closure web + browser.
+ Contributions to the parser by +

    +
  • + Henrik Motakef (hmot at henrik-motakef.de)
    + (SAX layer; namespace support) +
  • +
  • + David Lichteblau at knowledgeTools <david@knowledgetools.de>
    + (conversion into an independent package; DOM bug fixing) +
  • +
+

+ +

+

CXML Modules

+ + CXML provides three packages: +
    +
  • + RUNES, a portable implementation of Unicode strings. +
  • +
  • + XML, a namespace-aware SAX parser implementing the XML 1.0 + specification. +
  • +
  • + DOM, an implementation of the DOM + Level 1 Core interfaces. +
  • +
+

+ +

+

Installation

+ + Prerequisites. CXML is written in Common Lisp and should be + portable to all Common Lisp implementations.  Currently known + to work are ACL, SBCL, CMUCL, and CLISP. (fixme: check + this list) +

+

+ ASDF is used for + compilation. These instructions assume that ASDF has already been + loaded. (Some Lisps include ASDF, for example SBCL and any Lisp + on Debian and Gentoo. For other Lisps please load asdf.lisp + manually before proceeding.) +

+ +

+ Configuration (optional). + CXML has full Unicode code support -- even on Lisps without + Unicode strings. On non-unicode aware Lisps, DOMString + is implemented as an array of character codes. If your Lisp + supports 16 bit characters natively, you can enable feature + RUNE-IS-CHARACTER to select an alternative + DOMString implementatation, which uses real characters + instead of characters codes. +

  * (pushnew :rune-is-character *features*)
+

+ +

+ Compiling and loading CXML. + Register the .asd file, e.g. by symlinking it: +

  $ ln -sf `pwd`/cxms.asd /path/to/your/registry
+ Compile CXML using: +
  * (asdf:operate 'asdf:load-op :cxml)
+

+ +

+

Tests

+ + Check out the XML and DOM testsuites: +
  $ export CVSROOT=:pserver:anonymous@dev.w3.org:/sources/public
+  $ cvs login    # password is "anonymous"
+  $ cvs co 2001/XML-Test-Suite/xmlconf
+  $ cvs co 2001/DOM-Test-Suite
+ Run all applicable tests using: +
  * (xmlconf:run-all-tests "/path/to/2001/XML-Test-Suite/xmlconf/")
+  * (domtest:run-all-tests "/path/to/2001/2001/DOM-Test-Suite/")
+ (As always in Lisp, the trailing slash is significant.) +

+ +

+ fixme: Add an explanation of xml/sax-tests here. +

+ + diff --git a/TIMES b/TIMES new file mode 100644 index 0000000..59c494a --- /dev/null +++ b/TIMES @@ -0,0 +1,41 @@ +Time required for parsing a simple document (wc: 99621 298859 3267087). + +;; CXML with NIL builder +;; (cxml:parse-file "~/test.xml" nil) + +; cpu time (non-gc) 12,940 msec user, 20 msec system +; cpu time (gc) 0 msec user, 0 msec system +; cpu time (total) 12,940 msec user, 20 msec system +; real time 12,991 msec +; space allocation: +; 4,184,599 cons cells, 47,682,392 other bytes, 0 static bytes + +;; CXML with xmls-compatible builder +;; (cxml:parse-file "~/test.xml" (cxml-xmls:make-xmls-builder)) + +; cpu time (non-gc) 14,370 msec user, 20 msec system +; cpu time (gc) 0 msec user, 0 msec system +; cpu time (total) 14,370 msec user, 20 msec system +; real time 14,387 msec +; space allocation: +; 8,667,564 cons cells, 47,682,600 other bytes, 0 static bytes + +;; For comparison: xmls.lisp +;; (with-open-file (s "~/test.xml") (xmls:parse s :compress-whitespace nil)) + +; cpu time (non-gc) 27,440 msec user, 50 msec system +; cpu time (gc) 860 msec user, 0 msec system +; cpu time (total) 28,300 msec user, 50 msec system +; real time 28,813 msec +; space allocation: +; 14,821,161 cons cells, 243,886,592 other bytes, 0 static bytes + +;; CXML with DOM builder +;; (cxml:parse-file "~/test.xml" (dom:make-dom-builder)) + +; cpu time (non-gc) 34,900 msec user, 40 msec system +; cpu time (gc) 760 msec user, 0 msec system +; cpu time (total) 35,660 msec user, 40 msec system +; real time 35,822 msec +; space allocation: +; 14,645,503 cons cells, 300,235,640 other bytes, 0 static bytes diff --git a/XMLS-SYMBOLS.diff b/XMLS-SYMBOLS.diff new file mode 100644 index 0000000..5b83c02 --- /dev/null +++ b/XMLS-SYMBOLS.diff @@ -0,0 +1,98 @@ +* looking for david@knowledgetools.de--cxml/cxml--devel--1.0--patch-309 to compare with +* comparing to david@knowledgetools.de--cxml/cxml--devel--1.0--patch-309 +M xml/xmls-compat.lisp + +* modified files + +--- orig/xml/xmls-compat.lisp ++++ mod/xml/xmls-compat.lisp +@@ -12,7 +12,8 @@ + (defpackage cxml-xmls + (:use :cl :runes) + (:export #:make-node #:node-name #:node-ns #:node-attrs #:node-children +- #:make-xmls-builder #:map-node)) ++ #:make-xmls-builder #:map-node ++ #:*identifier-case*)) + + (in-package :cxml-xmls) + +@@ -64,6 +65,10 @@ + + ;;;; SAX-Handler (Parser) + ++(defvar *identifier-case* nil ++ "One of NIL (don't intern names), :PRESERVE, :UPCASE, :DOWNCASE, or :INVERT ++ (intern name into the keyword package after adjusting case).") ++ + (defclass xmls-builder () + ((element-stack :initform nil :accessor element-stack) + (root :initform nil :accessor root))) +@@ -74,16 +79,46 @@ + (defmethod sax:end-document ((handler xmls-builder)) + (root handler)) + ++(defun string-invert-case (str) ++ (map 'string ++ (lambda (c) ++ (cond ++ ((upper-case-p c) (char-downcase c)) ++ ((lower-case-p c) (char-upcase c)) ++ (t c))) ++ str)) ++ ++(defun maybe-intern (name) ++ (if *identifier-case* ++ (let ((str (if (stringp name) name (rod-string name)))) ++ (intern (ecase *identifier-case* ++ (:preserve str) ++ (:upcase (string-upcase str)) ++ (:downcase (string-downcase str)) ++ (:invert (string-invert-case str))) ++ :keyword)) ++ name)) ++ ++(defun maybe-stringify (name) ++ (if (symbolp name) ++ (let ((str (symbol-name name))) ++ (ecase *identifier-case* ++ (:preserve str) ++ (:upcase (string-downcase str)) ++ (:downcase (string-upcase str)) ++ (:invert (string-invert-case str)))) ++ name)) ++ + (defmethod sax:start-element + ((handler xmls-builder) namespace-uri local-name qname attributes) + (declare (ignore namespace-uri)) + (setf local-name (or local-name qname)) + (let* ((attributes + (mapcar (lambda (attr) +- (list (sax:attribute-qname attr) ++ (list (maybe-intern (sax:attribute-qname attr)) + (sax:attribute-value attr))) + attributes)) +- (node (make-node :name local-name ++ (node (make-node :name (maybe-intern local-name) + :ns (let ((lq (length qname)) + (ll (length local-name))) + (if (eql lq ll) +@@ -124,7 +159,7 @@ + (labels ((walk (node) + (let* ((attlist + (compute-attributes node include-xmlns-attributes)) +- (lname (rod (node-name node))) ++ (lname (rod (maybe-stringify (node-name node)))) + (ns (rod (node-ns node))) + (qname (concatenate 'rod ns (rod ":") lname))) + ;; fixme: namespaces +@@ -141,6 +176,7 @@ + (remove nil + (mapcar (lambda (a) + (destructuring-bind (name value) a ++ (setf name (maybe-stringify name)) + (if (or xmlnsp (not (cxml::xmlns-attr-p (rod name)))) + (sax:make-attribute :qname (rod name) + :value (rod value) + + + diff --git a/catalog.dtd b/catalog.dtd new file mode 100644 index 0000000..a755b61 --- /dev/null +++ b/catalog.dtd @@ -0,0 +1,149 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/contrib/xhtmlgen.lisp b/contrib/xhtmlgen.lisp new file mode 100644 index 0000000..0202ba7 --- /dev/null +++ b/contrib/xhtmlgen.lisp @@ -0,0 +1,352 @@ +;; xhtmlgen.lisp +;; This version by david@lichteblau.com for headcraft (http://headcraft.de/) +;; +;; Derived from htmlgen.cl: +;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA +;; +;; This code is free software; you can redistribute it and/or +;; modify it under the terms of the version 2.1 of +;; the GNU Lesser General Public License as published by +;; the Free Software Foundation, as clarified by the AllegroServe +;; prequel found in license-allegroserve.txt. +;; +;; This code is distributed in the hope that it will be useful, +;; but without any warranty; without even the implied warranty of +;; merchantability or fitness for a particular purpose. See the GNU +;; Lesser General Public License for more details. +;; +;; Version 2.1 of the GNU Lesser General Public License is in the file +;; license-lgpl.txt that was distributed with this file. +;; If it is not present, you can access it from +;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer +;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, +;; Suite 330, Boston, MA 02111-1307 USA + +(defpackage :xhtml-generator + (:use :common-lisp) + (:export #:with-html #:write-doctype)) + +(in-package :xhtml-generator) + +;; html generation + +(defstruct (html-process (:type list) (:constructor + make-html-process (key macro special + name-attr + ))) + key ; keyword naming this tag + macro ; the macro to define this + special ; if true then call this to process the keyword and return + ; the macroexpansion + name-attr ; attribute symbols which can name this object for subst purposes + ) + + +(defparameter *html-process-table* + (make-hash-table :test #'equal) ; #'eq is accurate but want to avoid rehashes + ) + +(defvar *html-sink*) + +(defun write-doctype (sink) + (sax:start-dtd sink + "html" + "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") + (sax:end-dtd sink)) + +(defmacro with-html (sink &rest forms &environment env) + `(let ((*html-sink* ,sink)) + ,(process-html-forms forms env))) + +(defun get-process (form) + (let ((ent (gethash form *html-process-table*))) + (unless ent + (error "unknown html keyword ~s" form)) + ent)) + +(defun process-html-forms (forms env) + (let (res) + (flet ((do-ent (ent args argsp body) + ;; ent is an html-process object associated with the + ;; html tag we're processing + ;; args is the list of values after the tag in the form + ;; ((:tag &rest args) ....) + ;; argsp is true if this isn't a singleton tag (i.e. it has + ;; a body) .. (:tag ...) or ((:tag ...) ...) + ;; body is the body if any of the form + ;; + (let ((special (html-process-special ent))) + (push (if special + (funcall special ent args argsp body) + `(,(html-process-macro ent) + ,args + ,(process-html-forms body env))) + res)))) + (do* ((xforms forms (cdr xforms)) + (form (car xforms) (car xforms))) + ((null xforms)) + + (setq form (macroexpand form env)) + + (if (atom form) + (typecase form + (keyword (do-ent (get-process form) nil nil nil)) + (string (push `(sax:characters *html-sink* ,form) res)) + (t (push form res))) + (let ((first (car form))) + (cond + ((keywordp first) + ;; (:xxx . body) form + (do-ent (get-process (car form)) nil t (cdr form))) + ((and (consp first) (keywordp (car first))) + ;; ((:xxx args ) . body) + (do-ent (get-process (caar form)) (cdr first) t (cdr form))) + (t + (push form res))))))) + `(progn ,@(nreverse res)))) + +(defun html-body-key-form (string-code args body) + (unless (evenp (length args)) + (error "attribute list ~S isn't even" args)) + `(let ((.tagname. ,string-code)) + (sax:start-element *html-sink* nil nil .tagname. + (list + ,@(loop + for (name value) on args by #'cddr + collect + `(sax:make-attribute + :qname ,(etypecase name + (symbol (symbol-name name)) + (string name)) + :value ,value + :specified-p t)))) + ,@body + (sax:end-element *html-sink* nil nil .tagname.))) + +(defun emit-without-quoting (str) + (let ((s (cxml::chained-handler *html-sink*))) + (cxml::maybe-close-tag s) + (map nil (lambda (c) (cxml::write-rune (char-code c) s)) str))) + +(defun princ-http (val) + (warn "use of deprecated :PRINC (use :PRINC-SAFE instead?)") + (emit-without-quoting (princ-to-string val))) + +(defun prin1-http (val) + (warn "use of deprecated :PRIN1 (use :PRIN1-SAFE instead?)") + (emit-without-quoting (prin1-to-string val))) + +(defun princ-safe-http (val) + (sax:characters *html-sink* (princ-to-string val))) + +(defun prin1-safe-http (val) + (sax:characters *html-sink* (prin1-to-string val))) + + +;; -- defining how html tags are handled. -- +;; +;; most tags are handled in a standard way and the def-std-html +;; macro is used to define such tags +;; +;; Some tags need special treatment and def-special-html defines +;; how these are handled. The tags requiring special treatment +;; are the pseudo tags we added to control operations +;; in the html generator. +;; +;; +;; tags can be found in three ways: +;; :br - singleton, no attributes, no body +;; (:b "foo") - no attributes but with a body +;; ((:a href="foo") "balh") - attributes and body +;; + +(defmacro def-special-html (kwd fcn) + ;; kwd - the tag we're defining behavior for. + ;; fcn - function to compute the macroexpansion of a use of this + ;; tag. args to fcn are: + ;; ent - html-process object holding info on this tag + ;; args - list of attribute-values following tag + ;; argsp - true if there is a body in this use of the tag + ;; body - list of body forms. + `(setf (gethash ,kwd *html-process-table*) + (make-html-process ,kwd nil ,fcn nil))) + +(def-special-html :newline + #'(lambda (ent args argsp body) + (declare (ignore ent args argsp)) + (when body + (error "can't have a body with :newline -- body is ~s" body)) + (emit-without-quoting (string #\newline)))) + +(def-special-html :princ + #'(lambda (ent args argsp body) + (declare (ignore ent args argsp)) + `(progn ,@(mapcar #'(lambda (bod) + `(princ-http ,bod)) + body)))) + +(def-special-html :princ-safe + #'(lambda (ent args argsp body) + (declare (ignore ent args argsp)) + `(progn ,@(mapcar #'(lambda (bod) + `(princ-safe-http ,bod)) + body)))) + +(def-special-html :prin1 + #'(lambda (ent args argsp body) + (declare (ignore ent args argsp)) + `(progn ,@(mapcar #'(lambda (bod) + `(prin1-http ,bod)) + body)))) + +(def-special-html :prin1-safe + #'(lambda (ent args argsp body) + (declare (ignore ent args argsp)) + `(progn ,@(mapcar #'(lambda (bod) + `(prin1-safe-http ,bod)) + body)))) + +(def-special-html :comment + #'(lambda (ent args argsp body) + (declare (ignore ent args argsp body)) + `(warn ":COMMENT in html macro not supported yet"))) + +(defmacro def-std-html (kwd name-attrs) + (let ((mac-name (intern (format nil "~a-~a" :with-html kwd))) + (string-code (string-downcase (string kwd)))) + `(progn (setf (gethash ,kwd *html-process-table*) + (make-html-process ,kwd + ',mac-name + nil + ',name-attrs)) + (defmacro ,mac-name (args &rest body) + (html-body-key-form ,string-code args body))))) + +(def-std-html :a nil) +(def-std-html :abbr nil) +(def-std-html :acronym nil) +(def-std-html :address nil) +(def-std-html :applet nil) +(def-std-html :area nil) + +(def-std-html :b nil) +(def-std-html :base nil) +(def-std-html :basefont nil) +(def-std-html :bdo nil) +(def-std-html :bgsound nil) +(def-std-html :big nil) +(def-std-html :blink nil) +(def-std-html :blockquote nil) +(def-std-html :body nil) +(def-std-html :br nil) +(def-std-html :button nil) + +(def-std-html :caption nil) +(def-std-html :center nil) +(def-std-html :cite nil) +(def-std-html :code nil) +(def-std-html :col nil) +(def-std-html :colgroup nil) + +(def-std-html :dd nil) +(def-std-html :del nil) +(def-std-html :dfn nil) +(def-std-html :dir nil) +(def-std-html :div nil) +(def-std-html :dl nil) +(def-std-html :dt nil) + +(def-std-html :em nil) +(def-std-html :embed nil) + +(def-std-html :fieldset nil) +(def-std-html :font nil) +(def-std-html :form :name) +(def-std-html :frame nil) +(def-std-html :frameset nil) + +(def-std-html :h1 nil) +(def-std-html :h2 nil) +(def-std-html :h3 nil) +(def-std-html :h4 nil) +(def-std-html :h5 nil) +(def-std-html :h6 nil) +(def-std-html :head nil) +(def-std-html :hr nil) +(def-std-html :html nil) + +(def-std-html :i nil) +(def-std-html :iframe nil) +(def-std-html :ilayer nil) +(def-std-html :img :id) +(def-std-html :input nil) +(def-std-html :ins nil) +(def-std-html :isindex nil) + +(def-std-html :kbd nil) +(def-std-html :keygen nil) + +(def-std-html :label nil) +(def-std-html :layer nil) +(def-std-html :legend nil) +(def-std-html :li nil) +(def-std-html :link nil) +(def-std-html :listing nil) + +(def-std-html :map nil) +(def-std-html :marquee nil) +(def-std-html :menu nil) +(def-std-html :meta nil) +(def-std-html :multicol nil) + +(def-std-html :nobr nil) +(def-std-html :noembed nil) +(def-std-html :noframes nil) +(def-std-html :noscript nil) + +(def-std-html :object nil) +(def-std-html :ol nil) +(def-std-html :optgroup nil) +(def-std-html :option nil) + +(def-std-html :p nil) +(def-std-html :param nil) +(def-std-html :plaintext nil) +(def-std-html :pre nil) + +(def-std-html :q nil) + +(def-std-html :s nil) +(def-std-html :samp nil) +(def-std-html :script nil) +(def-std-html :select nil) +(def-std-html :server nil) +(def-std-html :small nil) +(def-std-html :spacer nil) +(def-std-html :span :id) +(def-std-html :strike nil) +(def-std-html :strong nil) +(def-std-html :style nil) +(def-std-html :sub nil) +(def-std-html :sup nil) + +(def-std-html :table :name) +(def-std-html :tbody nil) +(def-std-html :td nil) +(def-std-html :textarea nil) +(def-std-html :tfoot nil) +(def-std-html :th nil) +(def-std-html :thead nil) +(def-std-html :title nil) +(def-std-html :tr nil) +(def-std-html :tt nil) + +(def-std-html :u nil) +(def-std-html :ul nil) + +(def-std-html :var nil) + +(def-std-html :wbr nil) + +(def-std-html :xmp nil) diff --git a/cxml.asd b/cxml.asd new file mode 100644 index 0000000..b3b8102 --- /dev/null +++ b/cxml.asd @@ -0,0 +1,58 @@ +(defpackage :cxml-system + (:use :asdf :cl)) +(in-package :cxml-system) + +(defclass closure-source-file (cl-source-file) ()) + +#+sbcl +(defmethod perform :around ((o compile-op) (s closure-source-file)) + ;; shut up already. Correctness first. + (handler-bind ((sb-ext:compiler-note #'muffle-warning)) + (call-next-method))) + +(unless (find-package :glisp) + (defpackage :glisp)) + +(defsystem glisp + :default-component-class closure-source-file + :pathname (merge-pathnames + "glisp/" + (make-pathname :name nil :type nil :defaults *load-truename*)) + :components + ((:file dependent + :pathname + #+CLISP "dep-clisp" + #+(AND :CMU (NOT :PTHREAD)) "dep-cmucl" + #+sbcl "dep-sbcl" + #+(AND :CMU :PTHREAD) "dep-cmucl-dtc" + #+(and allegro allegro-v5.0) "dep-acl5" + #+(and allegro (not allegro-v5.0)) "dep-acl" + #+GCL "dep-gcl" + #-(or sbcl CLISP CMU allegro GCL) #.(error "Configure!")) + (:file "package" + :depends-on (dependent)) + (:file "runes" + :depends-on ("package" dependent)) + (:file "util" + :depends-on ("package" dependent "runes")) + (:file "match" + :depends-on ("package" dependent "runes" "util")))) + +(asdf:defsystem :cxml + :default-component-class closure-source-file + :pathname (merge-pathnames + "cxml/" + (make-pathname :name nil :type nil :defaults *load-truename*)) + :components + ((:file "package") + (:file "encodings" :depends-on ("package")) + (:file "encodings-data" :depends-on ("package" "encodings")) + (:file "sax-handler") + (:file "dompack") + (:file "dom-impl" :depends-on ("dompack")) + (:file "dom-builder" :depends-on ("dom-impl" "sax-handler")) + (:file "xml-stream" :depends-on ("package")) + (:file "xml-name-rune-p" :depends-on ("package")) + (:file "xml-parse" :depends-on ("package" "dompack" "sax-handler")) + (:file "xml-canonic" :depends-on ("package" "dompack" "xml-parse"))) + :depends-on (:glisp)) diff --git a/documentation.css b/documentation.css new file mode 100644 index 0000000..e69de29 diff --git a/dom/COPYING b/dom/COPYING new file mode 100644 index 0000000..5615459 --- /dev/null +++ b/dom/COPYING @@ -0,0 +1,459 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + diff --git a/dom/dom-builder.lisp b/dom/dom-builder.lisp new file mode 100644 index 0000000..4337fdb --- /dev/null +++ b/dom/dom-builder.lisp @@ -0,0 +1,139 @@ +;;; XXX this DOM builder knows too much about the specifics of the DOM +;;; implementation for my taste. While document creation is not specified +;;; by the DOM Level 1 spec, we shouldn't really be manually setting slots +;;; in other nodes IMHO. +;;; +;;; As a follow-up to that, the children list is created in the wrong order +;;; and then reversed. Is it really worth the improved speed to do this? +;;; Calling APPEND-NODE would be portable. +;;; +;;; In particular, that design choice has lead to other bugs, for example the +;;; PARENT slot has to be set manually, too. A DOM test finally showed +;;; that this had been forgotten for Text nodes and PIs. +;;; +;;; Opinions? +;;; +;;; -- David + +;;; Now at least the children list isn't reversed anymore, because I changed +;;; the representation to be an extensible vector. Still its not clear to +;;; me whether the DOM Builder should be affected by such changes at all. +;;; +;;; -- David + +(in-package :dom-impl) + +(defclass dom-builder () + ((document :initform nil :accessor document) + (element-stack :initform '() :accessor element-stack))) + +(defun dom:make-dom-builder () + (make-instance 'dom-builder)) + +(defun fast-push (new-element vector) + (vector-push-extend new-element vector (max 1 (array-dimension vector 0)))) + +(defmethod sax:start-document ((handler dom-builder)) + (let ((document (make-instance 'dom-impl::document))) + (setf (slot-value document 'dom-impl::owner) nil + (slot-value document 'dom-impl::doc-type) nil) + (setf (document handler) document) + (push document (element-stack handler)))) + +(defmethod sax:end-document ((handler dom-builder)) + (setf (slot-value (document handler) 'entities) xml::*entities*) + (let ((doctype (dom:doctype (document handler)))) + (when doctype + (setf (slot-value (dom:entities doctype) 'read-only-p) t) + (setf (slot-value (dom:notations doctype) 'read-only-p) t))) + (document handler)) + +(defmethod sax:start-dtd ((handler dom-builder) name publicid systemid) + (declare (ignore publicid systemid)) + (let* ((document (document handler)) + (doctype (make-instance 'dom-impl::document-type + :name name + :notations (make-instance 'dom-impl::named-node-map + :element-type :notation + :owner document) + :entities (make-instance 'dom-impl::named-node-map + :element-type :entity + :owner document)))) + (setf (slot-value doctype 'dom-impl::owner) document + (slot-value document 'dom-impl::doc-type) doctype))) + +(defmethod sax:start-element ((handler dom-builder) namespace-uri local-name qname attributes) + (with-slots (document element-stack) handler + (let ((element (dom:create-element document qname)) + (parent (car element-stack))) + (dolist (attr attributes) + (dom:set-attribute element (xml::attribute-qname attr) (xml::attribute-value attr))) + (setf (slot-value element 'dom-impl::parent) parent) + (fast-push element (slot-value parent 'dom-impl::children)) + (push element element-stack)))) + +(defmethod sax:end-element ((handler dom-builder) namespace-uri local-name qname) + (pop (element-stack handler))) + +(defmethod sax:characters ((handler dom-builder) data) + (with-slots (document element-stack) handler + (let* ((parent (car element-stack)) + (last-child (dom:last-child parent))) + (cond + ((eq (dom:node-type parent) :cdata-section) + (setf (dom:data parent) data)) + ((and last-child (eq (dom:node-type last-child) :text)) + ;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer + ;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten + ;; erweitern, sonst ist das Dokument nicht normalisiert. + ;; (XXX Oder sollte man besser den Parser entsprechend aendern?) + (dom:append-data last-child data)) + (t + (let ((node (dom:create-text-node document data))) + (setf (slot-value node 'dom-impl::parent) parent) + (fast-push node (slot-value (car element-stack) 'dom-impl::children)))))))) + +(defmethod sax:start-cdata ((handler dom-builder)) + (with-slots (document element-stack) handler + (let ((node (dom:create-cdata-section document #"")) + (parent (car element-stack))) + (setf (slot-value node 'dom-impl::parent) parent) + (fast-push node (slot-value parent 'dom-impl::children)) + (push node element-stack)))) + +(defmethod sax:end-cdata ((handler dom-builder)) + (let ((node (pop (slot-value handler 'element-stack)))) + (assert (eq (dom:node-type node) :cdata-section)))) + +(defmethod sax:processing-instruction ((handler dom-builder) target data) + (with-slots (document element-stack) handler + (let ((node (dom:create-processing-instruction document target data)) + (parent (car element-stack))) + (setf (slot-value node 'dom-impl::parent) parent) + (fast-push node (slot-value (car element-stack) 'dom-impl::children))))) + +(defmethod sax:comment ((handler dom-builder) data) + (with-slots (document element-stack) handler + (let ((node (dom:create-comment document data)) + (parent (car element-stack))) + (setf (slot-value node 'dom-impl::parent) parent) + (fast-push node (slot-value (car element-stack) 'dom-impl::children))))) + +(defmethod sax:unparsed-entity-declaration + ((handler dom-builder) name public-id system-id notation-name) + (dom:set-named-item (dom:entities (dom:doctype (document handler))) + (make-instance 'dom-impl::entity + :owner (document handler) + :name name + :public-id public-id + :system-id system-id + :notation-name notation-name))) + +(defmethod sax:notation-declaration + ((handler dom-builder) name public-id system-id) + (dom:set-named-item (dom:notations (dom:doctype (document handler))) + (make-instance 'dom-impl::notation + :owner (document handler) + :name name + :public-id public-id + :system-id system-id))) diff --git a/dom/dom-impl.lisp b/dom/dom-impl.lisp new file mode 100644 index 0000000..823e642 --- /dev/null +++ b/dom/dom-impl.lisp @@ -0,0 +1,983 @@ +(defpackage :dom-impl + (:use :cl :runes)) + +(in-package :dom-impl) + +;; Classes + +(define-condition dom-exception (error) + ((key :initarg :key :reader dom-exception-key) + (string :initarg :string :reader dom-exception-string) + (arguments :initarg :arguments :reader dom-exception-arguments)) + (:report + (lambda (c s) + (format s "~A (~D):~%~?" + (dom-exception-key c) + (dom:code c) + (dom-exception-string c) + (dom-exception-arguments c))))) + +(defclass node () + ((parent :initarg :parent :initform nil) + (children :initarg :children :initform (make-node-list)) + (owner :initarg :owner :initform nil) + (read-only-p :initform nil :reader read-only-p) + (map :initform nil))) + +(defclass document (node) + ((doc-type :initarg :doc-type :reader dom:doctype) + (entities :initform nil :reader entities))) + +(defclass document-fragment (node) + ()) + +(defclass character-data (node) + ((value :initarg :data :reader dom:data))) + +(defclass attribute (node) + ((name :initarg :name :reader dom:name) + (specified-p :initarg :specified-p :reader dom:specified))) + +(defmethod print-object ((object attribute) stream) + (print-unreadable-object (object stream :type t :identity t) + (format stream "~A=~S" + (rod-string (dom:name object)) + (rod-string (dom:value object))))) + +(defclass element (node) + ((tag-name :initarg :tag-name :reader dom:tag-name) + (attributes :initarg :attributes :reader dom:attributes))) + +(defmethod print-object ((object element) stream) + (print-unreadable-object (object stream :type t :identity t) + (princ (rod-string (dom:tag-name object)) stream))) + +(defclass text (character-data) + ()) + +(defclass comment (character-data) + ()) + +(defclass cdata-section (text) + ()) + +(defclass document-type (node) + ((name :initarg :name :reader dom:name) + (entities :initarg :entities :reader dom:entities) + (notations :initarg :notations :reader dom:notations))) + +(defclass notation (node) + ((name :initarg :name :reader dom:name) + (public-id :initarg :public-id :reader dom:public-id) + (system-id :initarg :system-id :reader dom:system-id))) + +(defclass entity (node) + ((name :initarg :name :reader dom:name) + (public-id :initarg :public-id :reader dom:public-id) + (system-id :initarg :system-id :reader dom:system-id) + (notation-name :initarg :notation-name :reader dom:notation-name))) + +(defclass entity-reference (node) + ((name :initarg :name :reader dom:name))) + +(defclass processing-instruction (node) + ((target :initarg :target :reader dom:target) + (data :initarg :data :reader dom:data))) + +(defclass named-node-map () + ((items :initarg :items :reader dom:items + :initform nil) + (owner :initarg :owner :reader dom:owner-document) + (read-only-p :initform nil :reader read-only-p) + (element-type :initarg :element-type))) + + +;;; Implementation + +(defun assert-writeable (node) + (when (read-only-p node) + (dom-error :NO_MODIFICATION_ALLOWED_ERR "~S is marked read-only." node))) + +(defun dom:map-node-list (fn nodelist) + (dotimes (i (dom:length nodelist)) + (funcall fn (dom:item nodelist i)))) + +(defmacro dom:do-node-list ((var nodelist &optional resultform) &body body) + `(block nil + (dom:map-node-list (lambda (,var) ,@body) ,nodelist) + ,resultform)) + +(defmacro dovector ((var vector &optional resultform) &body body) + `(loop + for ,var across ,vector do (progn ,@body) + ,@(when resultform `(finally (return ,resultform))))) + +(defun move (from to from-start to-start length) + ;; like (setf (subseq to to-start (+ to-start length)) + ;; (subseq from from-start (+ from-start length))) + ;; but without creating the garbage + (if (< to-start from-start) + (loop + repeat length + for i from from-start + for j from to-start + do (setf (elt to j) (elt from i))) + (loop + repeat length + for i from (+ from-start length -1) by -1 + for j from (+ to-start length -1) by -1 + do (setf (elt to j) (elt from i))))) + +(defun adjust-vector-exponentially (vector new-dimension set-fill-pointer-p) + (let ((d (array-dimension vector 0))) + (when (< d new-dimension) + (loop + do (setf d (* 2 d)) + while (< d new-dimension)) + (adjust-array vector d)) + (when set-fill-pointer-p + (setf (fill-pointer vector) new-dimension)))) + +(defun make-space (vector &optional (n 1)) + (adjust-vector-exponentially vector (+ (length vector) n) nil)) + +(defun extension (vector) + (max (array-dimension vector 0) 1)) + +;; dom-exception + +(defun dom-error (key fmt &rest args) + (error 'dom-exception :key key :string fmt :arguments args)) + +(defmethod dom:code ((self dom-exception)) + (ecase (dom-exception-key self) + (:INDEX_SIZE_ERR 1) + (:DOMSTRING_SIZE_ERR 2) + (:HIERARCHY_REQUEST_ERR 3) + (:WRONG_DOCUMENT_ERR 4) + (:INVALID_CHARACTER_ERR 5) + (:NO_DATA_ALLOWED_ERR 6) + (:NO_MODIFICATION_ALLOWED_ERR 7) + (:NOT_FOUND_ERR 8) + (:NOT_SUPPORTED_ERR 9) + (:INUSE_ATTRIBUTE_ERR 10))) + +;; document-fragment protocol +;; document protocol + +(defmethod dom:implementation ((document document)) + 'implementation) + +(defmethod dom:document-element ((document document)) + (dovector (k (dom:child-nodes document)) + (cond ((typep k 'element) + (return k))))) + +(defmethod dom:create-element ((document document) tag-name) + (setf tag-name (rod tag-name)) + (unless (xml::valid-name-p tag-name) + (dom-error :INVALID_CHARACTER_ERR "not a name: ~A" (rod-string tag-name))) + (make-instance 'element + :tag-name tag-name + :owner document + :attributes (make-instance 'named-node-map + :element-type :attribute + :owner document))) + +(defmethod dom:create-document-fragment ((document document)) + (make-instance 'document-fragment + :owner document)) + +(defmethod dom:create-text-node ((document document) data) + (setf data (rod data)) + (make-instance 'text + :data data + :owner document)) + +(defmethod dom:create-comment ((document document) data) + (setf data (rod data)) + (make-instance 'comment + :data data + :owner document)) + +(defmethod dom:create-cdata-section ((document document) data) + (setf data (rod data)) + (make-instance 'cdata-section + :data data + :owner document)) + +(defmethod dom:create-processing-instruction ((document document) target data) + (setf target (rod target)) + (setf data (rod data)) + (unless (xml::valid-name-p target) + (dom-error :INVALID_CHARACTER_ERR "not a name: ~A" (rod-string target))) + (make-instance 'processing-instruction + :owner document + :target target + :data data)) + +(defmethod dom:create-attribute ((document document) name) + (setf name (rod name)) + (unless (xml::valid-name-p name) + (dom-error :INVALID_CHARACTER_ERR "not a name: ~A" (rod-string name))) + (make-instance 'attribute + :name name + :specified-p t + :owner document)) + +(defmethod dom:create-entity-reference ((document document) name) + (setf name (rod name)) + (unless (xml::valid-name-p name) + (dom-error :INVALID_CHARACTER_ERR "not a name: ~A" (rod-string name))) + (make-instance 'entity-reference + :name name + :owner document)) + +(defmethod get-elements-by-tag-name-internal (node tag-name) + (setf tag-name (rod tag-name)) + (let ((result (make-node-list))) + (setf tag-name (rod tag-name)) + (let ((wild-p (rod= tag-name '#.(string-rod "*")))) + (labels ((walk (n) + (dovector (c (dom:child-nodes n)) + (when (dom:element-p c) + (when (or wild-p (rod= tag-name (dom:node-name c))) + (vector-push-extend c result (extension result))) + (walk c))))) + (walk node))) + result)) + +(defmethod dom:get-elements-by-tag-name ((document document) tag-name) + (get-elements-by-tag-name-internal document tag-name)) + +;;; Node + +(defmethod dom:parent-node ((node node)) + (slot-value node 'parent)) + +(defmethod dom:child-nodes ((node node)) + (slot-value node 'children)) + +(defmethod dom:first-child ((node node)) + (dom:item (slot-value node 'children) 0)) + +(defmethod dom:last-child ((node node)) + (with-slots (children) node + (if (plusp (length children)) + (elt children (1- (length children))) + nil))) + +(defmethod dom:previous-sibling ((node node)) + (with-slots (parent) node + (when parent + (with-slots (children) parent + (let ((index (1- (position node children)))) + (if (eql index -1) + nil + (elt children index))))))) + +(defmethod dom:next-sibling ((node node)) + (with-slots (parent) node + (when parent + (with-slots (children) parent + (let ((index (1+ (position node children)))) + (if (eql index (length children)) + nil + (elt children index))))))) + +(defmethod dom:owner-document ((node node)) + (slot-value node 'owner)) + +(defun ensure-valid-insertion-request (node new-child) + (assert-writeable node) + (unless (can-adopt-p node new-child) + (dom-error :HIERARCHY_REQUEST_ERR "~S cannot adopt ~S." node new-child)) + #+(or) ;XXX needs to be moved elsewhere + (when (eq (dom:node-type node) :document) + (let ((child-type (dom:node-type new-child))) + (when (and (member child-type '(:element :document-type)) + (find child-type (dom:child-nodes node) :key #'dom:node-type)) + (dom-error :HIERARCHY_REQUEST_ERR + "~S cannot adopt a second child of type ~S." + node child-type)))) + (unless (eq (if (eq (dom:node-type node) :document) + node + (dom:owner-document node)) + (dom:owner-document new-child)) + (dom-error :WRONG_DOCUMENT_ERR + "~S cannot adopt ~S, since it was created by a different document." + node new-child)) + (do ((n node (dom:parent-node n))) + ((null n)) + (when (eq n new-child) + (dom-error :HIERARCHY_REQUEST_ERR + "~S cannot adopt ~S, since that would create a cycle" + node new-child))) + (unless (null (slot-value new-child 'parent)) + (dom:remove-child (slot-value new-child 'parent) new-child))) + +(defmethod dom:insert-before ((node node) (new-child node) ref-child) + (ensure-valid-insertion-request node new-child) + (with-slots (children) node + (if ref-child + (let ((i (position ref-child children))) + (unless i + (dom-error :NOT_FOUND_ERR "~S is no child of ~S." ref-child node)) + (make-space children 1) + (move children children i (1+ i) (- (length children) i)) + (incf (fill-pointer children)) + (setf (elt children i) new-child)) + (vector-push-extend new-child children (extension children))) + (setf (slot-value new-child 'parent) node) + new-child)) + +(defmethod dom:insert-before ((node node) (fragment document-fragment) ref-child) + (dovector (child (dom:child-nodes fragment)) + (dom:insert-before node child ref-child)) + fragment) + +(defmethod dom:replace-child ((node node) (new-child node) (old-child node)) + (ensure-valid-insertion-request node new-child) + (with-slots (children) node + (let ((i (position old-child children))) + (unless i + (dom-error :NOT_FOUND_ERR "~S is no child of ~S." old-child node)) + (setf (elt children i) new-child)) + (setf (slot-value new-child 'parent) node) + (setf (slot-value old-child 'parent) nil) + old-child)) + +(defmethod dom:replace-child + ((node node) (new-child document-fragment) (old-child node)) + (dom:insert-before node new-child old-child) + (dom:remove-child node old-child)) + +(defmethod dom:remove-child ((node node) (old-child node)) + (assert-writeable node) + (with-slots (children) node + (let ((i (position old-child children))) + (unless i + (dom-error :NOT_FOUND_ERR "~A not found in ~A" old-child node)) + (move children children (1+ i) i (- (length children) i 1)) + (decf (fill-pointer children))) + (setf (slot-value old-child 'parent) nil) + old-child)) + +(defmethod dom:append-child ((node node) (new-child node)) + (ensure-valid-insertion-request node new-child) + (with-slots (children) node + (vector-push-extend new-child children (extension children)) + (setf (slot-value new-child 'parent) node) + new-child)) + +(defmethod dom:has-child-nodes ((node node)) + (plusp (length (slot-value node 'children)))) + +(defmethod dom:append-child ((node node) (new-child document-fragment)) + (assert-writeable node) + (dovector (child (dom:child-nodes new-child)) + (dom:append-child node child)) + new-child) + +;; was auf node noch implemetiert werden muss: +;; - node-type +;; - can-adopt-p +;; - ggf attributes +;; - node-name +;; - node-value + +;; node-name + +(defmethod dom:node-name ((self document)) + '#.(string-rod "#document")) + +(defmethod dom:node-name ((self document-fragment)) + '#.(string-rod "#document-fragment")) + +(defmethod dom:node-name ((self text)) + '#.(string-rod "#text")) + +(defmethod dom:node-name ((self cdata-section)) + '#.(string-rod "#cdata-section")) + +(defmethod dom:node-name ((self comment)) + '#.(string-rod "#comment")) + +(defmethod dom:node-name ((self attribute)) + (dom:name self)) + +(defmethod dom:node-name ((self element)) + (dom:tag-name self)) + +(defmethod dom:node-name ((self document-type)) + (dom:name self)) + +(defmethod dom:node-name ((self notation)) + (dom:name self)) + +(defmethod dom:node-name ((self entity)) + (dom:name self)) + +(defmethod dom:node-name ((self entity-reference)) + (dom:name self)) + +(defmethod dom:node-name ((self processing-instruction)) + (dom:target self)) + +;; node-type + +(defmethod dom:node-type ((self document)) :document) +(defmethod dom:node-type ((self document-fragment)) :document-fragment) +(defmethod dom:node-type ((self text)) :text) +(defmethod dom:node-type ((self comment)) :comment) +(defmethod dom:node-type ((self cdata-section)) :cdata-section) +(defmethod dom:node-type ((self attribute)) :attribute) +(defmethod dom:node-type ((self element)) :element) +(defmethod dom:node-type ((self document-type)) :document-type) +(defmethod dom:node-type ((self notation)) :notation) +(defmethod dom:node-type ((self entity)) :entity) +(defmethod dom:node-type ((self entity-reference)) :entity-reference) +(defmethod dom:node-type ((self processing-instruction)) :processing-instruction) + +;; node-value + +(defmethod dom:node-value ((self document)) nil) +(defmethod dom:node-value ((self document-fragment)) nil) +(defmethod dom:node-value ((self character-data)) (dom:data self)) +(defmethod dom:node-value ((self attribute)) (dom:value self)) +(defmethod dom:node-value ((self element)) nil) +(defmethod dom:node-value ((self document-type)) nil) +(defmethod dom:node-value ((self notation)) nil) +(defmethod dom:node-value ((self entity)) nil) +(defmethod dom:node-value ((self entity-reference)) nil) +(defmethod dom:node-value ((self processing-instruction)) (dom:data self)) + +;; (setf node-value), first the meaningful cases... + +(defmethod (setf dom:node-value) (newval (self character-data)) + (assert-writeable self) + (setf (dom:data self) newval)) + +(defmethod (setf dom:node-value) (newval (self attribute)) + (assert-writeable self) + (setf (dom:value self) newval)) + +(defmethod (setf dom:node-value) (newval (self processing-instruction)) + (assert-writeable self) + (setf (dom:data self) newval)) + +;; ... and (setf node-value), part II. The DOM Level 1 spec fails to explain +;; this case, but it is covered by the (Level 1) test suite and clarified +;; in Level 2: +;; nodeValue of type DOMString +;; The value of this node, depending on its type; see the +;; table above. When it is defined to be null, setting +;; it has no effect. + +(defmethod (setf dom:node-value) (newval (self element)) + (declare (ignore newval))) + +(defmethod (setf dom:node-value) (newval (self entity-reference)) + (declare (ignore newval))) + +(defmethod (setf dom:node-value) (newval (self entity)) + (declare (ignore newval))) + +(defmethod (setf dom:node-value) (newval (self document)) + (declare (ignore newval))) + +(defmethod (setf dom:node-value) (newval (self document-type)) + (declare (ignore newval))) + +(defmethod (setf dom:node-value) (newval (self document-fragment)) + (declare (ignore newval))) + +(defmethod (setf dom:node-value) (newval (self notation)) + (declare (ignore newval))) + +;; attributes + +;; (gibt es nur auf element) + +(defmethod dom:attributes ((self node)) + nil) + +;; dann fehlt noch can-adopt und attribute conventions fuer adoption + +;;; NodeList + +(defun make-node-list (&optional initial-contents) + (make-array (length initial-contents) + :adjustable t + :fill-pointer (length initial-contents) + :initial-contents initial-contents)) + +(defmethod dom:item ((self vector) index) + (if (< index (length self)) + (elt self index) + nil)) + +(defmethod dom:length ((self vector)) + (length self)) + +;;; NAMED-NODE-MAP + +(defmethod dom:get-named-item ((self named-node-map) name) + (setf name (rod name)) + (with-slots (items) self + (dolist (k items nil) + (cond ((rod= name (dom:node-name k)) + (return k)))))) + +(defmethod dom:set-named-item ((self named-node-map) arg) + (assert-writeable self) + (unless (eq (dom:node-type arg) (slot-value self 'element-type)) + (dom-error :HIERARCHY_REQUEST_ERR + "~S cannot adopt ~S, since it is not of type ~S." + self arg (slot-value self 'element-type))) + (unless (eq (dom:owner-document self) (dom:owner-document arg)) + (dom-error :WRONG_DOCUMENT_ERR + "~S cannot adopt ~S, since it was created by a different document." + self arg)) + (let ((old-map (slot-value arg 'map))) + (when (and old-map (not (eq old-map self))) + (dom-error :INUSE_ATTRIBUTE_ERR "Attribute node already mapped" arg))) + (setf (slot-value arg 'map) self) + (let ((name (dom:node-name arg))) + (with-slots (items) self + (dolist (k items (progn (setf items (cons arg items))nil)) + (cond ((rod= name (dom:node-name k)) + (setf items (cons arg (delete k items))) + (return k))))))) + +(defmethod dom:remove-named-item ((self named-node-map) name) + (assert-writeable self) + (setf name (rod name)) + (with-slots (items) self + (dolist (k items (dom-error :NOT_FOUND_ERR "~A not found in ~A" name self)) + (cond ((rod= name (dom:node-name k)) + (setf items (delete k items)) + (return k)))))) + +(defmethod dom:length ((self named-node-map)) + (with-slots (items) self + (length items))) + +(defmethod dom:item ((self named-node-map) index) + (with-slots (items) self + (do ((nthcdr items (cdr nthcdr)) + (i index (1- i))) + ((zerop i) (car nthcdr))))) + +;;; CHARACTER-DATA + +(defmethod (setf dom:data) (newval (self character-data)) + (assert-writeable self) + (setf newval (rod newval)) + (setf (slot-value self 'value) newval)) + +(defmethod dom:length ((node character-data)) + (length (slot-value node 'value))) + +(defmethod dom:substring-data ((node character-data) offset count) + (with-slots (value) node + (unless (<= 0 offset (length value)) + (dom-error :INDEX_SIZE_ERR "offset is invalid")) + (let ((end (min (length value) (+ offset count)))) + (subseq value offset end)))) + +(defmethod dom:append-data ((node character-data) arg) + (assert-writeable node) + (setq arg (rod arg)) + (with-slots (value) node + (setf value (concatenate (type-of value) value arg))) + (values)) + +(defmethod dom:delete-data ((node character-data) offset count) + (assert-writeable node) + (with-slots (value) node + (unless (<= 0 offset (length value)) + (dom-error :INDEX_SIZE_ERR "offset is invalid")) + (when (minusp count) + (dom-error :INDEX_SIZE_ERR "count is negative")) + (setf count (min count (- (length value) offset))) + (let ((new (make-array (- (length value) count) + :element-type (array-element-type value)))) + (replace new value + :start1 0 :end1 offset + :start2 0 :end2 offset) + (replace new value + :start1 offset :end1 (length new) + :start2 (+ offset count) :end2 (length value)) + (setf value new))) + (values)) + +(defmethod dom:replace-data ((node character-data) offset count arg) + ;; Although we could implement this by calling DELETE-DATA, then INSERT-DATA, + ;; we implement this function directly to avoid creating temporary garbage. + (assert-writeable node) + (setf arg (rod arg)) + (with-slots (value) node + (unless (<= 0 offset (length value)) + (dom-error :INDEX_SIZE_ERR "offset is invalid")) + (when (minusp count) + (dom-error :INDEX_SIZE_ERR "count is negative")) + (setf count (min count (- (length value) offset))) + (if (= count (length arg)) + (replace value arg + :start1 offset :end1 (+ offset count) + :start2 0 :end2 count) + (let ((new (make-array (+ (length value) (length arg) (- count)) + :element-type (array-element-type value)))) + (replace new value :end1 offset) + (replace new arg :start1 offset) + (replace new value + :start1 (+ offset (length arg)) + :start2 (+ offset count)) + (setf value new)))) + (values)) + +(defmethod dom:insert-data ((node character-data) offset arg) + (assert-writeable node) + (setf arg (rod arg)) + (with-slots (value) node + (unless (<= 0 offset (length value)) + (dom-error :INDEX_SIZE_ERR "offset is invalid")) + (let ((new (make-array (+ (length value) (length arg)) + :element-type (array-element-type value))) + (arglen (length arg))) + (replace new value :end1 offset) + (replace new arg :start1 offset) + (replace new value :start1 (+ offset arglen) :start2 offset) + (setf value new))) + (values)) + +;;; ATTR +;;; +;;; An attribute value can be read and set as a string using DOM:VALUE +;;; or frobbed by changing the attribute's children! +;;; +;;; We store the value in a TEXT node and read this node's DATA slot +;;; when asked for our VALUE -- until the user changes the child nodes, +;;; in which case we have to compute VALUE by traversing the children. + +(defmethod dom:value ((node attribute)) + (with-slots (children) node + (cond + ((zerop (length children)) + #.(rod-string "")) + ((and (eql (length children) 1) + (eq (dom:node-type (elt children 0)) :text)) + ;; we have as single TEXT-NODE child, just return its DATA + (dom:data (elt children 0))) + (t + ;; traverse children to compute value + (attribute-to-string node))))) + +(defmethod (setf dom:value) (new-value (node attribute)) + (assert-writeable node) + (let ((rod (rod new-value))) + (with-slots (children owner) node + ;; remove children, add new TEXT-NODE child + ;; (alas, we must not reuse an old TEXT-NODE) + (while (plusp (length children)) + (dom:remove-child node (dom:last-child node))) + (dom:append-child node (dom:create-text-node owner rod)))) + new-value) + +(defun attribute-to-string (attribute) + (let ((stream (make-rod-stream))) + (flet ((doit () + (dovector (child (dom:child-nodes attribute)) + (write-attribute-child child stream)))) + (doit) + (initialize-rod-stream stream) + (doit)) + (rod-stream-buf stream))) + +(defmethod write-attribute-child ((node node) stream) + (write-rod (dom:node-value node) stream)) + +(defmethod write-attribute-child ((node entity-reference) stream) + (dovector (child (dom:child-nodes node)) + (write-attribute-child child stream))) + +;;; ROD-STREAM als Ersatz fuer MAKE-STRING-OUTPUT-STREAM zu verwenden, +;;; nur dass der Buffer statische Groesse hat. Solange er NIL ist, +;;; zaehlt der Stream nur die Runen. Dann ruft man INITIALIZE-ROD-STREAM +;;; auf, um den Buffer zu erzeugen und die Position zurueckzusetzen, und +;;; schreibt alles abermals. Dann ist der Buffer gefuellt. +(defstruct rod-stream + (buf nil) + (position 0)) + +(defun write-rod (rod rod-stream) + (let ((buf (rod-stream-buf rod-stream))) + (when buf + (move rod buf 0 (rod-stream-position rod-stream) (length rod))) + (incf (rod-stream-position rod-stream) (length rod))) + rod) + +(defun initialize-rod-stream (stream) + (setf (rod-stream-buf stream) (make-rod (rod-stream-position stream))) + (setf (rod-stream-position stream) 0) + stream) + +;;; ELEMENT + +(defmethod dom:get-attribute-node ((element element) name) + (dom:get-named-item (dom:attributes element) name)) + +(defmethod dom:set-attribute-node ((element element) (new-attr attribute)) + (assert-writeable element) + (dom:set-named-item (dom:attributes element) new-attr)) + +(defmethod dom:get-attribute ((element element) name) + (let ((a (dom:get-attribute-node element name))) + (if a + (dom:value a) + #.(string-rod "")))) + +(defmethod dom:set-attribute ((element element) name value) + (assert-writeable element) + (with-slots (owner) element + (let ((attr (dom:create-attribute owner name))) + (setf (dom:value attr) value) + (dom:set-attribute-node element attr)) + (values))) + +(defmethod dom:remove-attribute ((element element) name) + (assert-writeable element) + (dom:remove-attribute-node element (dom:get-attribute-node element name))) + +(defmethod dom:remove-attribute-node ((element element) (old-attr attribute)) + (assert-writeable element) + (with-slots (items) (dom:attributes element) + (unless (find old-attr items) + (dom-error :NOT_FOUND_ERR "Attribute not found.")) + (setf items (remove old-attr items)) + old-attr)) + +(defmethod dom:get-elements-by-tag-name ((element element) name) + (assert-writeable element) + (get-elements-by-tag-name-internal element name)) + +(defmethod dom:normalize ((element element)) + (assert-writeable element) + (labels ((walk (n) + (when (eq (dom:node-type n) :element) + (map nil #'walk (dom:items (dom:attributes n)))) + (let ((children (dom:child-nodes n)) + (i 0) + (previous nil)) + ;; careful here, we're modifying the array we are iterating over + (while (< i (length children)) + (let ((child (elt children i))) + (cond + ((not (eq (dom:node-type child) :text)) + (setf previous nil) + (incf i)) + ((and previous (eq (dom:node-type previous) :text)) + (setf (slot-value previous 'value) + (concatenate 'vector + (dom:data previous) + (dom:data child))) + (dom:remove-child n child) + ;; not (incf i) + ) + (t + (setf previous child) + (incf i)))))) + (map nil #'walk (dom:child-nodes n)))) + (walk element)) + (values)) + +;;; TEXT + +(defmethod dom:split-text ((text text) offset) + (assert-writeable text) + (with-slots (owner parent value) text + (unless (<= 0 offset (length value)) + (dom-error :INDEX_SIZE_ERR "offset is invalid")) + (prog1 + (dom:insert-before parent + (dom:create-text-node owner (subseq value offset)) + (dom:next-sibling text)) + (setf value (subseq value 0 offset))))) + +;;; COMMENT -- nix +;;; CDATA-SECTION -- nix + +;;; DOCUMENT-TYPE -- missing +;;; NOTATION -- nix +;;; ENTITY -- nix + +;;; ENTITY-REFERENCE + +(defmethod initialize-instance :after ((instance entity-reference) &key) + (let* ((owner (dom:owner-document instance)) + (entities (or (entities owner) xml::*entities*)) + (children (xml::resolve-entity (dom:name instance) entities))) + (setf (slot-value instance 'children) + (make-node-list + (map 'vector + (lambda (node) (dom:import-node owner node t)) + children)))) + (labels ((walk (n) + (setf (slot-value n 'read-only-p) t) + (when (dom:element-p n) + (map nil #'walk (dom:items (dom:attributes n)))) + (map nil #'walk (dom:child-nodes n)))) + (walk instance))) + +;;; PROCESSING-INSTRUCTION + +(defmethod (setf dom:data) (newval (self processing-instruction)) + (assert-writeable self) + (setf newval (rod newval)) + (setf (slot-value self 'data) newval)) + +;; das koennte man auch mit einer GF machen +(defun can-adopt-p (parent child) + (member (dom:node-type child) + (let ((default '(:element :processing-instruction :comment :text + :cdata-section :entity-reference))) + (etypecase parent + (document + '(:element :processing-instruction :comment :document-type)) + (document-fragment default) + (document-type nil) + (entity-reference default) + (element default) + (attribute '(:text :entity-reference)) + (processing-instruction nil) + (comment nil) + (text nil) + (cdata-section nil) + (entity default) + (notation nil))))) + + +;;; predicates + +(defmethod dom:node-p ((object node)) t) +(defmethod dom:node-p ((object t)) nil) + +(defmethod dom:document-p ((object document)) t) +(defmethod dom:document-p ((object t)) nil) + +(defmethod dom:document-fragment-p ((object document-fragment)) t) +(defmethod dom:document-fragment-p ((object t)) nil) + +(defmethod dom:character-data-p ((object character-data)) t) +(defmethod dom:character-data-p ((object t)) nil) + +(defmethod dom:attribute-p ((object attribute)) t) +(defmethod dom:attribute-p ((object t)) nil) + +(defmethod dom:element-p ((object element)) t) +(defmethod dom:element-p ((object t)) nil) + +(defmethod dom:text-node-p ((object text)) t) +(defmethod dom:text-node-p ((object t)) nil) + +(defmethod dom:comment-p ((object comment)) t) +(defmethod dom:comment-p ((object t)) nil) + +(defmethod dom:cdata-section-p ((object cdata-section)) t) +(defmethod dom:cdata-section-p ((object t)) nil) + +(defmethod dom:document-type-p ((object document-type)) t) +(defmethod dom:document-type-p ((object t)) nil) + +(defmethod dom:notation-p ((object notation)) t) +(defmethod dom:notation-p ((object t)) nil) + +(defmethod dom:entity-p ((object entity)) t) +(defmethod dom:entity-p ((object t)) nil) + +(defmethod dom:entity-reference-p ((object entity-reference)) t) +(defmethod dom:entity-reference-p ((object t)) nil) + +(defmethod dom:processing-instruction-p ((object processing-instruction)) t) +(defmethod dom:processing-instruction-p ((object t)) nil) + +(defmethod dom:named-node-map-p ((object named-node-map)) t) +(defmethod dom:named-node-map-p ((object t)) nil) + + +;;; IMPORT-NODE + +(defvar *clone-not-import* nil) ;not beautiful, I know. See below. + +(defmethod import-node-internal (class document node deep &rest initargs) + (let ((result (apply #'make-instance class :owner document initargs))) + (when deep + (dovector (child (dom:child-nodes node)) + (dom:append-child result (dom:import-node document child t)))) + result)) + +(defmethod dom:import-node ((document document) (node attribute) deep) + (declare (ignore deep)) + (import-node-internal 'attribute document node t :name (dom:name node))) + +(defmethod dom:import-node ((document document) (node document-fragment) deep) + (import-node-internal 'document-fragment document node deep)) + +(defmethod dom:import-node ((document document) (node element) deep) + (let* ((attributes (make-instance 'named-node-map + :element-type :attribute + :owner document)) + (result (import-node-internal 'element document node deep + :attributes attributes + :tag-name (dom:tag-name node)))) + (dolist (attribute (dom:items (dom:attributes node))) + (when (or (dom:specified attribute) *clone-not-import*) + (dom:set-attribute result (dom:name attribute) (dom:value attribute)))) + result)) + +(defmethod dom:import-node ((document document) (node entity) deep) + (import-node-internal 'entity document node deep + :public-id (dom:public-id node) + :system-id (dom:system-id node) + :notation-name (dom:notation-name node))) + +(defmethod dom:import-node ((document document) (node entity-reference) deep) + (declare (ignore deep)) + #+(or) + (import-node-internal 'entity-reference document node nil + :name (dom:name node)) + ;; XXX If the document being imported into provides a definition for + ;; this entity name, its value is assigned. + (dom-error :NOT_SUPPORTED_ERR "not implemented")) + +(defmethod dom:import-node ((document document) (node notation) deep) + (import-node-internal 'notation document node deep + :name (dom:name node) + :public-id (dom:public-id node) + :system-id (dom:system-id node))) + +(defmethod dom:import-node + ((document document) (node processing-instruction) deep) + (import-node-internal 'processing-instruction document node deep + :target (dom:target node) + :data (dom:data node))) + +;; TEXT_NODE, CDATA_SECTION_NODE, COMMENT_NODE +(defmethod dom:import-node + ((document document) (node character-data) deep) + (import-node-internal (class-of node) document node deep + :data (copy-seq (dom:data node)))) + +;;; CLONE-NODE +;;; +;;; As far as I can tell, cloneNode is the same as importNode, except +;;; for one difference involving element attributes: importNode imports +;;; only specified attributes, cloneNode copies even default values. +;;; +;;; Since I don't want to reimplement all of importNode here, we run +;;; importNode with a special flag... + +(defmethod dom:clone-node ((node node) deep) + (let ((*clone-not-import* t)) + (dom:import-node (dom:owner-document node) node deep))) diff --git a/dom/dom-sax.lisp b/dom/dom-sax.lisp new file mode 100644 index 0000000..75a90e6 --- /dev/null +++ b/dom/dom-sax.lisp @@ -0,0 +1,60 @@ +(in-package :dom-impl) + +(defun dom:map-document + (handler document + &key (include-xmlns-attributes sax:*include-xmlns-attributes*) + include-default-values) + (sax:start-document handler) + (let ((doctype (dom:doctype document))) + (when doctype + (sax:start-dtd handler (dom:name doctype) nil nil) + (let ((ns (dom:notations doctype))) + (dotimes (k (dom:length ns)) + (let ((n (dom:item ns k))) + (sax:notation-declaration handler + (dom:name n) + (dom:public-id n) + (dom:system-id n))) + ;; fixme: entities! + ) + (sax:end-dtd handler)))) + (labels ((walk (node) + (dom:do-node-list (child (dom:child-nodes node)) + (ecase (dom:node-type child) + (:element + ;; fixme: namespaces + (let ((attlist + (compute-attributes child + include-xmlns-attributes + include-default-values)) + (lname (dom:tag-name child)) + (qname (dom:tag-name child))) + (sax:start-element handler nil lname qname attlist) + (walk child) + (sax:end-element handler nil lname qname))) + (:cdata-section + (sax:start-cdata handler) + (sax:characters handler (dom:data child)) + (sax:end-cdata handler)) + (:text + (sax:characters handler (dom:data child))) + (:comment + (sax:comment handler (dom:data child))) + (:processing-instruction + (sax:processing-instruction handler + (dom:target child) + (dom:data child))))))) + (walk document)) + (sax:end-document handler)) + +(defun compute-attributes (element xmlnsp defaultp) + (let ((results '())) + (dom:do-node-list (a (dom:attributes element)) + (when (and (or defaultp (dom:specified a)) + (or xmlnsp (not (cxml::xmlns-attr-p (dom:name a))))) + (push + (cxml::make-attribute :qname (dom:name a) + :value (dom:value a) + :specified-p (dom:specified a)) + results))) + (reverse results))) diff --git a/dom/package.lisp b/dom/package.lisp new file mode 100644 index 0000000..971ed0c --- /dev/null +++ b/dom/package.lisp @@ -0,0 +1,111 @@ +(in-package :cl-user) + +(defpackage :dom + (:use) + (:export + + ;; lisp-specific extensions + #:make-dom-builder + + ;; methods + #:has-feature + #:doctype + #:implementation + #:document-element + #:create-element + #:create-document-fragment + #:create-text-node + #:create-comment + #:create-cdata-section + #:create-processing-instruction + #:create-attribute + #:create-entity-reference + #:get-elements-by-tag-name + #:node-name + #:node-value + #:node-type + #:parent-node + #:child-nodes + #:first-child + #:last-child + #:previous-sibling + #:next-sibling + #:attributes + #:owner-document + #:insert-before + #:replace-child + #:remove-child + #:append-child + #:has-child-nodes + #:clone-node + #:item + #:length + #:get-named-item + #:set-named-item + #:remove-named-item + #:data + #:substring-data + #:append-data + #:insert-data + #:delete-data + #:replace-data + #:name + #:specified + #:value + #:tag-name + #:get-attribute + #:set-attribute + #:remove-attribute + #:get-attribute-node + #:set-attribute-node + #:remove-attribute-node + #:normalize + #:split-text + #:entities + #:notations + #:public-id + #:system-id + #:notation-name + #:target + #:import-node + #:code + + ;; protocol classes + #:dom-implementation + #:document-fragment + #:document + #:node + #:node-list + #:named-node-map + #:character-data + #:attr + #:element + #:text + #:comment + #:cdata-section + #:document-type + #:notation + #:entity + #:entity-reference + #:processing-instruction + ;; + #:items + ;; + #:node-p + #:document-p + #:document-fragment-p + #:character-data-p + #:attribute-p + #:element-p + #:text-node-p + #:comment-p + #:cdata-section-p + #:document-type-p + #:notation-p + #:entity-p + #:entity-reference-p + #:processing-instruction-p + #:named-node-map-p + + #:map-node-list + #:do-node-list)) diff --git a/dom/simple-dom.lisp b/dom/simple-dom.lisp new file mode 100644 index 0000000..5de8215 --- /dev/null +++ b/dom/simple-dom.lisp @@ -0,0 +1,46 @@ +(in-package :xml) + +;;; Implementation of a simple but faster DOM. + +(defclass simple-document () + ((children :initform nil :accessor simple-document-children))) + +(defstruct node + parent) + +(defstruct (processing-instruction (:include node)) + target + data) + +(defstruct (text (:include node) + (:constructor make-text-boa (parent data))) + data) + +(defstruct (element (:include node)) + gi + attributes + children) + +(defmethod dom:create-processing-instruction ((document simple-document) target data) + (make-processing-instruction :target target :data data)) + +(defmethod dom:append-child ((node element) child) + (setf (node-parent child) node) + (push child (element-children node))) + +(defmethod dom:append-child ((node simple-document) child) + (push child (simple-document-children node)) + nil) + +(defmethod dom:create-element ((document simple-document) name) + (make-element :gi name)) + +(defmethod dom:set-attribute ((node element) name value) + (push (cons name value) + (element-attributes node))) + +(defmethod dom:create-text-node ((document simple-document) data) + (make-text-boa nil data)) + +(defmethod dom:create-cdata-section ((document simple-document) data) + (make-text-boa nil data)) diff --git a/dom/string-dom.lisp b/dom/string-dom.lisp new file mode 100644 index 0000000..0830cb9 --- /dev/null +++ b/dom/string-dom.lisp @@ -0,0 +1,66 @@ +;;; A wrapper package STRING-DOM around the ordinary DOM presents +;;; DOMString as Lisp STRING. This was a workaround until +;;; RUNE-IS-CHARACTER was implemented, but might still be useful on +;;; Lisps without Unicode support. + +(defpackage :string-dom + (:use)) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (do-external-symbols (var :dom) + (let* ((home-package + (if (member var '(dom:data dom:name dom:value dom:tag-name + dom:node-name dom:node-value + dom:substring-data dom:get-attribute + dom:set-attribute dom:public-id dom:system-id + dom:notation-name dom:target)) + :string-dom + :dom)) + (symbol (intern (symbol-name var) home-package))) + (import symbol :string-dom) + (export (list symbol) :string-dom)))) + +(defpackage :string-dom-impl (:use :cl)) +(in-package :string-dom-impl) + +(defun rod-to-string (frob) + (if (null frob) + nil + (map 'string #'code-char frob))) + +(defun string-dom:data (node) (rod-to-string (dom:data node))) +(defun string-dom:name (node) (rod-to-string (dom:name node))) +(defun string-dom:value (node) (rod-to-string (dom:value node))) +(defun string-dom:tag-name (node) (rod-to-string (dom:tag-name node))) +(defun string-dom:node-name (node) (rod-to-string (dom:node-name node))) +(defun string-dom:node-value (node) (rod-to-string (dom:node-value node))) + +(defun (setf string-dom:data) (newval node) + (setf (dom:data node) newval)) + +(defun (setf string-dom:value) (newval node) + (setf (dom:value node) newval)) + +(defun (setf string-dom:node-value) (newval node) + (setf (dom:node-value node) newval)) + +(defun string-dom:substring-data (node offset count) + (rod-to-string (dom:substring-data node offset count))) + +(defun string-dom:get-attribute (elt name) + (rod-to-string (dom:get-attribute elt name))) + +(defun string-dom:set-attribute (elt name value) + (dom:set-attribute elt (runes:rod name) (runes:rod value))) + +(defun string-dom:public-id (node) + (rod-to-string (dom:public-id node))) + +(defun string-dom:system-id (node) + (rod-to-string (dom:system-id node))) + +(defun string-dom:notation-name (node) + (rod-to-string (dom:notation-name node))) + +(defun string-dom:target (node) + (rod-to-string (dom:target node))) diff --git a/dom/unparse.lisp b/dom/unparse.lisp new file mode 100644 index 0000000..d39a58f --- /dev/null +++ b/dom/unparse.lisp @@ -0,0 +1,9 @@ +(in-package :cxml) + +(defun unparse-document-to-octets (doc &rest initargs) + (let ((sink (apply #'make-octet-vector-sink initargs))) + (dom:map-document sink doc :include-default-values t))) + +(defun unparse-document (doc character-stream &rest initargs) + (let ((sink (apply #'make-character-stream-sink character-stream initargs))) + (dom:map-document sink doc :include-default-values t))) diff --git a/dom/xml-canonic.lisp b/dom/xml-canonic.lisp new file mode 100644 index 0000000..f5908f6 --- /dev/null +++ b/dom/xml-canonic.lisp @@ -0,0 +1,161 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: XML; readtable: runes; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Dump canonic XML according to J.Clark +;;; Created: 1999-09-09 +;;; Author: Gilbert Baumann +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :xml) + +;; +;; | Canonical XML +;; | ============= +;; | +;; | This document defines a subset of XML called canonical XML. The +;; | intended use of canonical XML is in testing XML processors, as a +;; | representation of the result of parsing an XML document. +;; | +;; | Every well-formed XML document has a unique structurally equivalent +;; | canonical XML document. Two structurally equivalent XML documents have +;; | a byte-for-byte identical canonical XML document. Canonicalizing an +;; | XML document requires only information that an XML processor is +;; | required to make available to an application. +;; | +;; | A canonical XML document conforms to the following grammar: +;; | +;; | CanonXML ::= Pi* element Pi* +;; | element ::= Stag (Datachar | Pi | element)* Etag +;; | Stag ::= '<' Name Atts '>' +;; | Etag ::= '' +;; | Pi ::= '' Char*)) '?>' +;; | Atts ::= (' ' Name '=' '"' Datachar* '"')* +;; | Datachar ::= '&' | '<' | '>' | '"' +;; | | ' '| ' '| ' ' +;; | | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD)) +;; | Name ::= (see XML spec) +;; | Char ::= (see XML spec) +;; | S ::= (see XML spec) +;; | +;; | Attributes are in lexicographical order (in Unicode bit order). +;; | +;; | A canonical XML document is encoded in UTF-8. +;; | +;; | Ignorable white space is considered significant and is treated +;; | equivalently to data. +;; +;; -- James Clark (jjc@jclark.com) + +(defvar *quux*) ;!!!BIG HACK!!! + +(defun unparse-document (doc sink) + (map nil (rcurry #'unparse-node sink) (dom:child-nodes doc))) + +(defun unparse-node (node sink) + (cond ((dom:element-p node) + (write-rune #/< sink) + (write-rod (dom:tag-name node) sink) + ;; atts + (let ((atts (sort (copy-list (dom:items (dom:attributes node))) + #'rod< :key #'dom:name))) + (dolist (a atts) + (write-rune #/space sink) + (write-rod (dom:name a) sink) + (write-rune #/= sink) + (write-rune #/\" sink) + (let ((*quux* nil)) + (map nil (lambda (c) (unparse-datachar c sink)) (dom:value a))) + (write-rune #/\" sink))) + (write-rod '#.(string-rod ">") sink) + (dom:do-node-list (k (dom:child-nodes node)) + (unparse-node k sink)) + (write-rod '#.(string-rod "") sink)) + ((dom:processing-instruction-p node) + (unless (rod-equal (dom:target node) '#.(string-rod "xml")) + (write-rod '#.(string-rod "") sink) )) + ((dom:text-node-p node) + (let ((*quux* nil)) + (map nil (lambda (c) (unparse-datachar c sink)) + (dom:data node)))) + ((dom:comment-p node)) + (t + (error "Oops in unparse: ~S." node)))) + +(defun unparse-datachar (c sink) + (cond ((rune= c #/&) (write-rod '#.(string-rod "&") sink)) + ((rune= c #/<) (write-rod '#.(string-rod "<") sink)) + ((rune= c #/>) (write-rod '#.(string-rod ">") sink)) + ((rune= c #/\") (write-rod '#.(string-rod """) sink)) + ((rune= c #/U+0009) (write-rod '#.(string-rod " ") sink)) + ((rune= c #/U+000A) (write-rod '#.(string-rod " ") sink)) + ((rune= c #/U+000D) (write-rod '#.(string-rod " ") sink)) + (t + (write-rune c sink)))) + +(defun write-rod (rod sink) + (let ((*quux* nil)) + (map nil (lambda (c) (write-rune c sink)) rod))) + +(defun write-rune (rune sink) + (let ((code (rune-code rune))) + (cond ((<= #xD800 code #xDBFF) + (setf *quux* code)) + ((<= #xDC00 code #xDFFF) + (let ((q (logior (ash (- *quux* #xD7C0) 10) (- code #xDC00)))) + (write-rune-0 q sink)) + (setf *quux* nil)) + (t + (write-rune-0 code sink))))) + +(defun write-rune-0 (code sink) + (labels ((wr (x) + (write-char (code-char x) sink))) + (cond ((<= #x00000000 code #x0000007F) + (wr code)) + ((<= #x00000080 code #x000007FF) + (wr (logior #b11000000 (ldb (byte 5 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x00000800 code #x0000FFFF) + (wr (logior #b11100000 (ldb (byte 4 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x00010000 code #x001FFFFF) + (wr (logior #b11110000 (ldb (byte 3 18) code))) + (wr (logior #b10000000 (ldb (byte 6 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x00200000 code #x03FFFFFF) + (wr (logior #b11111000 (ldb (byte 2 24) code))) + (wr (logior #b10000000 (ldb (byte 6 18) code))) + (wr (logior #b10000000 (ldb (byte 6 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x04000000 code #x7FFFFFFF) + (wr (logior #b11111100 (ldb (byte 1 30) code))) + (wr (logior #b10000000 (ldb (byte 6 24) code))) + (wr (logior #b10000000 (ldb (byte 6 18) code))) + (wr (logior #b10000000 (ldb (byte 6 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code))))))) diff --git a/domtest.lisp b/domtest.lisp new file mode 100644 index 0000000..c9fc910 --- /dev/null +++ b/domtest.lisp @@ -0,0 +1,433 @@ +#+(or) +(defpackage :domtest + (:use :cl :xml) + (:alias (:string-dom :dom))) +(defpackage :domtest-tests + (:use)) +(in-package :domtest) + +(defparameter *directory* "~/src/2001/DOM-Test-Suite/") + + +;;;; allgemeine Hilfsfunktionen + +(defmacro string-case (keyform &rest clauses) + (let ((key (gensym "key"))) + `(let ((,key ,keyform)) + (declare (ignorable ,key)) + (cond + ,@(loop + for (keys . forms) in clauses + for test = (etypecase keys + (string `(string= ,key ,keys)) + (sequence `(find ,key ,keys :test 'string=)) + ((eql t) t)) + collect + `(,test ,@forms)))))) + +(defun rcurry (function &rest args) + (lambda (&rest more-args) + (apply function (append more-args args)))) + +(defmacro for ((&rest clauses) &rest body-forms) + `(%for ,clauses (progn ,@body-forms))) + +(defmacro for* ((&rest clauses) &rest body-forms) + `(%for* ,clauses (progn ,@body-forms))) + +(defmacro %for ((&rest clauses) body-form &rest finally-forms) + (for-aux 'for clauses body-form finally-forms)) + +(defmacro %for* ((&rest clauses) body-form &rest finally-forms) + (for-aux 'for* clauses body-form finally-forms)) + +(defmacro for-finish () + '(loop-finish)) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun for-aux (kind clauses body-form finally-forms) + ` (loop ,@ (loop for firstp = t then nil + for %clauses = clauses then (rest %clauses) + for clause = (first %clauses) then (first %clauses) + while (and %clauses (listp clause)) + append (cons (ecase kind + (for (if firstp 'as 'and)) + (for* 'as)) + (if (= 2 (length clause)) + (list (first clause) '= (second clause)) + clause)) + into result + finally (return (append result %clauses))) + do (progn ,body-form) + finally (progn ,@finally-forms)))) + + +;;;; spezielle Hilfsfunktionen + +(defmacro with-attributes ((&rest attributes) element &body body) + (let ((e (gensym "element"))) + `(let* ((,e ,element) + ,@(mapcar (lambda (var) + `(,var (dom:get-attribute ,e ,(symbol-name var)))) + attributes)) + ,@body))) + +(defun map-child-elements (result-type fn element &key name) + (remove '#1=#:void + (map result-type + (lambda (node) + (if (and (eq (dom:node-type node) :element) + (or (null name) + (equal (dom:tag-name node) name))) + (funcall fn node) + '#1#)) + (dom:child-nodes element)))) + +(defmacro do-child-elements ((var element &key name) &body body) + `(block nil + (map-child-elements nil (lambda (,var) ,@body) ,element :name ,name))) + +(defun find-child-element (name element) + (do-child-elements (child element :name name) + (return child))) + +(defun %intern (name) + (intern name :domtest-tests)) + +(defun replace-studly-caps (str) + ;; s/([A-Z][a-z])/-\1/ + (with-output-to-string (out) + (with-input-from-string (in str) + (for ((first = t :then nil) + (c = (read-char in nil nil)) + (next = (peek-char nil in nil nil)) + :while c) + (when (and (not first) (upper-case-p c) next (lower-case-p next)) + (write-char #\- out)) + (write-char (char-downcase c) out))))) + +(defun intern-dom (name) + (intern (replace-studly-caps name) :dom)) + +(defun child-elements (element) + (map-child-elements 'list #'identity element)) + +(defun parse-java-literal (str) + (cond + ((null str) nil) ;? + ((equal str "true") + t) + ((equal str "false") + nil) + ((digit-char-p (char str 0)) + (parse-integer str)) + ((char= (char str 0) #\") + (let ((end (1- (length str)))) + (assert (char= (char str end) #\")) + (subseq str 1 end))) + (t + (%intern str)))) + +(defmacro maybe-setf (place form) + (if place + `(setf ,place ,form) + form)) + + +;;;; dom1-interfaces.xml auslesen + +(defvar *methods* '()) +(defvar *fields* '()) + +(defun read-members (&optional (directory *directory*)) + (let* ((pathname (merge-pathnames "patches/dom1-interfaces.xml" directory)) + (library (dom:document-element (xml:parse-file pathname))) + (methods '()) + (fields '())) + (do-child-elements (interface library :name "interface") + (do-child-elements (method interface :name "method") + (let ((parameters (find-child-element "parameters" method))) + (push (cons (dom:get-attribute method "name") + (map-child-elements 'list + (rcurry #'dom:get-attribute "name") + parameters + :name "param")) + methods))) + (do-child-elements (attribute interface :name "attribute") + (push (dom:get-attribute attribute "name") fields))) + (values methods fields))) + + +;;;; Conditions uebersetzen + +(defun translate-condition (element) + (string-case (dom:tag-name element) + ("equals" (translate-equals element)) + ("contentType" (translate-content-type element)) + ("implementationAttribute" (assert-have-implementation-attribute element)) + ("isNull" (translate-is-null element)) + ("not" (translate-is-null element)) + ("notNull" (translate-not-null element)) + ("same" (translate-same element)) + (t (error "unknown condition: ~A" element)))) + +(defun translate-equals (element) + (with-attributes (|actual| |expected| |ignoreCase|) element + `(,(if (parse-java-literal |ignoreCase|) 'string-equal 'string=) + ,(%intern actual) + ,(parse-java-literal expected)))) + +(defun translate-same (element) + (with-attributes (|actual| |expected|) element + `(eql ,(%intern actual) ,(parse-java-literal expected)))) + +(defun translate-instance-of (element) + (with-attributes (|obj| |type|) element + `(typep ,(%intern |obj|) ,(intern-dom |type|)))) + +(defun translate-is-null (element) + (with-attributes (|obj|) element + `(null ,(%intern |obj|)))) + +(defun translate-not-null (element) + (with-attributes (|obj|) element + (%intern |obj|))) + +(defun translate-content-type (element) ;XXX verstehe ich nicht + (with-attributes (|type|) element + `(equal ,(parse-java-literal |type|) "text/xml"))) + +(defun translate-uri-equals (element) + (with-attributes + (|actual| + |scheme| |path| |host| |file| |name| |query| |fragment| |isAbsolute|) + element + |isAbsolute| + `(let ((uri ,(%intern |actual|))) + (and (string-equalp ,|scheme| (net.uri:uri-scheme uri)) + (equal ,|host| (net.uri:uri-host uri)) + (equal ,|path| (net.uri:uri-path uri)) + (equal ,|file| "???") + (equal ,|name| "???") + (equal ,|query| (net.uri:uri-query uri)) + (equal ,|fragment| (net.uri:uri-fragment uri)) + ;; isabsolute + nil)))) + + +;;;; Statements uebersetzen + +(defun translate-statement (element) + (string-case (dom:tag-name element) + ("append" (translate-append element)) + ("assertDOMException" (translate-assert-domexception element)) + ("assertEquals" (translate-assert-equals element)) + ("assertNotNull" (translate-assert-not-null element)) + ("assertInstanceOf" (translate-assert-instance-of element)) + ("assertNull" (translate-assert-null element)) + ("assertSame" (translate-assert-same element)) + ("assertSize" (translate-assert-size element)) + ("assertTrue" (translate-assert-true element)) + ("assertFalse" (translate-assert-true element)) + ("assertURIEquals" (translate-assert-uri-equals element)) + ("for-each" (translate-for-each element)) + ("fail" (translate-fail element)) + ("if" (translate-if element)) + ("increment" (translate-unary-assignment '+ element)) + ("decrement" (translate-unary-assignment '- element)) + ("load" (translate-load element)) + ("plus" (translate-binary-assignment '+ element)) + ("try" (translate-try element)) + ("while" (translate-while element)) + (t (translate-member element)))) + +(defun translate-binary-assignment (fn element) + (with-attributes (|var| |op1| |op2|) element + `(maybe-setf ,(%intern |var|) (,fn ,(%intern |op1|) ,(%intern |op2|))))) + +(defun translate-unary-assignment (fn element) + (with-attributes (|var| |value|) element + `(maybe-setf ,(%intern |var|) + (,fn ,(%intern |var|) ,(parse-java-literal |value|))))) + +(defun translate-load (load) + (with-attributes (|var| |href| |willBeModified|) load + `(maybe-setf ,(%intern |var|) + (load-file ,|href| ,(parse-java-literal |willBeModified|))))) + +(defun translate-call (call method) + (let ((name (car method)) + (args (mapcar (lambda (name) + (parse-java-literal (dom:get-attribute call name))) + (cdr method)))) + (with-attributes (|var| |obj|) call + `(maybe-setf ,(%intern |var|) (,(intern-dom name) ,|obj| ,@args))))) + +(defun translate-get (call name) + (with-attributes (|var| |obj|) call + `(maybe-setf ,(%intern |var|) (,(intern-dom name) ,|obj|)))) + +(defun translate-fail (element) + (declare (ignore element)) + `(error "failed")) + +(defun translate-member (element) + (let* ((name (dom:tag-name element)) + (method (find name *methods* :key #'car :test #'equal)) + (field (find name *fields* :test #'equal))) + (cond + (method (translate-call element method)) + (field (translate-get element field)) + (t (error "unknown element ~A" element))))) + +(defun translate-assert-equals (element) + `(assert ,(translate-equals element))) + +(defun translate-assert-same (element) + `(assert ,(translate-same element))) + +(defun translate-assert-null (element) + (with-attributes (|actual|) element + `(assert (null ,(%intern |actual|))))) + +(defun translate-assert-not-null (element) + (with-attributes (|actual|) element + `(assert ,(%intern |actual|)))) + +(defun translate-assert-size (element) + (with-attributes (|collection| |size|) element + `(assert (eql (length ,(%intern |collection|)) ,(%intern |size|))))) + +(defun translate-assert-instance-of (element) + `(assert ,(translate-instance-of element))) + +(defun translate-if (element) + (destructuring-bind (condition &rest rest) + (child-elements element) + (let (then else) + (dolist (r rest) + (when (equal (dom:tag-name r) "else") + (setf else (child-elements r)) + (return)) + (push r then)) + `(cond + (,(translate-condition condition) + ,@(mapcar #'translate-statement (reverse then))) + (t + ,@(mapcar #'translate-statement else)))))) + +(defun translate-while (element) + (destructuring-bind (condition &rest body) + (child-elements element) + `(loop + while ,(translate-condition condition) + do (progn ,@(mapcar #'translate-statement body))))) + +(defun translate-assert-domexception (element) + (do-child-elements (c element) + (unless (equal (dom:tag-name c) "metadata") + (return + `(progn + ,@(translate-body c) + ;; XXX haben noch keine Exceptions + (error "expected exception ~A" (dom:tag-name element))))))) + +(defun translate-try (element) + (map-child-elements 'list + (lambda (c) + (if (equal (dom:tag-name c) "catch") + nil + (translate-statement c))) + element) + ;; XXX haben noch keine Exceptions + ) + +(defun translate-append (element) + (with-attributes (|collection| |item|) element + (let ((c (%intern |collection|)) + (i (%intern |item|))) + `(maybe-setf ,c (append ,c (list ,i)))))) + +(defun translate-assert-true (element) + (with-attributes (|actual|) element + `(assert ,(%intern |actual|)))) + +(defun translate-assert-false (element) + (with-attributes (|actual|) element + `(assert (not ,(%intern |actual|))))) + +(defun translate-assert-uri-equals (element) + `(assert ,(translate-uri-equals element))) + + +;;;; Tests uebersetzen + +(defun translate-body (element) + (map-child-elements 'list #'translate-statement element)) + +(defun translate-for-each (element) + (with-attributes (|collection| |member|) element + `(dolist (,(%intern |member|) ,(%intern |collection|)) + ,@(translate-body element)))) + +(defun test (name &optional (directory *directory*)) + (let* ((test-directory (merge-pathnames "tests/level1/core/" directory))) + (slurp-test + (make-pathname :name name :type "xml" :defaults test-directory)))) + +(defun assert-have-implementation-attribute (element) + (string-case (dom:get-attribute element "name") + (t + (warn "implementationAttribute ~A not supported, skipping test" + (dom:get-attribute element "name")) + (throw 'give-up nil)))) + +(defun slurp-test (pathname) + (unless *fields* + (multiple-value-setq (*methods* *fields*) (read-members *directory*))) + (catch 'give-up + (let* ((test (dom:document-element (xml:parse-file pathname))) + title + (variables '()) + (code '())) + (do-child-elements (e test) + (string-case (dom:tag-name e) + ("metadata" + (let ((title-element (find-child-element "title" e))) + (setf title (dom:data (dom:first-child title-element))))) + ("var" + (push (%intern (dom:get-attribute e "name")) variables)) + ("implementationAttribute" + (assert-have-implementation-attribute e)) + (t + (push (translate-statement e) code)))) + `(defun ,(%intern (concatenate 'string "test-" title)) () + (let (,@variables) + ,@(reverse code)))))) + +(defun test2 (&optional verbose) + (let* ((test-directory (merge-pathnames "tests/level1/core/" *directory*)) + (suite + (dom:document-element + (xml:parse-file (merge-pathnames "alltests.xml" test-directory)))) + (n 0) + (i 0)) + (do-child-elements (member suite) + (declare (ignore member)) + (incf n)) + (do-child-elements (member suite) + (let ((href (dom:get-attribute member "href"))) + (format t "~&~D/~D ~A~%" i n href) + (let ((lisp (slurp-test (merge-pathnames href test-directory)))) + (when verbose + (print lisp)))) + (incf i)))) + +#+(or) +(test "attrname") + +#+(or) +(read-methods) + +#+(or) +(test2) diff --git a/glisp/COPYING b/glisp/COPYING new file mode 100644 index 0000000..243648d --- /dev/null +++ b/glisp/COPYING @@ -0,0 +1,521 @@ +Preamble to the Gnu Lesser General Public License + +Copyright (c) 2000 Franz Incorporated, Berkeley, CA 94704 + +The concept of the GNU Lesser General Public License version 2.1 +("LGPL") has been adopted to govern the use and distribution of +above-mentioned application. However, the LGPL uses terminology that is +more appropriate for a program written in C than one written in +Lisp. Nevertheless, the LGPL can still be applied to a Lisp program if +certain clarifications are made. This document details those +clarifications. Accordingly, the license for the open-source Lisp +applications consists of this document plus the LGPL. Wherever there is +a conflict between this document and the LGPL, this document takes +precedence over the LGPL. + +A "Library" in Lisp is a collection of Lisp functions, data and foreign +modules. The form of the Library can be Lisp source code (for processing +by an interpreter) or object code (usually the result of compilation of +source code or built with some other mechanisms). Foreign modules are +object code in a form that can be linked into a Lisp executable. When we +speak of functions we do so in the most general way to include, in +addition, methods and unnamed functions. Lisp "data" is also a general +term that includes the data structures resulting from defining Lisp +classes. A Lisp application may include the same set of Lisp objects as +does a Library, but this does not mean that the application is +necessarily a "work based on the Library" it contains. + +The Library consists of everything in the distribution file set before +any modifications are made to the files. If any of the functions or +classes in the Library are redefined in other files, then those +redefinitions ARE considered a work based on the Library. If additional +methods are added to generic functions in the Library, those additional +methods are NOT considered a work based on the Library. If Library +classes are subclassed, these subclasses are NOT considered a work based +on the Library. If the Library is modified to explicitly call other +functions that are neither part of Lisp itself nor an available add-on +module to Lisp, then the functions called by the modified Library ARE +considered a work based on the Library. The goal is to ensure that the +Library will compile and run without getting undefined function errors. + +It is permitted to add proprietary source code to the Library, but it +must be done in a way such that the Library will still run without that +proprietary code present. Section 5 of the LGPL distinguishes between +the case of a library being dynamically linked at runtime and one being +statically linked at build time. Section 5 of the LGPL states that the +former results in an executable that is a "work that uses the Library." +Section 5 of the LGPL states that the latter results in one that is a +"derivative of the Library", which is therefore covered by the +LGPL. Since Lisp only offers one choice, which is to link the Library +into an executable at build time, we declare that, for the purpose +applying the LGPL to the Library, an executable that results from +linking a "work that uses the Library" with the Library is considered a +"work that uses the Library" and is therefore NOT covered by the LGPL. + +Because of this declaration, section 6 of LGPL is not applicable to the +Library. However, in connection with each distribution of this +executable, you must also deliver, in accordance with the terms and +conditions of the LGPL, the source code of Library (or your derivative +thereof) that is incorporated into this executable. + +End of Document +------------------------------------------------------------------------ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + diff --git a/glisp/characters.lisp b/glisp/characters.lisp new file mode 100644 index 0000000..ace440e --- /dev/null +++ b/glisp/characters.lisp @@ -0,0 +1,132 @@ +;;; copyright (c) 2004 knowledgeTools Int. GmbH +;;; Author of this version: David Lichteblau +;;; +;;; License: LGPL (See file COPYING for details). +;;; +;;; derived from runes.lisp, (c) copyright 1998,1999 by Gilbert Baumann + +(in-package :glisp) + +(deftype rune () 'base-char) +(deftype rod () 'base-string) +(deftype simple-rod () 'simple-string) + +(defsubst rune (rod index) + (char rod index)) + +(defun (setf rune) (new rod index) + (setf (char rod index) new)) + +(defsubst %rune (rod index) + (aref (the simple-string rod) (the fixnum index))) + +(defsubst (setf %rune) (new rod index) + (setf (aref (the simple-string rod) (the fixnum index)) new)) + +(defun rod-capitalize (rod) + (string-upcase rod)) + +(defsubst code-rune (x) (code-char x)) +(defsubst rune-code (x) (char-code x)) + +(defsubst rune= (x y) + (char= x y)) + +(defun rune-downcase (rune) + (char-downcase rune)) + +(defsubst rune-upcase (rune) + (char-upcase rune)) + +(defun rune-upper-case-letter-p (rune) + (upper-case-p rune)) + +(defun rune-lower-case-letter-p (rune) + (lower-case-p rune)) + +(defun rune-equal (x y) + (char-equal x y)) + +(defun rod-downcase (rod) + (string-downcase rod)) + +(defun rod-upcase (rod) + (string-upcase rod)) + +(defsubst white-space-rune-p (char) + (or (char= char #\tab) + (char= char #.(code-char 10)) ;Linefeed + (char= char #.(code-char 13)) ;Carriage Return + (char= char #\space))) + +(defsubst digit-rune-p (char &optional (radix 10)) + (digit-char-p char radix)) + +(defun rod (x) + (cond + ((stringp x) x) + ((symbolp x) (string x)) + ((characterp x) (string x)) + ((vectorp x) (coerce x 'string)) + ((integerp x) (string (code-char x))) + (t (error "Cannot convert ~S to a ~S" x 'rod)))) + +(defun runep (x) + (characterp x)) + +(defun sloopy-rod-p (x) + (stringp x)) + +(defun rod= (x y) + (string= x y)) + +(defun rod-equal (x y) + (string-equal x y)) + +(defsubst make-rod (size) + (make-string size)) + +(defun char-rune (char) + char) + +(defun rune-char (rune &optional default) + (declare (ignore default)) + rune) + +(defun rod-string (rod &optional (default-char #\?)) + (declare (ignore default-char)) + rod) + +(defun string-rod (string) + string) + +;;;; + +(defun rune<= (rune &rest more-runes) + (loop + for (a b) on (cons rune more-runes) + while b + always (char<= a b))) + +(defun rune>= (rune &rest more-runes) + (loop + for (a b) on (cons rune more-runes) + while b + always (char>= a b))) + +(defun rodp (object) + (stringp object)) + +(defun really-rod-p (object) + (stringp object)) + +(defun rod-subseq (source start &optional (end (length source))) + (unless (stringp source) + (error "~S is not of type ~S." source 'rod)) + (subseq source start end)) + +(defun rod-subseq* (source start &optional (end (length source))) + (rod-subseq source start end)) + +(defun rod< (rod1 rod2) + (string< rod1 rod2)) diff --git a/glisp/dep-acl.lisp b/glisp/dep-acl.lisp new file mode 100644 index 0000000..5c953cf --- /dev/null +++ b/glisp/dep-acl.lisp @@ -0,0 +1,127 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*- +;;; --------------------------------------------------------------------------- +;;; Title: ACL-4.3 dependent stuff + fixups +;;; Created: 1999-05-25 22:33 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(export 'glisp::read-byte-sequence :glisp) +(export 'glisp::read-char-sequence :glisp) +(export 'glisp::run-unix-shell-command :glisp) +(export 'glisp::mp/process-run-function :glisp) +(export 'glisp::mp/process-kill :glisp) +(export 'glisp::mp/seize-lock :glisp) +(export 'glisp::mp/release-lock :glisp) +(export 'glisp::mp/transfer-lock-owner :glisp) +(export 'glisp::mp/current-process :glisp) +(export 'glisp::mp/process-yield :glisp) +(export 'glisp::mp/process-wait :glisp) +(export 'glisp::getenv :glisp) + +(defun glisp::read-byte-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defun glisp::read-char-sequence (&rest ap) + (apply #'read-sequence ap)) + +#+ALLEGRO-V5.0 +(defun glisp::open-inet-socket (hostname port) + (values + (socket:make-socket :remote-host hostname + :remote-port port + :format :binary) + :byte)) + +#-ALLEGRO-V5.0 +(defun glisp::open-inet-socket (hostname port) + (values + (ipc:open-network-stream :host hostname + :port port + :element-type '(unsigned-byte 8) + :class 'EXCL::BIDIRECTIONAL-BINARY-SOCKET-STREAM) + :byte)) + +#|| +(defun glisp::make-server-socket (port &key (element-type '(unsigned-byte 8))) +) +||# + +(defun glisp::mp/make-lock (&key name) + (mp:make-process-lock :name name)) + +(defmacro glisp::mp/with-lock ((lock) &body body) + `(mp:with-process-lock (,lock) + ,@body)) + +(defmacro glisp::with-timeout ((&rest options) &body body) + `(mp:with-timeout ,options . ,body)) + +(defun glisp::g/make-string (length &rest options) + (apply #'make-array length :element-type 'base-char options)) + +(defun glisp:run-unix-shell-command (cmd) + (excl:shell cmd)) + +(defun glisp:mp/process-run-function (name fn &rest args) + (apply #'mp:process-run-function name fn args)) + +(defun glisp:mp/process-kill (proc) + (mp:process-kill proc)) + +(defun glisp:mp/current-process () + sys:*current-process*) + +(defun glisp::mp/seize-lock (lock &key whostate) + whostate + (mp:process-lock lock)) + +(defun glisp::mp/transfer-lock-owner (lock old-process new-process) + (assert (eql (mp:process-lock-locker lock) old-process)) + (setf (mp:process-lock-locker lock) new-process) + ) + +(defun glisp::mp/release-lock (lock) + (mp:process-unlock lock)) + +(defun glisp::mp/process-yield (&optional process-to-run) + (mp:process-allow-schedule process-to-run)) + +(defun glisp::mp/process-wait (whostate predicate) + (mp:process-wait whostate predicate)) + +;; ACL is incapable to define compiler macros on (setf foo) +;; Unfortunately it is also incapable to declaim such functions inline. +;; So we revoke the DEFUN hack from dep-gcl here. + +(defmacro glisp::defsubst (fun args &body body) + (if (and (consp fun) (eq (car fun) 'setf)) + (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")") + (symbol-package (cadr fun))))) + `(progn + (defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap)) + (glisp::defsubst ,fnam ,args .,body))) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args .,body) + .args.))))) + + +(defun glisp::getenv (string) + (sys:getenv string)) diff --git a/glisp/dep-acl5.lisp b/glisp/dep-acl5.lisp new file mode 100644 index 0000000..1335ee4 --- /dev/null +++ b/glisp/dep-acl5.lisp @@ -0,0 +1,162 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: ACL-5.0 dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +;;; Changes +;;; ======= + +;;; When Who What +;;; --------------------------------------------------------------------------- +;;; 2002-01-04 GB spend BLOCK for DEFSUBST +;;; 1999-08-31 SES Stig Erik Sandø +;;; +;;; Changed #+allegro-v5.0 to +;;; #+(and allegro-version>= (version>= 5)) +;;; + +(export 'glisp::read-byte-sequence :glisp) +(export 'glisp::read-char-sequence :glisp) +(export 'glisp::run-unix-shell-command :glisp) +(export 'glisp::mp/process-run-function :glisp) +(export 'glisp::mp/process-kill :glisp) +(export 'glisp::mp/current-process :glisp) +(export 'glisp::mp/seize-lock :glisp) +(export 'glisp::mp/release-lock :glisp) +(export 'glisp::mp/process-yield :glisp) +(export 'glisp::mp/process-wait :glisp) +(export 'glisp::getenv :glisp) + +(export 'glisp::make-server-socket :glisp) + +(defun glisp::mp/seize-lock (lock &key whostate) + whostate + (mp:process-lock lock)) + +(defun glisp::mp/release-lock (lock) + (mp:process-unlock lock)) + +(defun glisp::read-byte-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defun glisp::read-char-sequence (&rest ap) + (apply #'read-sequence ap)) + +#+(and allegro-version>= (version>= 5)) +(defun glisp::open-inet-socket (hostname port) + (values + (socket:make-socket :remote-host hostname + :remote-port port + :format :binary) + :byte)) + +(defun glisp::make-server-socket (port &key (element-type '(unsigned-byte 8))) + (socket:make-socket :connect :passive + :local-port port + :format (cond ((subtypep element-type '(unsigned-byte 8)) + :binary) + ((subtypep element-type 'character) + :text) + (t + (error "Unknown element type: ~S." element-type))))) + +(defun glisp::accept-connection/low (socket) + (values + (socket:accept-connection socket :wait t) + :byte)) + + +#-(and allegro-version>= (version>= 5)) +(defun glisp::open-inet-socket (hostname port) + (values + (ipc:open-network-stream :host hostname + :port port + :element-type '(unsigned-byte 8) + :class 'EXCL::BIDIRECTIONAL-BINARY-SOCKET-STREAM) + :byte)) + +(defun glisp::mp/make-lock (&key name) + (mp:make-process-lock :name name)) + +(defmacro glisp::mp/with-lock ((lock) &body body) + `(mp:with-process-lock (,lock) + ,@body)) + +(defmacro glisp::with-timeout ((&rest options) &body body) + `(mp:with-timeout ,options . ,body)) + +(defun glisp::g/make-string (length &rest options) + (apply #'make-array length :element-type 'base-char options)) + +(defun glisp:run-unix-shell-command (cmd) + (excl:shell cmd)) + +(defparameter glisp::*inherited-vars* + '(*terminal-io* *standard-input* *standard-output* *error-output* *trace-output* *query-io* *debug-io*)) + +(defparameter glisp::*inherited-vars* nil) + +(defun glisp:mp/process-run-function (name fn &rest args) + (mp:process-run-function + name + (lambda (vars vals fn args) + (progv vars vals + (apply fn args))) + glisp::*inherited-vars* (mapcar #'symbol-value glisp::*inherited-vars*) + fn args)) + +(defun glisp:mp/current-process () + sys:*current-process*) + +(defun glisp::mp/process-yield (&optional process-to-run) + (mp:process-allow-schedule process-to-run)) + +(defun glisp::mp/process-wait (whostate predicate) + (mp:process-wait whostate predicate)) + +(defun glisp::mp/process-kill (proc) + (mp:process-kill proc)) + +;; ACL is incapable to define compiler macros on (setf foo) +;; Unfortunately it is also incapable to declaim such functions inline. +;; So we revoke the DEFUN hack from dep-gcl here. + +(defmacro glisp::defsubst (fun args &body body) + (if (and (consp fun) (eq (car fun) 'setf)) + (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")") + (symbol-package (cadr fun))))) + `(progn + (defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap)) + (glisp::defsubst ,fnam ,args .,body))) + (labels ((declp (x) + (and (consp x) (eq (car x) 'declare)))) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args + ,@(remove-if-not #'declp body) + (block ,fun + ,@(remove-if #'declp body))) + .args.)))))) + + +(defun glisp::getenv (string) + (sys:getenv string)) \ No newline at end of file diff --git a/glisp/dep-clisp.lisp b/glisp/dep-clisp.lisp new file mode 100644 index 0000000..af740f5 --- /dev/null +++ b/glisp/dep-clisp.lisp @@ -0,0 +1,176 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CLISP dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(in-package :CL-USER) + +(eval-when (compile load eval) + (if (fboundp 'cl::define-compiler-macro) + (pushnew 'define-compiler-macro *features*))) + +(setq lisp:*load-paths* '(#P"./")) + +(import 'lisp:read-byte-sequence :glisp) +(export 'lisp:read-byte-sequence :glisp) +(import 'lisp:read-char-sequence :glisp) +(export 'lisp:read-char-sequence :glisp) +(export 'glisp::compile-file :glisp) +(export 'glisp::run-unix-shell-command :glisp) +(export 'glisp::make-server-socket :glisp) + + +#|| +(export 'glisp::read-byte-sequence :glisp) +(defun glisp::read-byte-sequence (sequence input &key (start 0) (end (length sequence))) + (let (c (i start)) + (loop + (cond ((= i end) (return i))) + (setq c (read-byte input nil :eof)) + (cond ((eql c :eof) (return i))) + (setf (aref sequence i) c) + (incf i) ))) +||# + + +(defun glisp::compile-file (&rest ap) + (and (apply #'compile-file ap) + (apply #'compile-file-pathname ap))) + +(defmacro glisp::with-timeout ((&rest ignore) &body body) + (declare (ignore ignore)) + `(progn + ,@body)) + +(defun glisp::open-inet-socket (hostname port) + (values + (lisp:socket-connect port hostname) + :byte)) + +(defun glisp:make-server-socket (port) + (lisp:socket-server port)) + +(defun glisp::accept-connection/low (socket) + (let ((stream (lisp:socket-accept socket))) + (setf (stream-element-type stream) '(unsigned-byte 8)) + (values + stream + :byte))) + +(defun glisp::g/make-string (length &rest options) + (apply #'make-array length + :element-type + '#.(cond ((stringp (make-array 1 :element-type 'string-char)) + 'string-char) + ((stringp (make-array 1 :element-type 'base-char)) + 'base-char) + (t + (error "What is the string element type of the day?"))) + options)) + +(defun glisp:run-unix-shell-command (command) + (lisp:shell command)) + +#+DEFINE-COMPILER-MACRO +(cl:define-compiler-macro ldb (bytespec value &whole whole) + (let (pos size) + (cond ((and (consp bytespec) + (= (length bytespec) 3) + (eq (car bytespec) 'byte) + (constantp (setq size (second bytespec))) + (constantp (setq pos (third bytespec)))) + `(logand ,(if (eql pos 0) value `(ash ,value (- ,pos))) + (1- (ash 1 ,size)))) + (t + whole)))) + +#-DEFINE-COMPILER-MACRO +(progn + (export 'glisp::define-compiler-macro :glisp) + (defmacro glisp::define-compiler-macro (name args &body body) + (declare (ignore args body)) + `(progn + ',name))) + +#|| +(defun xlib:draw-glyph (drawable gcontext x y elt &rest more) + (apply #'xlib:draw-glyphs drawable gcontext x y (vector elt) more)) +||# + +(defmacro glisp::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) + +(export 'glisp::getenv :glisp) +(defun glisp::getenv (var) + (sys::getenv var)) + + + +(export 'glisp::mp/process-run-function :glisp) +(defun glisp:mp/process-run-function (name fn &rest args) + (apply #'mp:process-run-function name fn args)) + +(export 'glisp::mp/process-kill :glisp) +(defun glisp:mp/process-kill (proc) + (mp:process-kill proc)) + +(export 'glisp::mp/current-process :glisp) +(defun glisp:mp/current-process () + (mp:current-process)) + +(export 'glisp::mp/seize-lock :glisp) +(defun glisp::mp/seize-lock (lock &key whostate) + whostate + (mp:process-lock lock)) + +(export 'glisp::mp/release-lock :glisp) +(defun glisp::mp/release-lock (lock) + (mp:process-unlock lock)) + +(export 'glisp::mp/process-yield :glisp) +(defun glisp::mp/process-yield (&optional process-to-run) + process-to-run + (mp:process-allow-schedule)) + +(export 'glisp::mp/process-wait :glisp) +(defun glisp::mp/process-wait (whostate predicate) + (mp::process-wait whostate predicate)) + +(defmacro glisp::mp/with-lock ((lock) &body body) + `(mp:with-process-lock (,lock) + ,@body)) + +(defun glisp::mp/make-lock (&key name) + (mp:make-process-lock :name name)) + + + + + + + + + + + + diff --git a/glisp/dep-cmucl-dtc.lisp b/glisp/dep-cmucl-dtc.lisp new file mode 100644 index 0000000..a99171a --- /dev/null +++ b/glisp/dep-cmucl-dtc.lisp @@ -0,0 +1,212 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CMUCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(export 'glisp::read-byte-sequence :glisp) +(export 'glisp::read-char-sequence :glisp) +(export 'glisp::run-unix-shell-command :glisp) + +(export 'glisp::getenv :glisp) + +(defun glisp::read-byte-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defun glisp::read-char-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defun glisp::read-byte-sequence (sequence input &key (start 0) (end (length sequence))) + (let (c (i start)) + (loop + (cond ((= i end) (return i))) + (setq c (read-byte input nil :eof)) + (cond ((eql c :eof) (return i))) + (setf (aref sequence i) c) + (incf i) ))) + +(defun glisp::read-byte-sequence (sequence input &key (start 0) (end (length sequence))) + (let ((r (read-sequence sequence input :start start :end end))) + (cond ((and (= r start) (> end start)) + (let ((byte (read-byte input nil :eof))) + (cond ((eq byte :eof) + r) + (t + (setf (aref sequence start) byte) + (incf start) + (if (> end start) + (glisp::read-byte-sequence sequence input :start start :end end) + start))))) + (t + r)))) + +#|| +(defun glisp::read-char-sequence (sequence input &key (start 0) (end (length sequence))) + (let (c (i start)) + (loop + (cond ((= i end) (return i))) + (setq c (read-byte input nil :eof)) + (cond ((eql c :eof) (return i))) + (setf (aref sequence i) c) + (incf i) ))) +||# + +(defmacro glisp::with-timeout ((&rest ignore) &body body) + (declare (ignore ignore)) + `(progn + ,@body)) + +(defun glisp::open-inet-socket (hostname port) + (let ((fd (extensions:connect-to-inet-socket hostname port))) + (values + (sys:make-fd-stream fd + :input t + :output t + :element-type '(unsigned-byte 8) + :name (format nil "Network connection to ~A:~D" hostname port)) + :byte))) + +(defun glisp::g/make-string (length &rest options) + (apply #'make-array length :element-type 'base-char options)) + +#|| + +RUN-PROGRAM is an external symbol in the EXTENSIONS package. +Function: # +Function arguments: + (program args &key (env *environment-list*) (wait t) pty input + if-input-does-not-exist output (if-output-exists :error) (error :output) + (if-error-exists :error) status-hook) +Function documentation: + Run-program creates a new process and runs the unix progam in the + file specified by the simple-string program. Args are the standard + arguments that can be passed to a Unix program, for no arguments + use NIL (which means just the name of the program is passed as arg 0). + + Run program will either return NIL or a PROCESS structure. See the CMU + Common Lisp Users Manual for details about the PROCESS structure. + + The keyword arguments have the following meanings: + :env - + An A-LIST mapping keyword environment variables to simple-string + values. + :wait - + If non-NIL (default), wait until the created process finishes. If + NIL, continue running Lisp until the program finishes. + :pty - + Either T, NIL, or a stream. Unless NIL, the subprocess is established + under a PTY. If :pty is a stream, all output to this pty is sent to + this stream, otherwise the PROCESS-PTY slot is filled in with a stream + connected to pty that can read output and write input. + :input - + Either T, NIL, a pathname, a stream, or :STREAM. If T, the standard + input for the current process is inherited. If NIL, /dev/null + is used. If a pathname, the file so specified is used. If a stream, + all the input is read from that stream and send to the subprocess. If + :STREAM, the PROCESS-INPUT slot is filled in with a stream that sends + its output to the process. Defaults to NIL. + :if-input-does-not-exist (when :input is the name of a file) - + can be one of: + :error - generate an error. + :create - create an empty file. + nil (default) - return nil from run-program. + :output - + Either T, NIL, a pathname, a stream, or :STREAM. If T, the standard + output for the current process is inherited. If NIL, /dev/null + is used. If a pathname, the file so specified is used. If a stream, + all the output from the process is written to this stream. If + :STREAM, the PROCESS-OUTPUT slot is filled in with a stream that can + be read to get the output. Defaults to NIL. + :if-output-exists (when :input is the name of a file) - + can be one of: + :error (default) - generates an error if the file already exists. + :supersede - output from the program supersedes the file. + :append - output from the program is appended to the file. + nil - run-program returns nil without doing anything. + :error and :if-error-exists - + Same as :output and :if-output-exists, except that :error can also be + specified as :output in which case all error output is routed to the + same place as normal output. + :status-hook - + This is a function the system calls whenever the status of the + process changes. The function takes the process as an argument. +Its defined argument types are: + (T T &KEY (:ENV T) (:WAIT T) (:PTY T) (:INPUT T) (:IF-INPUT-DOES-NOT-EXIST T) + (:OUTPUT T) (:IF-OUTPUT-EXISTS T) (:ERROR T) (:IF-ERROR-EXISTS T) + (:STATUS-HOOK T)) +Its result type is: + (OR EXTENSIONS::PROCESS NULL) +On Wednesday, 7/1/98 12:48:51 pm [-1] it was compiled from: +target:code/run-program.lisp + Created: Saturday, 6/20/98 07:13:08 pm [-1] + Comment: $Header: /home/david/gitconversion/cvsroot/cxml/glisp/Attic/dep-cmucl-dtc.lisp,v 1.1 2005-03-13 18:02:10 david Exp $ +||# + +;; (process-exit-code (run-program "/bin/sh" (list "-c" "ls") :wait t :input nil :output nil)) + +(defun glisp:run-unix-shell-command (command) + (ext:process-exit-code (ext:run-program "/bin/sh" (list "-c" command) :wait t :input nil :output nil))) + +(defmacro glisp::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) + + +;;; MP + +(export 'glisp::mp/process-yield :glisp) +(export 'glisp::mp/process-wait :glisp) +(export 'glisp::mp/process-run-function :glisp) +(export 'glisp::mp/make-lock :glisp) +(export 'glisp::mp/current-process :glisp) +(export 'glisp::mp/process-kill :glisp) + +(defun glisp::mp/make-lock (&key name) + (pthread::make-lock name)) + +(defmacro glisp::mp/with-lock ((lock) &body body) + `(pthread::with-lock-held (,lock) + ,@body)) + +(defun glisp::mp/process-yield (&optional process-to-run) + (declare (ignore process-to-run)) + (PTHREAD:SCHED-YIELD)) + +(defun glisp::mp/process-wait (whostate predicate) + (do () + ((funcall predicate)) + (sleep .1))) + +(defun glisp::mp/process-run-function (name fun &rest args) + (pthread::thread-create + (lambda () + (apply fun args)) + :name name)) + +(defun glisp::mp/current-process () + 'blah) + +(defun glisp::mp/process-kill (process) + (warn "*** Define GLISP:MP/PROCESS-KILL for CMUCL.")) + +(defun glisp::getenv (string) + (cdr (assoc string ext:*environment-list* :test #'string-equal))) + diff --git a/glisp/dep-cmucl.lisp b/glisp/dep-cmucl.lisp new file mode 100644 index 0000000..85b24cd --- /dev/null +++ b/glisp/dep-cmucl.lisp @@ -0,0 +1,241 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CMUCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(export 'glisp::read-byte-sequence :glisp) +(export 'glisp::read-char-sequence :glisp) +(export 'glisp::run-unix-shell-command :glisp) + +(export 'glisp::getenv :glisp) + +(export 'glisp::make-server-socket :glisp) +(export 'glisp::close-server-socket :glisp) + +(defun glisp::read-byte-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defun glisp::read-char-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defun glisp::read-byte-sequence (sequence input &key (start 0) (end (length sequence))) + (let (c (i start)) + (loop + (cond ((= i end) (return i))) + (setq c (read-byte input nil :eof)) + (cond ((eql c :eof) (return i))) + (setf (aref sequence i) c) + (incf i) ))) + +(defun glisp::read-byte-sequence (sequence input &key (start 0) (end (length sequence))) + (let ((r (read-sequence sequence input :start start :end end))) + (cond ((and (= r start) (> end start)) + (let ((byte (read-byte input nil :eof))) + (cond ((eq byte :eof) + r) + (t + (setf (aref sequence start) byte) + (incf start) + (if (> end start) + (glisp::read-byte-sequence sequence input :start start :end end) + start))))) + (t + r)))) + +#|| +(defun glisp::read-char-sequence (sequence input &key (start 0) (end (length sequence))) + (let (c (i start)) + (loop + (cond ((= i end) (return i))) + (setq c (read-byte input nil :eof)) + (cond ((eql c :eof) (return i))) + (setf (aref sequence i) c) + (incf i) ))) +||# + +(defmacro glisp::with-timeout ((&rest ignore) &body body) + (declare (ignore ignore)) + `(progn + ,@body)) + +(defun glisp::open-inet-socket (hostname port) + (let ((fd (extensions:connect-to-inet-socket hostname port))) + (values + (sys:make-fd-stream fd + :input t + :output t + :element-type '(unsigned-byte 8) + :name (format nil "Network connection to ~A:~D" hostname port)) + :byte))) + +(defstruct (server-socket (:constructor make-server-socket-struct)) + fd + element-type + port) + +(defun glisp::make-server-socket (port &key (element-type '(unsigned-byte 8))) + (make-server-socket-struct :fd (ext:create-inet-listener port) + :element-type element-type + :port port)) + +(defun glisp::accept-connection/low (socket) + (mp:process-wait-until-fd-usable (server-socket-fd socket) :input) + (values + (sys:make-fd-stream (ext:accept-tcp-connection (server-socket-fd socket)) + :input t :output t + :element-type (server-socket-element-type socket)) + (cond ((subtypep (server-socket-element-type socket) 'integer) + :byte) + (t + :char)))) + +(defun glisp::close-server-socket (socket) + (unix:unix-close (server-socket-fd socket))) + +;;;;;; + +(defun glisp::g/make-string (length &rest options) + (apply #'make-array length :element-type 'base-char options)) + + + +#|| + +RUN-PROGRAM is an external symbol in the EXTENSIONS package. +Function: # +Function arguments: + (program args &key (env *environment-list*) (wait t) pty input + if-input-does-not-exist output (if-output-exists :error) (error :output) + (if-error-exists :error) status-hook) +Function documentation: + Run-program creates a new process and runs the unix progam in the + file specified by the simple-string program. Args are the standard + arguments that can be passed to a Unix program, for no arguments + use NIL (which means just the name of the program is passed as arg 0). + + Run program will either return NIL or a PROCESS structure. See the CMU + Common Lisp Users Manual for details about the PROCESS structure. + + The keyword arguments have the following meanings: + :env - + An A-LIST mapping keyword environment variables to simple-string + values. + :wait - + If non-NIL (default), wait until the created process finishes. If + NIL, continue running Lisp until the program finishes. + :pty - + Either T, NIL, or a stream. Unless NIL, the subprocess is established + under a PTY. If :pty is a stream, all output to this pty is sent to + this stream, otherwise the PROCESS-PTY slot is filled in with a stream + connected to pty that can read output and write input. + :input - + Either T, NIL, a pathname, a stream, or :STREAM. If T, the standard + input for the current process is inherited. If NIL, /dev/null + is used. If a pathname, the file so specified is used. If a stream, + all the input is read from that stream and send to the subprocess. If + :STREAM, the PROCESS-INPUT slot is filled in with a stream that sends + its output to the process. Defaults to NIL. + :if-input-does-not-exist (when :input is the name of a file) - + can be one of: + :error - generate an error. + :create - create an empty file. + nil (default) - return nil from run-program. + :output - + Either T, NIL, a pathname, a stream, or :STREAM. If T, the standard + output for the current process is inherited. If NIL, /dev/null + is used. If a pathname, the file so specified is used. If a stream, + all the output from the process is written to this stream. If + :STREAM, the PROCESS-OUTPUT slot is filled in with a stream that can + be read to get the output. Defaults to NIL. + :if-output-exists (when :input is the name of a file) - + can be one of: + :error (default) - generates an error if the file already exists. + :supersede - output from the program supersedes the file. + :append - output from the program is appended to the file. + nil - run-program returns nil without doing anything. + :error and :if-error-exists - + Same as :output and :if-output-exists, except that :error can also be + specified as :output in which case all error output is routed to the + same place as normal output. + :status-hook - + This is a function the system calls whenever the status of the + process changes. The function takes the process as an argument. +Its defined argument types are: + (T T &KEY (:ENV T) (:WAIT T) (:PTY T) (:INPUT T) (:IF-INPUT-DOES-NOT-EXIST T) + (:OUTPUT T) (:IF-OUTPUT-EXISTS T) (:ERROR T) (:IF-ERROR-EXISTS T) + (:STATUS-HOOK T)) +Its result type is: + (OR EXTENSIONS::PROCESS NULL) +On Wednesday, 7/1/98 12:48:51 pm [-1] it was compiled from: +target:code/run-program.lisp + Created: Saturday, 6/20/98 07:13:08 pm [-1] + Comment: $Header: /home/david/gitconversion/cvsroot/cxml/glisp/Attic/dep-cmucl.lisp,v 1.1 2005-03-13 18:02:10 david Exp $ +||# + +;; (process-exit-code (run-program "/bin/sh" (list "-c" "ls") :wait t :input nil :output nil)) + +(defun glisp:run-unix-shell-command (command) + (ext:process-exit-code (ext:run-program "/bin/sh" (list "-c" command) :wait t :input nil :output nil))) + +(defmacro glisp::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) + + +;;; MP + +(export 'glisp::mp/process-yield :glisp) +(export 'glisp::mp/process-wait :glisp) +(export 'glisp::mp/process-run-function :glisp) +(export 'glisp::mp/make-lock :glisp) +(export 'glisp::mp/current-process :glisp) +(export 'glisp::mp/process-kill :glisp) + +(defun glisp::mp/make-lock (&key name) + (mp:make-lock name)) + +(defmacro glisp::mp/with-lock ((lock) &body body) + `(mp:with-lock-held (,lock) + ,@body)) + +(defun glisp::mp/process-yield (&optional process-to-run) + (declare (ignore process-to-run)) + (mp:process-yield)) + +(defun glisp::mp/process-wait (whostate predicate) + (mp:process-wait whostate predicate)) + +(defun glisp::mp/process-run-function (name fun &rest args) + (mp:make-process + (lambda () + (apply fun args)) + :name name)) + +(defun glisp::mp/current-process () + mp:*current-process*) + +(defun glisp::mp/process-kill (process) + (mp:destroy-process process)) + +(defun glisp::getenv (string) + (cdr (assoc string ext:*environment-list* :test #'string-equal))) + diff --git a/glisp/dep-gcl-2.lisp b/glisp/dep-gcl-2.lisp new file mode 100644 index 0000000..5fcd8d5 --- /dev/null +++ b/glisp/dep-gcl-2.lisp @@ -0,0 +1,93 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Second part of GCL dependent stuff +;;; Created: 1999-05-25 22:31 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(in-package :GLISP) + +(lisp::clines + "#include " + "#include " + "#include " + "#include " + "#include " + "#include " + "#include " + "#include " + ) + +(lisp::defcfun "static object open_inet_socket_aux (object x, object y, char *hostname, int port)" 2 + "FILE *fp;" + "object stream;" + + "struct hostent *hostinfo;" + "struct sockaddr_in addr;" + "int sock;" + "vs_mark;" + + "hostinfo = gethostbyname (hostname);" + + "if (hostinfo == 0)" + "{" + " return Cnil;" + "}" + + "addr.sin_family = AF_INET;" + "addr.sin_port = htons (port);" + "addr.sin_addr = *(struct in_addr*) hostinfo->h_addr;" + "" + "sock = socket (PF_INET, SOCK_STREAM, 0);" + "if (sock < 0)" + " return Cnil;" + "" + "if (connect (sock, (struct sockaddr *) &addr, sizeof (addr)) != 0)" + "{" + " close (sock);" + " return Cnil;" + "}" + + + "fp = fdopen (sock, \"rb+\");" + "stream = (object) alloc_object(t_stream);" + "stream->sm.sm_mode = (short)smm_io;" + "stream->sm.sm_fp = fp;" + "stream->sm.sm_object0 = x;" + "stream->sm.sm_object1 = y;" + "stream->sm.sm_int0 = stream->sm.sm_int1 = 0;" + "vs_push(stream);" + "setup_stream_buffer(stream);" + "vs_reset;" + "return stream;" + ) + +(lisp::defentry open-inet-socket-aux (lisp::object lisp::object lisp::string lisp::int) + (lisp::object "open_inet_socket_aux")) + +(lisp::defentry unix/system (lisp::string) + (lisp::int "system")) + +(defun open-inet-socket (hostname port) + (values (or (open-inet-socket-aux '(unsigned-byte 8) + (format nil "Network connection to ~A:~D" hostname port) + hostname port) + (error "Cannot connect to `~A' on port ~D." + hostname port)) + :byte)) diff --git a/glisp/dep-gcl.lisp b/glisp/dep-gcl.lisp new file mode 100644 index 0000000..f53ae07 --- /dev/null +++ b/glisp/dep-gcl.lisp @@ -0,0 +1,344 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: GCL dependent stuff + fixups +;;; Created: 1999-05-25 22:31 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(shadow '(make-pathname pathname-directory) :glisp) + +(export '(glisp::defun + glisp::read-byte-sequence + glisp::read-char-sequence + glisp::define-compiler-macro + glisp::formatter + glisp::destructuring-bind + glisp::parse-macro + glisp::loop + glisp::*print-readably* + glisp::compile-file-pathname + glisp::ignore-errors + glisp::pathname-directory + glisp::make-pathname + glisp::run-unix-shell-command) + :glisp) + +(defmacro glisp::defun (name args &body body) + (cond ((and (consp name) + (eq (car name) 'setf)) + (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr name)) ")") + (symbol-package (cadr name))))) + `(progn + (defsetf ,(cadr name) (&rest ap) (new-value) (list* ',fnam new-value ap)) + (defun ,fnam ,args .,body)))) + (t + `(defun ,name ,args .,body)) )) + +(defun glisp::read-byte-sequence (sequence input &key (start 0) (end (length sequence))) + (let (c (i start)) + (loop + (cond ((= i end) (return i))) + (setq c (read-byte input nil :eof)) + (cond ((eql c :eof) (return i))) + (setf (aref sequence i) c) + (incf i) ))) + +(defun glisp::read-char-sequence (sequence input &key (start 0) (end (length sequence))) + (let (c (i start)) + (loop + (cond ((= i end) (return i))) + (setq c (read-char input nil :eof)) + (cond ((eql c :eof) (return i))) + (setf (aref sequence i) c) + (incf i) ))) + +(defmacro glisp::define-compiler-macro (&rest ignore) + ignore + nil) + +(defun glisp::formatter (string) + #'(lambda (sink &rest ap) + (apply #'format sink string ap))) + +(defmacro lambda (&rest x) + `#'(lambda .,x)) + + +(defun glisp::row-major-aref (array index) + ;; Wir sollten hier wirklich was effizienteres haben + (aref (make-array (array-total-size array) + :displaced-to array + :element-type (array-element-type array)) + index)) + +(glisp::defun (setf glisp::row-major-aref) (value array index) + ;; Wir sollten hier wirklich was effizienteres haben + (setf (aref (make-array (array-total-size array) + :displaced-to array + :element-type (array-element-type array)) + index) + value)) + +(defun glisp::mp/make-lock (&key name) + name + nil) + +(defmacro glisp::mp/with-lock ((lock) &body body) + (declare (ignore lock)) + `(progn + ,@body)) + +(defmacro glisp::with-timeout ((&rest ignore) &body body) + (declare (ignore ignore)) + `(progn + ,@body)) + +(defvar glisp::*print-readably* nil) + +(defun glisp::g/make-string (length &rest options) + (apply #'make-array length :element-type 'string-char options)) + +(defun parse-macro-lambda-list (name lambda-list whole &optional environment-value (real-whole whole)) + "The work horse for destructing-bind and parse-macro." + (let ((orig-lambda-list lambda-list) + required optionals rest-var keys aux-vars whole-var env-var + allow-other-keys-p + (my-lambda-list-keywords '(&OPTIONAL &REST &KEY &AUX &BODY))) + + (labels ((COLLECT (&optional on-keys-p) + (let (result) + (do () + ((or (atom lambda-list) (member (car lambda-list) my-lambda-list-keywords)) + (nreverse result)) + (cond ((eq (car lambda-list) '&WHOLE) + (push (cadr lambda-list) whole-var) + (setf lambda-list (cddr lambda-list))) + ((eq (car lambda-list) '&ENVIRONMENT) + (push (cadr lambda-list) env-var) + (setf lambda-list (cddr lambda-list))) + ((eq (car lambda-list) '&ALLOW-OTHER-KEYS) + (unless on-keys-p + (cerror "Ignore this syntax restriction and set the allow-other-keys-p flag." + "In lambda list of macro ~S: &ALLOW-OTHER-KEYS may only be specified ~ + in the &KEYS section: ~S" + name orig-lambda-list)) + (setq allow-other-keys-p T lambda-list (cdr lambda-list))) + (T (push (pop lambda-list) result)) ))) ) + + (CHECK-ONLY-ONE (kind lst) + (unless (<= (length lst) 1) + (error "In lambda list of macro ~S: You may only specify one ~S parameter, but I got ~S.~%~ + Lambda list: ~S." + name kind lst orig-lambda-list)) + (car lst)) ) + + ;; Now collect the various elements of the lambda-list + (setq required (collect)) + (when (and (consp lambda-list) (eq (car lambda-list) '&OPTIONAL)) (pop lambda-list) (setq optionals (collect))) + (when (and (consp lambda-list) (member (car lambda-list) '(&REST &BODY))) (pop lambda-list) (setq rest-var (collect))) + (when (and (consp lambda-list) (eq (car lambda-list) '&KEY)) (pop lambda-list) (setq keys (collect T))) + (when (and (consp lambda-list) (eq (car lambda-list) '&AUX)) (pop lambda-list) (setq aux-vars (collect))) + + ;; Inspect the remaining value of lambda-list + (cond ((consp lambda-list) + ;; Not all was parsed correctly ... + (error "In lambda list of macro ~S: Found lambda list keyword ~S out of order;~%~ + The order must be &OPTIONAL, &REST/&BODY, &KEY, &AUX; &WHOLE and &ENVIRONMENT may apear anywhere.~%~ + Lambda list: ~S." + name (car lambda-list) orig-lambda-list)) + ((null lambda-list)) ; Everything is just fine. + ((symbolp lambda-list) + ;; Dotted with a symbol = specification of a rest-var + (push lambda-list rest-var)) + (T + ;; List is odd-ly dotted. + (error "In lambda list of macro ~S: A lambda list may only be dotted with a symbol.~%~ + Lambda list: ~S." + name orig-lambda-list)) ) + + ;; Now check for rest-var, whole-var and env-var, which may all specify only one variable ... + (setf rest-var (check-only-one '&REST rest-var)) + (setf whole-var (check-only-one '&WHOLE whole-var)) + (when (and env-var (not environment-value)) + (cerror "Ignore the &ENVIRONMENT parameter." + "In lambda list of macro ~S: An &ENVIRONMENT parameter may only be specified on the top-level lambda list.~%~ + Lambda list: ~S." + name orig-lambda-list) + (setq env-var nil)) + (setf env-var (check-only-one '&ENVIRONMENT env-var)) + + (when (and (null rest-var) keys) + (setf rest-var (gensym))) + + ;; Build up the bindings + (let ((bindings nil) (constraints nil) (w whole)) + (labels ((add-one (x) (add (list x))) + (add-bind (spec val) + (if (consp spec) + (let ((gsym (gensym))) + (add-one `(,gsym ,val)) + (multiple-value-bind (bndngs cnstrnts) (parse-macro-lambda-list name spec gsym) + (add bndngs) + (setq contraints (nconc constraints cnstrnts))) ) + (add-one `(,spec ,val)))) + (add (x) (setf bindings (nconc bindings x)))) + + (when whole-var + (add-one `(,whole-var ,real-whole)) + (when (eq whole real-whole) (setq w whole-var))) + + ;; Calculate the constraints ... + (let ((min nil) + (max nil)) + (when (or required optionals rest-var) (setq min (length required))) + (when (and (null rest-var) (or required optionals)) + (setq max (+ (length required) (length optionals)))) + (cond ((and (null min) (null max))) + ((eql min max) + (push `(listp ,w) constraints) + (push `(= (length ,w) ,min) constraints)) + (T + (push `(listp ,w) constraints) + (when (and min (> min 0)) (push `(>= (length ,w) ,min) constraints)) + (when max (push `(<= (length ,w) ,max) constraints))) )) + + (setq constraints (nreverse constraints)) + + (dolist (spec required) + (add-bind spec `(CAR ,w)) + (setf w (list 'cdr w))) + + (dolist (spec optionals) + ;; CHECK + (cond ((consp spec) + (when (caddr spec) ;svar + (add-one `(,(caddr spec) (NOT (NULL ,w))))) + (add-bind (car spec) `(if (NOT (NULL ,w)) (CAR ,w) ,(cadr spec)))) + (T + (add-one `(,spec (CAR ,w)))) ) + (setf w (list 'cdr w))) + + (when rest-var (add-one `(,rest-var ,w))) + + (dolist (spec keys) + ;; CHECK + (let (kw var svar default) + (cond ((consp spec) + (setq var (car spec) default (cadr spec) svar (caddr spec)) + (when (consp var) (setq kw (car var) var (cadr var)))) + (T (setq var spec default nil svar nil))) + ;; SVAR + (unless kw (setq kw (intern (symbol-name var) :keyword))) + (add-bind var `(getf ,rest-var ,kw ,default)) )) + + (dolist (spec aux-vars) (add-one spec)) + + (when env-var + (add-one `(,env-var ,environment-value))) + + (values bindings constraints env-var)) )))) + +(defun glisp::parse-macro (name lambda-list body &optional env) + "This is used to process a macro definition in the same way as defmacro and + macrolet. It returns a lambda-expression that accepts two arguments, a form + and an environment. The name, lambda-list, and body arguments correspond to + the parts of a defmacro or macrolet definition. + + The lambda-list argument may inclue &environment and &whole and may include + destructing. The name argument is used to enclose the body in an implicat + block and might also be used for implementation-depend purposes (such as + including the name of the macro in error messages if the form does not match + the lambda-list)." + + (let ((call (gensym)) (env (gensym))) + (multiple-value-bind (bindings constraints) + (parse-macro-lambda-list name lambda-list `(CDR call) env call) + `(lambda (,call ,env) + (block ,name + (let* ,bindings + (unless (and ,@constraints) + (error "Macro ~S called with wrong number/nesting of arguments: ~S" + ',name ,call)) + ,@body))) )) ) + +(defmacro glisp::destructuring-bind (lambda-list expression &body body) + "This macro binds the variables specified in lambda-list to the corresponding + values in the tree structure resulting from evaluating the expression, then + executes the forms as an implicit progn. + + A destructing-bind lambda-list may contain the lambda-list keywords &optional, + &rest, &key, &allow-other-keys, and &aux; &body and &whole may also be used as + they are in defmacro, but &environment may not be used. Nested and dotted + lambda-lists are also permitted as for defmacro. The idea is that a + destructing-bind lambda-list has the same format as inner levels of a defmacro + lambda-list. + + If the result of evaluating the expressions does not match the destructuring + pattern, an error should be signaled." + + (let ((call (gensym))) + (multiple-value-bind (bindings constraints) + (parse-macro-lambda-list nil lambda-list call) + `(let* ((,call ,expression) ,@bindings) + (unless (and ,@constraints) + (error "DESTRUCTING-BIND with wrong number/nesting of arguments: ~S~%~ + Lambda list to match with: ~S." ,call ',lambda-list)) + (locally ,@body)) )) ) + + +(defmacro glisp::loop (&rest args) + `(sloop:sloop ,@args)) + +(defun glisp:compile-file-pathname (filename &rest options) + (declare (ignore options)) + (merge-pathnames (make-pathname :type "o") filename)) + + +(defmacro glisp:ignore-errors (&rest body) + `(IGNORE-ERRORS-FN #'(LAMBDA () ,@body))) + +(defun ignore-errors-fn (cont) + (let ((old (symbol-function 'system:universal-error-handler))) + (block foo + (unwind-protect + (progn + (setf (symbol-function 'system:universal-error-handler) + #'(lambda (&rest x) + (return-from foo (values nil x)))) + (funcall cont) ) + (setf (symbol-function 'system:universal-error-handler) old) )))) + +(defun glisp::make-pathname (&rest args &key directory &allow-other-keys) + (cond ((eq (car directory) :relative) + (apply #'lisp:make-pathname :directory (cdr directory) args)) + ((eq (car directory) :absolute) + (apply #'lisp:make-pathname :directory (cons :root (cdr directory)) args)) + (t + (apply #'lisp:make-pathname args)))) + +(defun glisp::pathname-directory (pathname) + (let ((d (lisp:pathname-directory pathname))) + (cond ((eq (car d) :root) + (cons :absolute (cdr d))) + (t + (cons :relative d))))) + + +(defun glisp::run-unix-shell-command (cmd) + (glisp::unix/system cmd)) diff --git a/glisp/dep-sbcl.lisp b/glisp/dep-sbcl.lisp new file mode 100644 index 0000000..e9bb761 --- /dev/null +++ b/glisp/dep-sbcl.lisp @@ -0,0 +1,141 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: SBCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +(export 'glisp::read-byte-sequence :glisp) +(export 'glisp::read-char-sequence :glisp) +(export 'glisp::run-unix-shell-command :glisp) + +(export 'glisp::getenv :glisp) + +(export 'glisp::make-server-socket :glisp) +(export 'glisp::close-server-socket :glisp) + +(defun glisp::read-byte-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defun glisp::read-char-sequence (&rest ap) + (apply #'read-sequence ap)) + +(defmacro glisp::with-timeout ((&rest options) &body body) + (declare (ignore ignore)) + `(progn + ,@body)) + +(defun glisp::open-inet-socket (hostname port) + (values + (sb-bsd-sockets:socket-make-stream + (let ((host (car (sb-bsd-sockets:host-ent-addresses + (sb-bsd-sockets:get-host-by-name hostname))))) + (when host + (let ((s (make-instance 'sb-bsd-sockets:inet-socket + :type :stream :protocol :tcp))) + (sb-bsd-sockets:socket-connect s host port) + s))) + :element-type '(unsigned-byte 8) + :input t :output t) + :byte)) + +(defstruct (server-socket (:constructor make-server-socket-struct)) + fd + element-type + port) + + +#|| +(defun glisp::make-server-socket (port &key (element-type '(unsigned-byte 8))) + (make-server-socket-struct :fd (ext:create-inet-listener port) + :element-type element-type + :port port)) + + +(defun glisp::accept-connection/low (socket) + (mp:process-wait-until-fd-usable (server-socket-fd socket) :input) + (values + (sys:make-fd-stream (ext:accept-tcp-connection (server-socket-fd socket)) + :input t :output t + :element-type (server-socket-element-type socket)) + (cond ((subtypep (server-socket-element-type socket) 'integer) + :byte) + (t + :char)))) + +(defun glisp::close-server-socket (socket) + (unix:unix-close (server-socket-fd socket))) +||# + +;;;;;; + +(defun glisp::g/make-string (length &rest options) + (apply #'make-array length :element-type 'base-char options)) + + + +(defun glisp::run-unix-shell-command (command) + (sb-impl::process-exit-code + (sb-ext:run-program "/bin/sh" (list "-c" command) :wait t :input nil + :output nil))) + +(defmacro glisp::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) + + +;;; MP + +(export 'glisp::mp/process-yield :glisp) +(export 'glisp::mp/process-wait :glisp) +(export 'glisp::mp/process-run-function :glisp) +(export 'glisp::mp/make-lock :glisp) +(export 'glisp::mp/current-process :glisp) +(export 'glisp::mp/process-kill :glisp) + +(defun glisp::mp/make-lock (&key name) + (clim-sys::make-lock name)) + +(defmacro glisp::mp/with-lock ((lock) &body body) + `(clim-sys:with-lock-held (,lock) + ,@body)) + +(defun glisp::mp/process-yield (&optional process-to-run) + (declare (ignore process-to-run)) + (clim-sys:process-yield)) + +(defun glisp::mp/process-wait (whostate predicate) + (clim-sys:process-wait whostate predicate)) + +(defun glisp::mp/process-run-function (name fun &rest args) + (clim-sys:make-process + (lambda () + (apply fun args)) + :name name)) + +(defun glisp::mp/current-process () + (clim-sys:current-process)) + +(defun glisp::mp/process-kill (process) + (clim-sys:destroy-process process)) + +(defun glisp::getenv (string) + (sb-ext:posix-getenv string)) + diff --git a/glisp/gendep.lisp b/glisp/gendep.lisp new file mode 100644 index 0000000..61be0f8 --- /dev/null +++ b/glisp/gendep.lisp @@ -0,0 +1,427 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Generating a sane DEFPACKAGE for GLISP +;;; Created: 1999-05-25 22:30 +;;; Author: Gilbert Baumann +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +(defparameter *all-ansi-symbols* + '("&ALLOW-OTHER-KEYS" "&AUX" "&BODY" "&ENVIRONMENT" "&KEY" "&OPTIONAL" "&REST" "&WHOLE" "*" + "**" "***" "*BREAK-ON-SIGNALS*" "*COMPILE-FILE-PATHNAME*" "*COMPILE-FILE-TRUENAME*" + "*COMPILE-PRINT*" "*COMPILE-VERBOSE*" "*DEBUG-IO*" "*DEBUGGER-HOOK*" + "*DEFAULT-PATHNAME-DEFAULTS*" "*ERROR-OUTPUT*" "*FEATURES*" "*GENSYM-COUNTER*" + "*LOAD-PATHNAME*" "*LOAD-PRINT*" "*LOAD-TRUENAME*" "*LOAD-VERBOSE*" "*MACROEXPAND-HOOK*" + "*MODULES*" "*PACKAGE*" "*PRINT-ARRAY*" "*PRINT-BASE*" "*PRINT-CASE*" "*PRINT-CIRCLE*" + "*PRINT-ESCAPE*" "*PRINT-GENSYM*" "*PRINT-LENGTH*" "*PRINT-LEVEL*" "*PRINT-LINES*" + "*PRINT-MISER-WIDTH*" "*PRINT-PPRINT-DISPATCH*" "*PRINT-PRETTY*" "*PRINT-RADIX*" + "*PRINT-READABLY*" "*PRINT-RIGHT-MARGIN*" "*QUERY-IO*" "*RANDOM-STATE*" "*READ-BASE*" + "*READ-DEFAULT-FLOAT-FORMAT*" "*READ-EVAL*" "*READ-SUPPRESS*" "*READTABLE*" + "*STANDARD-INPUT*" "*STANDARD-OUTPUT*" "*TERMINAL-IO*" "*TRACE-OUTPUT*" "+" "++" "+++" "-" + "/" "//" "///" "/=" "1+" "1-" "<" "<=" "=" ">" ">=" "ABORT" "ABS" "ACONS" "ACOS" "ACOSH" + "ADD-METHOD" "ADJOIN" "ADJUST-ARRAY" "ADJUSTABLE-ARRAY-P" "ALLOCATE-INSTANCE" + "ALPHA-CHAR-P" "ALPHANUMERICP" "AND" "APPEND" "APPLY" "APROPOS" "APROPOS-LIST" "AREF" + "ARITHMETIC-ERROR" "ARITHMETIC-ERROR-OPERANDS" "ARITHMETIC-ERROR-OPERATION" "ARRAY" + "ARRAY-DIMENSION" "ARRAY-DIMENSION-LIMIT" "ARRAY-DIMENSIONS" "ARRAY-DISPLACEMENT" + "ARRAY-ELEMENT-TYPE" "ARRAY-HAS-FILL-POINTER-P" "ARRAY-IN-BOUNDS-P" "ARRAY-RANK" + "ARRAY-RANK-LIMIT" "ARRAY-ROW-MAJOR-INDEX" "ARRAY-TOTAL-SIZE" "ARRAY-TOTAL-SIZE-LIMIT" + "ARRAYP" "ASH" "ASIN" "ASINH" "ASSERT" "ASSOC" "ASSOC-IF" "ASSOC-IF-NOT" "ATAN" "ATANH" + "ATOM" "BASE-CHAR" "BASE-STRING" "BIGNUM" "BIT" "BIT-AND" "BIT-ANDC1" "BIT-ANDC2" + "BIT-EQV" "BIT-IOR" "BIT-NAND" "BIT-NOR" "BIT-NOT" "BIT-ORC1" "BIT-ORC2" "BIT-VECTOR" + "BIT-VECTOR-P" "BIT-XOR" "BLOCK" "BOOLE" "BOOLE-1" "BOOLE-2" "BOOLE-AND" "BOOLE-ANDC1" + "BOOLE-ANDC2" "BOOLE-C1" "BOOLE-C2" "BOOLE-CLR" "BOOLE-EQV" "BOOLE-IOR" "BOOLE-NAND" + "BOOLE-NOR" "BOOLE-ORC1" "BOOLE-ORC2" "BOOLE-SET" "BOOLE-XOR" "BOOLEAN" "BOTH-CASE-P" + "BOUNDP" "BREAK" "BROADCAST-STREAM" "BROADCAST-STREAM-STREAMS" "BUILT-IN-CLASS" "BUTLAST" + "BYTE" "BYTE-POSITION" "BYTE-SIZE" "CAAAAR" "CAAADR" "CAAAR" "CAADAR" "CAADDR" "CAADR" + "CAAR" "CADAAR" "CADADR" "CADAR" "CADDAR" "CADDDR" "CADDR" "CADR" "CALL-ARGUMENTS-LIMIT" + "CALL-METHOD" "CALL-NEXT-METHOD" "CAR" "CASE" "CATCH" "CCASE" "CDAAAR" "CDAADR" "CDAAR" + "CDADAR" "CDADDR" "CDADR" "CDAR" "CDDAAR" "CDDADR" "CDDAR" "CDDDAR" "CDDDDR" "CDDDR" + "CDDR" "CDR" "CEILING" "CELL-ERROR" "CELL-ERROR-NAME" "CERROR" "CHANGE-CLASS" "CHAR" + "CHAR-CODE" "CHAR-CODE-LIMIT" "CHAR-DOWNCASE" "CHAR-EQUAL" "CHAR-GREATERP" "CHAR-INT" + "CHAR-LESSP" "CHAR-NAME" "CHAR-NOT-EQUAL" "CHAR-NOT-GREATERP" "CHAR-NOT-LESSP" + "CHAR-UPCASE" "CHAR/=" "CHAR<" "CHAR<=" "CHAR=" "CHAR>" "CHAR>=" "CHARACTER" "CHARACTERP" + "CHECK-TYPE" "CIS" "CLASS" "CLASS-NAME" "CLASS-OF" "CLEAR-INPUT" "CLEAR-OUTPUT" "CLOSE" + "CLRHASH" "CODE-CHAR" "COERCE" "COMPILATION-SPEED" "COMPILE" "COMPILE-FILE" + "COMPILE-FILE-PATHNAME" "COMPILED-FUNCTION" "COMPILED-FUNCTION-P" "COMPILER-MACRO" + "COMPILER-MACRO-FUNCTION" "COMPLEMENT" "COMPLEX" "COMPLEXP" "COMPUTE-APPLICABLE-METHODS" + "COMPUTE-RESTARTS" "CONCATENATE" "CONCATENATED-STREAM" "CONCATENATED-STREAM-STREAMS" + "COND" "CONDITION" "CONJUGATE" "CONS" "CONSP" "CONSTANTLY" "CONSTANTP" "CONTINUE" + "CONTROL-ERROR" "COPY-ALIST" "COPY-LIST" "COPY-PPRINT-DISPATCH" "COPY-READTABLE" + "COPY-SEQ" "COPY-STRUCTURE" "COPY-SYMBOL" "COPY-TREE" "COS" "COSH" "COUNT" "COUNT-IF" + "COUNT-IF-NOT" "CTYPECASE" "DEBUG" "DECF" "DECLAIM" "DECLARATION" "DECLARE" "DECODE-FLOAT" + "DECODE-UNIVERSAL-TIME" "DEFCLASS" "DEFCONSTANT" "DEFGENERIC" "DEFINE-COMPILER-MACRO" + "DEFINE-CONDITION" "DEFINE-METHOD-COMBINATION" "DEFINE-MODIFY-MACRO" + "DEFINE-SETF-EXPANDER" "DEFINE-SYMBOL-MACRO" "DEFMACRO" "DEFMETHOD" "DEFPACKAGE" + "DEFPARAMETER" "DEFSETF" "DEFSTRUCT" "DEFTYPE" "DEFUN" "DEFVAR" "DELETE" + "DELETE-DUPLICATES" "DELETE-FILE" "DELETE-IF" "DELETE-IF-NOT" "DELETE-PACKAGE" + "DENOMINATOR" "DEPOSIT-FIELD" "DESCRIBE" "DESCRIBE-OBJECT" "DESTRUCTURING-BIND" + "DIGIT-CHAR" "DIGIT-CHAR-P" "DIRECTORY" "DIRECTORY-NAMESTRING" "DISASSEMBLE" + "DIVISION-BY-ZERO" "DO" "DO*" "DO-ALL-SYMBOLS" "DO-EXTERNAL-SYMBOLS" "DO-SYMBOLS" + "DOCUMENTATION" "DOLIST" "DOTIMES" "DOUBLE-FLOAT" "DOUBLE-FLOAT-EPSILON" + "DOUBLE-FLOAT-NEGATIVE-EPSILON" "DPB" "DRIBBLE" "DYNAMIC-EXTENT" "ECASE" "ECHO-STREAM" + "ECHO-STREAM-INPUT-STREAM" "ECHO-STREAM-OUTPUT-STREAM" "ED" "EIGHTH" "ELT" + "ENCODE-UNIVERSAL-TIME" "END-OF-FILE" "ENDP" "ENOUGH-NAMESTRING" + "ENSURE-DIRECTORIES-EXIST" "ENSURE-GENERIC-FUNCTION" "EQ" "EQL" "EQUAL" "EQUALP" "ERROR" + "ETYPECASE" "EVAL" "EVAL-WHEN" "EVENP" "EVERY" "EXP" "EXPORT" "EXPT" "EXTENDED-CHAR" + "FBOUNDP" "FCEILING" "FDEFINITION" "FFLOOR" "FIFTH" "FILE-AUTHOR" "FILE-ERROR" + "FILE-ERROR-PATHNAME" "FILE-LENGTH" "FILE-NAMESTRING" "FILE-POSITION" "FILE-STREAM" + "FILE-STRING-LENGTH" "FILE-WRITE-DATE" "FILL" "FILL-POINTER" "FIND" "FIND-ALL-SYMBOLS" + "FIND-CLASS" "FIND-IF" "FIND-IF-NOT" "FIND-METHOD" "FIND-PACKAGE" "FIND-RESTART" + "FIND-SYMBOL" "FINISH-OUTPUT" "FIRST" "FIXNUM" "FLET" "FLOAT" "FLOAT-DIGITS" + "FLOAT-PRECISION" "FLOAT-RADIX" "FLOAT-SIGN" "FLOATING-POINT-INEXACT" + "FLOATING-POINT-INVALID-OPERATION" "FLOATING-POINT-OVERFLOW" "FLOATING-POINT-UNDERFLOW" + "FLOATP" "FLOOR" "FMAKUNBOUND" "FORCE-OUTPUT" "FORMAT" "FORMATTER" "FOURTH" "FRESH-LINE" + "FROUND" "FTRUNCATE" "FTYPE" "FUNCALL" "FUNCTION" "FUNCTION-KEYWORDS" + "FUNCTION-LAMBDA-EXPRESSION" "FUNCTIONP" "GCD" "GENERIC-FUNCTION" "GENSYM" "GENTEMP" "GET" + "GET-DECODED-TIME" "GET-DISPATCH-MACRO-CHARACTER" "GET-INTERNAL-REAL-TIME" + "GET-INTERNAL-RUN-TIME" "GET-MACRO-CHARACTER" "GET-OUTPUT-STREAM-STRING" "GET-PROPERTIES" + "GET-SETF-EXPANSION" "GET-UNIVERSAL-TIME" "GETF" "GETHASH" "GO" "GRAPHIC-CHAR-P" + "HANDLER-BIND" "HANDLER-CASE" "HASH-TABLE" "HASH-TABLE-COUNT" "HASH-TABLE-P" + "HASH-TABLE-REHASH-SIZE" "HASH-TABLE-REHASH-THRESHOLD" "HASH-TABLE-SIZE" "HASH-TABLE-TEST" + "HOST-NAMESTRING" "IDENTITY" "IF" "IGNORABLE" "IGNORE" "IGNORE-ERRORS" "IMAGPART" "IMPORT" + "IN-PACKAGE" "INCF" "INITIALIZE-INSTANCE" "INLINE" "INPUT-STREAM-P" "INSPECT" "INTEGER" + "INTEGER-DECODE-FLOAT" "INTEGER-LENGTH" "INTEGERP" "INTERACTIVE-STREAM-P" "INTERN" + "INTERNAL-TIME-UNITS-PER-SECOND" "INTERSECTION" "INVALID-METHOD-ERROR" "INVOKE-DEBUGGER" + "INVOKE-RESTART" "INVOKE-RESTART-INTERACTIVELY" "ISQRT" "KEYWORD" "KEYWORDP" "LABELS" + "LAMBDA" "LAMBDA-LIST-KEYWORDS" "LAMBDA-PARAMETERS-LIMIT" "LAST" "LCM" "LDB" "LDB-TEST" + "LDIFF" "LEAST-NEGATIVE-DOUBLE-FLOAT" "LEAST-NEGATIVE-LONG-FLOAT" + "LEAST-NEGATIVE-NORMALIZED-DOUBLE-FLOAT" "LEAST-NEGATIVE-NORMALIZED-LONG-FLOAT" + "LEAST-NEGATIVE-NORMALIZED-SHORT-FLOAT" "LEAST-NEGATIVE-NORMALIZED-SINGLE-FLOAT" + "LEAST-NEGATIVE-SHORT-FLOAT" "LEAST-NEGATIVE-SINGLE-FLOAT" "LEAST-POSITIVE-DOUBLE-FLOAT" + "LEAST-POSITIVE-LONG-FLOAT" "LEAST-POSITIVE-NORMALIZED-DOUBLE-FLOAT" + "LEAST-POSITIVE-NORMALIZED-LONG-FLOAT" "LEAST-POSITIVE-NORMALIZED-SHORT-FLOAT" + "LEAST-POSITIVE-NORMALIZED-SINGLE-FLOAT" "LEAST-POSITIVE-SHORT-FLOAT" + "LEAST-POSITIVE-SINGLE-FLOAT" "LENGTH" "LET" "LET*" "LISP-IMPLEMENTATION-TYPE" + "LISP-IMPLEMENTATION-VERSION" "LIST" "LIST*" "LIST-ALL-PACKAGES" "LIST-LENGTH" "LISTEN" + "LISTP" "LOAD" "LOAD-LOGICAL-PATHNAME-TRANSLATIONS" "LOAD-TIME-VALUE" "LOCALLY" "LOG" + "LOGAND" "LOGANDC1" "LOGANDC2" "LOGBITP" "LOGCOUNT" "LOGEQV" "LOGICAL-PATHNAME" + "LOGICAL-PATHNAME-TRANSLATIONS" "LOGIOR" "LOGNAND" "LOGNOR" "LOGNOT" "LOGORC1" "LOGORC2" + "LOGTEST" "LOGXOR" "LONG-FLOAT" "LONG-FLOAT-EPSILON" "LONG-FLOAT-NEGATIVE-EPSILON" + "LONG-SITE-NAME" "LOOP" "LOOP-FINISH" "LOWER-CASE-P" "MACHINE-INSTANCE" "MACHINE-TYPE" + "MACHINE-VERSION" "MACRO-FUNCTION" "MACROEXPAND" "MACROEXPAND-1" "MACROLET" "MAKE-ARRAY" + "MAKE-BROADCAST-STREAM" "MAKE-CONCATENATED-STREAM" "MAKE-CONDITION" + "MAKE-DISPATCH-MACRO-CHARACTER" "MAKE-ECHO-STREAM" "MAKE-HASH-TABLE" "MAKE-INSTANCE" + "MAKE-INSTANCES-OBSOLETE" "MAKE-LIST" "MAKE-LOAD-FORM" "MAKE-LOAD-FORM-SAVING-SLOTS" + "MAKE-METHOD" "MAKE-PACKAGE" "MAKE-PATHNAME" "MAKE-RANDOM-STATE" "MAKE-SEQUENCE" + "MAKE-STRING" "MAKE-STRING-INPUT-STREAM" "MAKE-STRING-OUTPUT-STREAM" "MAKE-SYMBOL" + "MAKE-SYNONYM-STREAM" "MAKE-TWO-WAY-STREAM" "MAKUNBOUND" "MAP" "MAP-INTO" "MAPC" "MAPCAN" + "MAPCAR" "MAPCON" "MAPHASH" "MAPL" "MAPLIST" "MASK-FIELD" "MAX" "MEMBER" "MEMBER-IF" + "MEMBER-IF-NOT" "MERGE" "MERGE-PATHNAMES" "METHOD" "METHOD-COMBINATION" + "METHOD-COMBINATION-ERROR" "METHOD-QUALIFIERS" "MIN" "MINUSP" "MISMATCH" "MOD" + "MOST-NEGATIVE-DOUBLE-FLOAT" "MOST-NEGATIVE-FIXNUM" "MOST-NEGATIVE-LONG-FLOAT" + "MOST-NEGATIVE-SHORT-FLOAT" "MOST-NEGATIVE-SINGLE-FLOAT" "MOST-POSITIVE-DOUBLE-FLOAT" + "MOST-POSITIVE-FIXNUM" "MOST-POSITIVE-LONG-FLOAT" "MOST-POSITIVE-SHORT-FLOAT" + "MOST-POSITIVE-SINGLE-FLOAT" "MUFFLE-WARNING" "MULTIPLE-VALUE-BIND" "MULTIPLE-VALUE-CALL" + "MULTIPLE-VALUE-LIST" "MULTIPLE-VALUE-PROG1" "MULTIPLE-VALUE-SETQ" "MULTIPLE-VALUES-LIMIT" + "NAME-CHAR" "NAMESTRING" "NBUTLAST" "NCONC" "NEXT-METHOD-P" "NIL" "NINTERSECTION" "NINTH" + "NO-APPLICABLE-METHOD" "NO-NEXT-METHOD" "NOT" "NOTANY" "NOTEVERY" "NOTINLINE" "NRECONC" + "NREVERSE" "NSET-DIFFERENCE" "NSET-EXCLUSIVE-OR" "NSTRING-CAPITALIZE" "NSTRING-DOWNCASE" + "NSTRING-UPCASE" "NSUBLIS" "NSUBST" "NSUBST-IF" "NSUBST-IF-NOT" "NSUBSTITUTE" + "NSUBSTITUTE-IF" "NSUBSTITUTE-IF-NOT" "NTH" "NTH-VALUE" "NTHCDR" "NULL" "NUMBER" "NUMBERP" + "NUMERATOR" "NUNION" "ODDP" "OPEN" "OPEN-STREAM-P" "OPTIMIZE" "OR" "OTHERWISE" + "OUTPUT-STREAM-P" "PACKAGE" "PACKAGE-ERROR" "PACKAGE-ERROR-PACKAGE" "PACKAGE-NAME" + "PACKAGE-NICKNAMES" "PACKAGE-SHADOWING-SYMBOLS" "PACKAGE-USE-LIST" "PACKAGE-USED-BY-LIST" + "PACKAGEP" "PAIRLIS" "PARSE-ERROR" "PARSE-INTEGER" "PARSE-NAMESTRING" "PATHNAME" + "PATHNAME-DEVICE" "PATHNAME-DIRECTORY" "PATHNAME-HOST" "PATHNAME-MATCH-P" "PATHNAME-NAME" + "PATHNAME-TYPE" "PATHNAME-VERSION" "PATHNAMEP" "PEEK-CHAR" "PHASE" "PI" "PLUSP" "POP" + "POSITION" "POSITION-IF" "POSITION-IF-NOT" "PPRINT" "PPRINT-DISPATCH" + "PPRINT-EXIT-IF-LIST-EXHAUSTED" "PPRINT-FILL" "PPRINT-INDENT" "PPRINT-LINEAR" + "PPRINT-LOGICAL-BLOCK" "PPRINT-NEWLINE" "PPRINT-POP" "PPRINT-TAB" "PPRINT-TABULAR" "PRIN1" + "PRIN1-TO-STRING" "PRINC" "PRINC-TO-STRING" "PRINT" "PRINT-NOT-READABLE" + "PRINT-NOT-READABLE-OBJECT" "PRINT-OBJECT" "PRINT-UNREADABLE-OBJECT" "PROBE-FILE" + "PROCLAIM" "PROG" "PROG*" "PROG1" "PROG2" "PROGN" "PROGRAM-ERROR" "PROGV" "PROVIDE" + "PSETF" "PSETQ" "PUSH" "PUSHNEW" "QUOTE" "RANDOM" "RANDOM-STATE" "RANDOM-STATE-P" "RASSOC" + "RASSOC-IF" "RASSOC-IF-NOT" "RATIO" "RATIONAL" "RATIONALIZE" "RATIONALP" "READ" + "READ-BYTE" "READ-CHAR" "READ-CHAR-NO-HANG" "READ-DELIMITED-LIST" "READ-FROM-STRING" + "READ-LINE" "READ-PRESERVING-WHITESPACE" "READ-SEQUENCE" "READER-ERROR" "READTABLE" + "READTABLE-CASE" "READTABLEP" "REAL" "REALP" "REALPART" "REDUCE" "REINITIALIZE-INSTANCE" + "REM" "REMF" "REMHASH" "REMOVE" "REMOVE-DUPLICATES" "REMOVE-IF" "REMOVE-IF-NOT" + "REMOVE-METHOD" "REMPROP" "RENAME-FILE" "RENAME-PACKAGE" "REPLACE" "REQUIRE" "REST" + "RESTART" "RESTART-BIND" "RESTART-CASE" "RESTART-NAME" "RETURN" "RETURN-FROM" "REVAPPEND" + "REVERSE" "ROOM" "ROTATEF" "ROUND" "ROW-MAJOR-AREF" "RPLACA" "RPLACD" "SAFETY" "SATISFIES" + "SBIT" "SCALE-FLOAT" "SCHAR" "SEARCH" "SECOND" "SEQUENCE" "SERIOUS-CONDITION" "SET" + "SET-DIFFERENCE" "SET-DISPATCH-MACRO-CHARACTER" "SET-EXCLUSIVE-OR" "SET-MACRO-CHARACTER" + "SET-PPRINT-DISPATCH" "SET-SYNTAX-FROM-CHAR" "SETF" "SETQ" "SEVENTH" "SHADOW" + "SHADOWING-IMPORT" "SHARED-INITIALIZE" "SHIFTF" "SHORT-FLOAT" "SHORT-FLOAT-EPSILON" + "SHORT-FLOAT-NEGATIVE-EPSILON" "SHORT-SITE-NAME" "SIGNAL" "SIGNED-BYTE" "SIGNUM" + "SIMPLE-ARRAY" "SIMPLE-BASE-STRING" "SIMPLE-BIT-VECTOR" "SIMPLE-BIT-VECTOR-P" + "SIMPLE-CONDITION" "SIMPLE-CONDITION-FORMAT-ARGUMENTS" "SIMPLE-CONDITION-FORMAT-CONTROL" + "SIMPLE-ERROR" "SIMPLE-STRING" "SIMPLE-STRING-P" "SIMPLE-TYPE-ERROR" "SIMPLE-VECTOR" + "SIMPLE-VECTOR-P" "SIMPLE-WARNING" "SIN" "SINGLE-FLOAT" "SINGLE-FLOAT-EPSILON" + "SINGLE-FLOAT-NEGATIVE-EPSILON" "SINH" "SIXTH" "SLEEP" "SLOT-BOUNDP" "SLOT-EXISTS-P" + "SLOT-MAKUNBOUND" "SLOT-MISSING" "SLOT-UNBOUND" "SLOT-VALUE" "SOFTWARE-TYPE" + "SOFTWARE-VERSION" "SOME" "SORT" "SPACE" "SPECIAL" "SPECIAL-OPERATOR-P" "SPEED" "SQRT" + "STABLE-SORT" "STANDARD" "STANDARD-CHAR" "STANDARD-CHAR-P" "STANDARD-CLASS" + "STANDARD-GENERIC-FUNCTION" "STANDARD-METHOD" "STANDARD-OBJECT" "STEP" "STORAGE-CONDITION" + "STORE-VALUE" "STREAM" "STREAM-ELEMENT-TYPE" "STREAM-ERROR" "STREAM-ERROR-STREAM" + "STREAM-EXTERNAL-FORMAT" "STREAMP" "STRING" "STRING-CAPITALIZE" "STRING-DOWNCASE" + "STRING-EQUAL" "STRING-GREATERP" "STRING-LEFT-TRIM" "STRING-LESSP" "STRING-NOT-EQUAL" + "STRING-NOT-GREATERP" "STRING-NOT-LESSP" "STRING-RIGHT-TRIM" "STRING-STREAM" "STRING-TRIM" + "STRING-UPCASE" "STRING/=" "STRING<" "STRING<=" "STRING=" "STRING>" "STRING>=" "STRINGP" + "STRUCTURE" "STRUCTURE-CLASS" "STRUCTURE-OBJECT" "STYLE-WARNING" "SUBLIS" "SUBSEQ" + "SUBSETP" "SUBST" "SUBST-IF" "SUBST-IF-NOT" "SUBSTITUTE" "SUBSTITUTE-IF" + "SUBSTITUTE-IF-NOT" "SUBTYPEP" "SVREF" "SXHASH" "SYMBOL" "SYMBOL-FUNCTION" + "SYMBOL-MACROLET" "SYMBOL-NAME" "SYMBOL-PACKAGE" "SYMBOL-PLIST" "SYMBOL-VALUE" "SYMBOLP" + "SYNONYM-STREAM" "SYNONYM-STREAM-SYMBOL" "T" "TAGBODY" "TAILP" "TAN" "TANH" "TENTH" + "TERPRI" "THE" "THIRD" "THROW" "TIME" "TRACE" "TRANSLATE-LOGICAL-PATHNAME" + "TRANSLATE-PATHNAME" "TREE-EQUAL" "TRUENAME" "TRUNCATE" "TWO-WAY-STREAM" + "TWO-WAY-STREAM-INPUT-STREAM" "TWO-WAY-STREAM-OUTPUT-STREAM" "TYPE" "TYPE-ERROR" + "TYPE-ERROR-DATUM" "TYPE-ERROR-EXPECTED-TYPE" "TYPE-OF" "TYPECASE" "TYPEP" "UNBOUND-SLOT" + "UNBOUND-SLOT-INSTANCE" "UNBOUND-VARIABLE" "UNDEFINED-FUNCTION" "UNEXPORT" "UNINTERN" + "UNION" "UNLESS" "UNREAD-CHAR" "UNSIGNED-BYTE" "UNTRACE" "UNUSE-PACKAGE" "UNWIND-PROTECT" + "UPDATE-INSTANCE-FOR-DIFFERENT-CLASS" "UPDATE-INSTANCE-FOR-REDEFINED-CLASS" + "UPGRADED-ARRAY-ELEMENT-TYPE" "UPGRADED-COMPLEX-PART-TYPE" "UPPER-CASE-P" "USE-PACKAGE" + "USE-VALUE" "USER-HOMEDIR-PATHNAME" "VALUES" "VALUES-LIST" "VARIABLE" "VECTOR" + "VECTOR-POP" "VECTOR-PUSH" "VECTOR-PUSH-EXTEND" "VECTORP" "WARN" "WARNING" "WHEN" + "WILD-PATHNAME-P" "WITH-ACCESSORS" "WITH-COMPILATION-UNIT" "WITH-CONDITION-RESTARTS" + "WITH-HASH-TABLE-ITERATOR" "WITH-INPUT-FROM-STRING" "WITH-OPEN-FILE" "WITH-OPEN-STREAM" + "WITH-OUTPUT-TO-STRING" "WITH-PACKAGE-ITERATOR" "WITH-SIMPLE-RESTART" "WITH-SLOTS" + "WITH-STANDARD-IO-SYNTAX" "WRITE" "WRITE-BYTE" "WRITE-CHAR" "WRITE-LINE" "WRITE-SEQUENCE" + "WRITE-STRING" "WRITE-TO-STRING" "Y-OR-N-P" "YES-OR-NO-P" "ZEROP")) + +(defvar *export-from-glisp* + '( + "DEFSUBST" + "G/MAKE-STRING" + "MP/MAKE-LOCK" + "MP/WITH-LOCK" + "WITH-TIMEOUT" + "OPEN-INET-SOCKET" + ;; util.lisp : + "ALWAYS" + "CL-BYTE-STREAM" + "CL-CHAR-STREAM" + "CL-STREAM" + "COMPOSE" + "CURRY" + "FALSE" + "FORCE" + "G/CLOSE" + "G/FINISH-OUTPUT" + "G/PEEK-CHAR" + "G/READ-BYTE" + "G/READ-BYTE-SEQUENCE" + "G/READ-CHAR" + "G/READ-CHAR-SEQUENCE" + "G/READ-LINE" + "G/READ-LINE*" + "G/UNREAD-BYTE" + "G/UNREAD-CHAR" + "G/WRITE-BYTE" + "G/WRITE-BYTE-SEQUENCE" + "G/WRITE-CHAR" + "G/WRITE-STRING" + "GSTREAM" + "MAP-ARRAY" + "MAPFCAR" + "MAX*" + "MAXF" + "MIN*" + "MINF" + "MULTIPLE-VALUE-OR" + "MULTIPLE-VALUE-SOME" + "NCONCF" + "NEQ" + "PROMISE" + "RCURRY" + "SANIFY-STRING" + "SHOW" + "SPLIT-BY" + "SPLIT-BY-IF" + "SPLIT-BY-MEMBER" + "SPLIT-STRING" + "STRING-BEGIN-EQUAL" + "TRUE" + "UNTIL" + "USE-BYTE-FOR-CHAR-STREAM-FLAVOUR" + "USE-CHAR-FOR-BYTE-STREAM-FLAVOUR" + "WHILE" + "WHITE-SPACE-P" + + "CL-BYTE-STREAM->GSTREAM" + "CL-CHAR-STREAM->GSTREAM" + "G/OPEN-INET-SOCKET" + "ACCEPT-CONNECTION" + + "FIND-TEMPORARY-FILE" + "DELETE-TEMPORARY-FILE" + "WITH-TEMPORARY-FILE" + + "SET-EQUAL" + "MAYBE-PARSE-INTEGER" + "NOP" + "WITH-STRUCTURE-SLOTS" + + "COMPILE-FUNCALL" + "FUNCALL*" + "MAPC*" + "VREDUCE*" + "LREDUCE*" + "WITH-UNIQUE-NAMES" + + ;; runes.lisp + "RUNE" + "ROD" + "SIMPLE-ROD" + "%RUNE" + "ROD-CAPITALIZE" + "CODE-RUNE" + "RUNE-CODE" + "RUNE-DOWNCASE" + "RUNE-UPCASE" + "ROD-DOWNCASE" + "ROD-UPCASE" + "WHITE-SPACE-RUNE-P" + "DIGIT-RUNE-P" + "RUNE=" + "RUNE<=" + "RUNE>=" + "RUNE-EQUAL" + "RUNEP" + "SLOOPY-ROD-P" + "ROD=" + "ROD-EQUAL" + "MAKE-ROD" + "CHAR-RUNE" + "RUNE-CHAR" + "ROD-STRING" + "STRING-ROD" + "ROD-SUBSEQ" + + "G/MAKE-HASH-TABLE" + "G/HASHGET" + "G/CLRHASH" + "STIR-HASH-CODES" + "HASH-SEQUENCE" + "HASH/STRING-EQUAL" + "MAKE-STRING-EQUAL-HASH-TABLE" + + "PRIMEP" + + ;; match.lisp + "DEFINE-MATCH-MACRO" + "IF-MATCH" + "GSTREAM-AS-STRING" + )) + +(defparameter *packages* + #-GCL '(:common-lisp) + #+GCL '(:lisp :pcl) ) + +(defparameter *dep-id* + #+CLISP "clisp" + #+(AND :CMU (NOT :PTHREAD)) "cmucl" + #+(AND :CMU :PTHREAD) "cmucl-dtc" + #+(AND ALLEGRO ALLEGRO-V5.0) "acl5" + #+(AND ALLEGRO (NOT ALLEGRO-V5.0)) "acl" + #+GCL "gcl" + #-(OR CLISP CMU ALLEGRO GCL) + #.(error "Configure!")) + +;; all symbols, which are defined by gray streams + +(defparameter *gray-symbols* + '("FUNDAMENTAL-STREAM" + "FUNDAMENTAL-INPUT-STREAM" + "FUNDAMENTAL-OUTPUT-STREAM" + "FUNDAMENTAL-CHARACTER-STREAM" + "FUNDAMENTAL-BINARY-STREAM" + "FUNDAMENTAL-CHARACTER-INPUT-STREAM" + "FUNDAMENTAL-CHARACTER-OUTPUT-STREAM" + "FUNDAMENTAL-BINARY-INPUT-STREAM" + + "STREAM-READ-CHAR" + "STREAM-UNREAD-CHAR" + "STREAM-READ-CHAR-NO-HANG" + "STREAM-PEEK-CHAR" + "STREAM-LISTEN" + "STREAM-READ-LINE" + "STREAM-CLEAR-INPUT" + + "STREAM-WRITE-CHAR" + "STREAM-LINE-COLUMN" + "STREAM-START-LINE-P" + "STREAM-WRITE-STRING" + "STREAM-TERPRI" + "STREAM-FRESH-LINE" + "STREAM-FINISH-OUTPUT" + "STREAM-FORCE-OUTPUT" + "STREAM-ADVANCE-TO-COLUMN" + "STREAM-CLEAR-INPUT" + + "STREAM-READ-BYTE" + "STREAM-WRITE-BYTE" )) + +(defparameter *gray-packages* + #+:CLISP '(:lisp) + #+:CMU '(:ext) + #+:ALLEGRO '(:excl) + #+:HARLEQUIN-COMMON-LISP '(:stream) + ) + +(defun seek-symbol (name packages) + ;; Seek the a symbol named 'name' in `packages' + (or (some #'(lambda (p) + (multiple-value-bind (sym res) (find-symbol name p) + (if (eql res :external) + (list sym) + nil))) + packages) + (progn (format T "~&There is no ~A." name) + (finish-output) + nil))) + +(defun dump-defpackage (sink) + (format sink ";; AUTOMATICALLY CREATED -- DO NOT EDIT") + (format sink "~%;; Lisp Implementation Type: ~A" (lisp-implementation-type)) + (format sink "~%;; Lisp Implementation Version: ~A" (lisp-implementation-version)) + (format sink "~%") + (let ((*print-case* :downcase) + (export-ansi nil) + (export-gray nil)) + (format sink "~%(in-package :~A)" (package-name *package*)) + (format sink "~%") + (format sink "~%(defpackage :glisp") + (format sink "~% (:use)") + (labels ((grok (symbols packages) + (let ((res nil)) + (dolist (nam symbols) + (let ((sym (seek-symbol nam packages))) + (when sym + (push (car sym) res) + (cond ((multiple-value-bind (sym2 res) (find-symbol nam :glisp) + (and sym2 (eq res :external))) + (format sink "~% ;; ~S patched" (car sym)) ) + (t + (setf sym (car sym)) + ;; CLISP has no (:import ..) ARG! + (format sink "~% (:import-from :~A #:~A)" + (package-name (symbol-package sym)) + (symbol-name sym))))))) + res))) + (setf export-ansi (grok *all-ansi-symbols* *packages*)) + (setf export-gray (grok *gray-symbols* *gray-packages*))) + (format sink "~%") + (format sink "~% ;; -- Export ------------------------------") + (format sink "~%") + (format sink "~% (:export") + (format sink "~% ;; ********** ANSI-CL") + (dolist (k (reverse export-ansi)) + (format sink "~% #:~(~A~)" k)) + (format sink "~% ;; ********** Gray Streams") + (dolist (k (reverse export-gray)) + (format sink "~% #:~(~A~)" k)) + (format sink "~%~% ;; ********** Private stuff") + (dolist (k *export-from-glisp*) + (format sink "~% #:~(~A~)" k)) + (format sink "))") + (format sink "~%") + (format sink "~%(defpackage :gluser (:use :glisp))") + (format sink "~%") ) + (terpri sink)) + +(defun run () + (make-package :glisp :use ()) + (load (format nil "dep-~A.lisp" *dep-id*)) + (with-open-file (sink (format nil "dfpck-~A.lisp" *dep-id*) :direction :output :if-exists :new-version) + (dump-defpackage sink))) diff --git a/glisp/match.lisp b/glisp/match.lisp new file mode 100644 index 0000000..1bdc712 --- /dev/null +++ b/glisp/match.lisp @@ -0,0 +1,207 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Very simple (non-deterministic) regular expression matching +;;; Created: 1999-01-21 +;;; Author: Gilbert Baumann +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :GLISP) + +;; Syntax +;; ------ + +;; atom -- match the atom +;; (p predicate) -- match, iff (funcall p elt) is non-NIL +;; (& a0 .. an) -- match a0a1..an +;; (/ a0 .. an) -- match a0 or a1 ... or an +;; (* a0 .. an) -- iteration, match any number of (& a0 ... an) +;; (+ . rest) == (/ (& . rest) (* . rest)) +;; (? . rest) == (/ (& . rest) (&)) +;; (= var subexpr) == assign the subexpr to the match variable 'var' +;; +;; not implemented: +;; (- a b) -- match a, but not b +;; (and a b) -- matches if a and b matches +;; (or a b) == (/ a b) +;; (not x) == matches if x does not match +;; + +;; This syntax has to be merged with clex as well. + +(defvar *match-macros* (make-hash-table :test #'eq)) + +(defmacro define-match-macro (name args &body body) + `(eval-when (compile load eval) + (setf (gethash ',name *match-macros*) + #'(lambda (whole) + (destructuring-bind ,args (cdr whole) + ,@body))) + ',name)) + +(defun symcat (&rest syms) + (let ((pack (dolist (k syms nil) + (when (symbolp k) + (return (symbol-package k)))))) + (cond ((null pack) + (error "No package for ~S of ~S." 'symcat syms)) + (t + (intern (apply #'concatenate 'string (mapcar #'string syms)) + pack))))) + +(defun sym-equal (a b) + (string= (symbol-name a) (symbol-name b))) + +(defun bau-funcall (fun &rest args) + (cond ((and (consp fun) (eq (car fun) 'lambda)) + (cons fun args)) + ((and (consp fun) (eq (car fun) 'function)) + (cons (cadr fun) args)) + (t + (list* 'funcall fun args)))) + +(defun compile-srx (srx action &key (string-type 'vector) (test '#'eql)) + (let ((vars nil)) + (labels ((cmp (x cont-expr) + (cond + ((atom x) + (with-unique-names (string start end) + `(lambda (,string ,start ,end) + (declare (type fixnum ,start ,end) + (type ,string-type ,string)) + (if (and (< ,start ,end) + ,(bau-funcall test `(aref ,string ,start) `',x)) + ,(bau-funcall cont-expr string `(the fixnum (1+ ,start)) end))))) + + ((sym-equal (car x) 'p) + (destructuring-bind (p) (cdr x) + (with-unique-names (string start end) + `(lambda (,string ,start ,end) + (declare (type fixnum ,start ,end) + (type ,string-type ,string)) + (if (and (< ,start ,end) + ,(bau-funcall p `(aref ,string ,start))) + ,(bau-funcall cont-expr string `(the fixnum (1+ ,start)) end)))))) + + ((sym-equal (car x) '/) + (with-unique-names (ccfn string string2 start end end2 j) + `(lambda (,string ,start ,end) + (declare (type fixnum ,start ,end) + (type ,string-type ,string)) + (labels ((,ccfn (,string2 ,j ,end2) + (declare (type fixnum ,j ,end2) + (type ,string-type ,string2)) + ,(bau-funcall cont-expr string2 j end2))) + ,@(mapcar (lambda (a) + `(,(cmp a `#',ccfn) ,string ,start ,end)) + (cdr x)))))) + + ((sym-equal (car x) '*) + (with-unique-names (ccfn string string2 start end end2 j) + (let ((subexpr (cons '& (cdr x)))) + `(lambda (,string ,start ,end) + (declare (type fixnum ,start ,end) + (type ,string-type ,string)) + (labels ((,ccfn (,string2 ,j ,end2) + (declare (type fixnum ,j ,end2) + (type ,string-type ,string2)) + (,(cmp subexpr `#',ccfn) ,string2 ,j ,end2) + ,(bau-funcall cont-expr string j end))) + (,ccfn ,string ,start ,end)))))) + + ((sym-equal (car x) '&) + (case (length x) + (1 (with-unique-names (string start end) + `(lambda (,string ,start ,end) + (declare (type fixnum ,start ,end) + (type ,string-type ,string)) + ,(bau-funcall cont-expr string start end)))) + (2 (cmp (cadr x) cont-expr)) + (otherwise + (with-unique-names (string start end) + `(lambda (,string ,start ,end) + (declare (type fixnum ,start ,end) + (type ,string-type ,string)) + (,(cmp (cadr x) + (with-unique-names (string j end) + `#'(lambda (,string ,j ,end) + (declare (type fixnum ,j ,end) + (type ,string-type ,string)) + (,(cmp (cons '& (cddr x)) cont-expr) ,string ,j ,end)))) + ,string ,start ,end)))))) + + ((sym-equal (car x) '=) + (destructuring-bind (var subexpr) (cdr x) + (pushnew var vars) + (with-unique-names (string i0 end) + `(lambda (,string ,i0 ,end) + (declare (type fixnum ,i0 ,end) + (type ,string-type ,string)) + (,(cmp subexpr + (with-unique-names (string i1 end) + `#'(lambda (,string ,i1 ,end) + (declare (type fixnum ,i1 ,end) + (type ,string-type ,string)) + (setf ,(symcat var "-START") ,i0 + ,(symcat var "-END") ,i1) + ,(bau-funcall cont-expr string i1 end)))) + ,string ,i0 ,end))))) + + ((sym-equal (car x) '+) + (cmp `(& ,@(cdr x) (* ,@(cdr x))) cont-expr)) + + ((sym-equal (car x) '?) + (cmp `(/ (&) (& ,@(cdr x))) cont-expr)) + + (t + (let ((mmf (gethash (car x) *match-macros*))) + (cond (mmf + (cmp (funcall mmf x) cont-expr)) + (t + (error "Unknown symbolic regular expression: ~S." x))))) ))) + + (with-unique-names (string start end continuation match) + (let ((cf (cmp srx `#',continuation))) + `(lambda (,string ,start ,end) + (declare ;;#.cl-user:+optimize-very-fast+ + (type fixnum ,start ,end) + (type ,string-type ,string)) + (block ,match + (let ,(mapcan (lambda (var) (list (symcat var "-START") (symcat var "-END"))) vars) + (labels (,(with-unique-names (string j end) + `(,continuation (,string ,j ,end) + (declare (type fixnum ,j ,end) + (type ,string-type ,string)) + (declare (ignore ,string)) + (if (= ,j ,end) + (let () + (return-from ,match ,action)))))) + (,cf ,string ,start ,end))) + nil))))))) + +(defmacro if-match ((string &key start end type (test '#'eql)) srx &body actions) + (let ((str (gensym "str"))) + `(let ((,str ,string)) + (,(compile-srx srx `(progn .,actions) + :string-type (or type 'vector) + :test test) + ,str + ,(if start start 0) + ,(if end end `(length ,str)))))) + diff --git a/glisp/package.lisp b/glisp/package.lisp new file mode 100644 index 0000000..902233f --- /dev/null +++ b/glisp/package.lisp @@ -0,0 +1,406 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP-TEMP; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Generating a sane DEFPACKAGE for GLISP +;;; Created: 1999-05-25 +;;; Author: Gilbert Baumann +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999,2000 by Gilbert Baumann + +(defpackage :glisp-temp (:use #:cl)) +(in-package :glisp-temp) + +(defpackage :glisp (:use)) + +(eval-when (compile) + (defparameter *all-ansi-symbols* + '("&ALLOW-OTHER-KEYS" "&AUX" "&BODY" "&ENVIRONMENT" "&KEY" "&OPTIONAL" "&REST" "&WHOLE" "*" + "**" "***" "*BREAK-ON-SIGNALS*" "*COMPILE-FILE-PATHNAME*" "*COMPILE-FILE-TRUENAME*" + "*COMPILE-PRINT*" "*COMPILE-VERBOSE*" "*DEBUG-IO*" "*DEBUGGER-HOOK*" + "*DEFAULT-PATHNAME-DEFAULTS*" "*ERROR-OUTPUT*" "*FEATURES*" "*GENSYM-COUNTER*" + "*LOAD-PATHNAME*" "*LOAD-PRINT*" "*LOAD-TRUENAME*" "*LOAD-VERBOSE*" "*MACROEXPAND-HOOK*" + "*MODULES*" "*PACKAGE*" "*PRINT-ARRAY*" "*PRINT-BASE*" "*PRINT-CASE*" "*PRINT-CIRCLE*" + "*PRINT-ESCAPE*" "*PRINT-GENSYM*" "*PRINT-LENGTH*" "*PRINT-LEVEL*" "*PRINT-LINES*" + "*PRINT-MISER-WIDTH*" "*PRINT-PPRINT-DISPATCH*" "*PRINT-PRETTY*" "*PRINT-RADIX*" + "*PRINT-READABLY*" "*PRINT-RIGHT-MARGIN*" "*QUERY-IO*" "*RANDOM-STATE*" "*READ-BASE*" + "*READ-DEFAULT-FLOAT-FORMAT*" "*READ-EVAL*" "*READ-SUPPRESS*" "*READTABLE*" + "*STANDARD-INPUT*" "*STANDARD-OUTPUT*" "*TERMINAL-IO*" "*TRACE-OUTPUT*" "+" "++" "+++" "-" + "/" "//" "///" "/=" "1+" "1-" "<" "<=" "=" ">" ">=" "ABORT" "ABS" "ACONS" "ACOS" "ACOSH" + "ADD-METHOD" "ADJOIN" "ADJUST-ARRAY" "ADJUSTABLE-ARRAY-P" "ALLOCATE-INSTANCE" + "ALPHA-CHAR-P" "ALPHANUMERICP" "AND" "APPEND" "APPLY" "APROPOS" "APROPOS-LIST" "AREF" + "ARITHMETIC-ERROR" "ARITHMETIC-ERROR-OPERANDS" "ARITHMETIC-ERROR-OPERATION" "ARRAY" + "ARRAY-DIMENSION" "ARRAY-DIMENSION-LIMIT" "ARRAY-DIMENSIONS" "ARRAY-DISPLACEMENT" + "ARRAY-ELEMENT-TYPE" "ARRAY-HAS-FILL-POINTER-P" "ARRAY-IN-BOUNDS-P" "ARRAY-RANK" + "ARRAY-RANK-LIMIT" "ARRAY-ROW-MAJOR-INDEX" "ARRAY-TOTAL-SIZE" "ARRAY-TOTAL-SIZE-LIMIT" + "ARRAYP" "ASH" "ASIN" "ASINH" "ASSERT" "ASSOC" "ASSOC-IF" "ASSOC-IF-NOT" "ATAN" "ATANH" + "ATOM" "BASE-CHAR" "BASE-STRING" "BIGNUM" "BIT" "BIT-AND" "BIT-ANDC1" "BIT-ANDC2" + "BIT-EQV" "BIT-IOR" "BIT-NAND" "BIT-NOR" "BIT-NOT" "BIT-ORC1" "BIT-ORC2" "BIT-VECTOR" + "BIT-VECTOR-P" "BIT-XOR" "BLOCK" "BOOLE" "BOOLE-1" "BOOLE-2" "BOOLE-AND" "BOOLE-ANDC1" + "BOOLE-ANDC2" "BOOLE-C1" "BOOLE-C2" "BOOLE-CLR" "BOOLE-EQV" "BOOLE-IOR" "BOOLE-NAND" + "BOOLE-NOR" "BOOLE-ORC1" "BOOLE-ORC2" "BOOLE-SET" "BOOLE-XOR" "BOOLEAN" "BOTH-CASE-P" + "BOUNDP" "BREAK" "BROADCAST-STREAM" "BROADCAST-STREAM-STREAMS" "BUILT-IN-CLASS" "BUTLAST" + "BYTE" "BYTE-POSITION" "BYTE-SIZE" "CAAAAR" "CAAADR" "CAAAR" "CAADAR" "CAADDR" "CAADR" + "CAAR" "CADAAR" "CADADR" "CADAR" "CADDAR" "CADDDR" "CADDR" "CADR" "CALL-ARGUMENTS-LIMIT" + "CALL-METHOD" "CALL-NEXT-METHOD" "CAR" "CASE" "CATCH" "CCASE" "CDAAAR" "CDAADR" "CDAAR" + "CDADAR" "CDADDR" "CDADR" "CDAR" "CDDAAR" "CDDADR" "CDDAR" "CDDDAR" "CDDDDR" "CDDDR" + "CDDR" "CDR" "CEILING" "CELL-ERROR" "CELL-ERROR-NAME" "CERROR" "CHANGE-CLASS" "CHAR" + "CHAR-CODE" "CHAR-CODE-LIMIT" "CHAR-DOWNCASE" "CHAR-EQUAL" "CHAR-GREATERP" "CHAR-INT" + "CHAR-LESSP" "CHAR-NAME" "CHAR-NOT-EQUAL" "CHAR-NOT-GREATERP" "CHAR-NOT-LESSP" + "CHAR-UPCASE" "CHAR/=" "CHAR<" "CHAR<=" "CHAR=" "CHAR>" "CHAR>=" "CHARACTER" "CHARACTERP" + "CHECK-TYPE" "CIS" "CLASS" "CLASS-NAME" "CLASS-OF" "CLEAR-INPUT" "CLEAR-OUTPUT" "CLOSE" + "CLRHASH" "CODE-CHAR" "COERCE" "COMPILATION-SPEED" "COMPILE" "COMPILE-FILE" + "COMPILE-FILE-PATHNAME" "COMPILED-FUNCTION" "COMPILED-FUNCTION-P" "COMPILER-MACRO" + "COMPILER-MACRO-FUNCTION" "COMPLEMENT" "COMPLEX" "COMPLEXP" "COMPUTE-APPLICABLE-METHODS" + "COMPUTE-RESTARTS" "CONCATENATE" "CONCATENATED-STREAM" "CONCATENATED-STREAM-STREAMS" + "COND" "CONDITION" "CONJUGATE" "CONS" "CONSP" "CONSTANTLY" "CONSTANTP" "CONTINUE" + "CONTROL-ERROR" "COPY-ALIST" "COPY-LIST" "COPY-PPRINT-DISPATCH" "COPY-READTABLE" + "COPY-SEQ" "COPY-STRUCTURE" "COPY-SYMBOL" "COPY-TREE" "COS" "COSH" "COUNT" "COUNT-IF" + "COUNT-IF-NOT" "CTYPECASE" "DEBUG" "DECF" "DECLAIM" "DECLARATION" "DECLARE" "DECODE-FLOAT" + "DECODE-UNIVERSAL-TIME" "DEFCLASS" "DEFCONSTANT" "DEFGENERIC" "DEFINE-COMPILER-MACRO" + "DEFINE-CONDITION" "DEFINE-METHOD-COMBINATION" "DEFINE-MODIFY-MACRO" + "DEFINE-SETF-EXPANDER" "DEFINE-SYMBOL-MACRO" "DEFMACRO" "DEFMETHOD" "DEFPACKAGE" + "DEFPARAMETER" "DEFSETF" "DEFSTRUCT" "DEFTYPE" "DEFUN" "DEFVAR" "DELETE" + "DELETE-DUPLICATES" "DELETE-FILE" "DELETE-IF" "DELETE-IF-NOT" "DELETE-PACKAGE" + "DENOMINATOR" "DEPOSIT-FIELD" "DESCRIBE" "DESCRIBE-OBJECT" "DESTRUCTURING-BIND" + "DIGIT-CHAR" "DIGIT-CHAR-P" "DIRECTORY" "DIRECTORY-NAMESTRING" "DISASSEMBLE" + "DIVISION-BY-ZERO" "DO" "DO*" "DO-ALL-SYMBOLS" "DO-EXTERNAL-SYMBOLS" "DO-SYMBOLS" + "DOCUMENTATION" "DOLIST" "DOTIMES" "DOUBLE-FLOAT" "DOUBLE-FLOAT-EPSILON" + "DOUBLE-FLOAT-NEGATIVE-EPSILON" "DPB" "DRIBBLE" "DYNAMIC-EXTENT" "ECASE" "ECHO-STREAM" + "ECHO-STREAM-INPUT-STREAM" "ECHO-STREAM-OUTPUT-STREAM" "ED" "EIGHTH" "ELT" + "ENCODE-UNIVERSAL-TIME" "END-OF-FILE" "ENDP" "ENOUGH-NAMESTRING" + "ENSURE-DIRECTORIES-EXIST" "ENSURE-GENERIC-FUNCTION" "EQ" "EQL" "EQUAL" "EQUALP" "ERROR" + "ETYPECASE" "EVAL" "EVAL-WHEN" "EVENP" "EVERY" "EXP" "EXPORT" "EXPT" "EXTENDED-CHAR" + "FBOUNDP" "FCEILING" "FDEFINITION" "FFLOOR" "FIFTH" "FILE-AUTHOR" "FILE-ERROR" + "FILE-ERROR-PATHNAME" "FILE-LENGTH" "FILE-NAMESTRING" "FILE-POSITION" "FILE-STREAM" + "FILE-STRING-LENGTH" "FILE-WRITE-DATE" "FILL" "FILL-POINTER" "FIND" "FIND-ALL-SYMBOLS" + "FIND-CLASS" "FIND-IF" "FIND-IF-NOT" "FIND-METHOD" "FIND-PACKAGE" "FIND-RESTART" + "FIND-SYMBOL" "FINISH-OUTPUT" "FIRST" "FIXNUM" "FLET" "FLOAT" "FLOAT-DIGITS" + "FLOAT-PRECISION" "FLOAT-RADIX" "FLOAT-SIGN" "FLOATING-POINT-INEXACT" + "FLOATING-POINT-INVALID-OPERATION" "FLOATING-POINT-OVERFLOW" "FLOATING-POINT-UNDERFLOW" + "FLOATP" "FLOOR" "FMAKUNBOUND" "FORCE-OUTPUT" "FORMAT" "FORMATTER" "FOURTH" "FRESH-LINE" + "FROUND" "FTRUNCATE" "FTYPE" "FUNCALL" "FUNCTION" "FUNCTION-KEYWORDS" + "FUNCTION-LAMBDA-EXPRESSION" "FUNCTIONP" "GCD" "GENERIC-FUNCTION" "GENSYM" "GENTEMP" "GET" + "GET-DECODED-TIME" "GET-DISPATCH-MACRO-CHARACTER" "GET-INTERNAL-REAL-TIME" + "GET-INTERNAL-RUN-TIME" "GET-MACRO-CHARACTER" "GET-OUTPUT-STREAM-STRING" "GET-PROPERTIES" + "GET-SETF-EXPANSION" "GET-UNIVERSAL-TIME" "GETF" "GETHASH" "GO" "GRAPHIC-CHAR-P" + "HANDLER-BIND" "HANDLER-CASE" "HASH-TABLE" "HASH-TABLE-COUNT" "HASH-TABLE-P" + "HASH-TABLE-REHASH-SIZE" "HASH-TABLE-REHASH-THRESHOLD" "HASH-TABLE-SIZE" "HASH-TABLE-TEST" + "HOST-NAMESTRING" "IDENTITY" "IF" "IGNORABLE" "IGNORE" "IGNORE-ERRORS" "IMAGPART" "IMPORT" + "IN-PACKAGE" "INCF" "INITIALIZE-INSTANCE" "INLINE" "INPUT-STREAM-P" "INSPECT" "INTEGER" + "INTEGER-DECODE-FLOAT" "INTEGER-LENGTH" "INTEGERP" "INTERACTIVE-STREAM-P" "INTERN" + "INTERNAL-TIME-UNITS-PER-SECOND" "INTERSECTION" "INVALID-METHOD-ERROR" "INVOKE-DEBUGGER" + "INVOKE-RESTART" "INVOKE-RESTART-INTERACTIVELY" "ISQRT" "KEYWORD" "KEYWORDP" "LABELS" + "LAMBDA" "LAMBDA-LIST-KEYWORDS" "LAMBDA-PARAMETERS-LIMIT" "LAST" "LCM" "LDB" "LDB-TEST" + "LDIFF" "LEAST-NEGATIVE-DOUBLE-FLOAT" "LEAST-NEGATIVE-LONG-FLOAT" + "LEAST-NEGATIVE-NORMALIZED-DOUBLE-FLOAT" "LEAST-NEGATIVE-NORMALIZED-LONG-FLOAT" + "LEAST-NEGATIVE-NORMALIZED-SHORT-FLOAT" "LEAST-NEGATIVE-NORMALIZED-SINGLE-FLOAT" + "LEAST-NEGATIVE-SHORT-FLOAT" "LEAST-NEGATIVE-SINGLE-FLOAT" "LEAST-POSITIVE-DOUBLE-FLOAT" + "LEAST-POSITIVE-LONG-FLOAT" "LEAST-POSITIVE-NORMALIZED-DOUBLE-FLOAT" + "LEAST-POSITIVE-NORMALIZED-LONG-FLOAT" "LEAST-POSITIVE-NORMALIZED-SHORT-FLOAT" + "LEAST-POSITIVE-NORMALIZED-SINGLE-FLOAT" "LEAST-POSITIVE-SHORT-FLOAT" + "LEAST-POSITIVE-SINGLE-FLOAT" "LENGTH" "LET" "LET*" "LISP-IMPLEMENTATION-TYPE" + "LISP-IMPLEMENTATION-VERSION" "LIST" "LIST*" "LIST-ALL-PACKAGES" "LIST-LENGTH" "LISTEN" + "LISTP" "LOAD" "LOAD-LOGICAL-PATHNAME-TRANSLATIONS" "LOAD-TIME-VALUE" "LOCALLY" "LOG" + "LOGAND" "LOGANDC1" "LOGANDC2" "LOGBITP" "LOGCOUNT" "LOGEQV" "LOGICAL-PATHNAME" + "LOGICAL-PATHNAME-TRANSLATIONS" "LOGIOR" "LOGNAND" "LOGNOR" "LOGNOT" "LOGORC1" "LOGORC2" + "LOGTEST" "LOGXOR" "LONG-FLOAT" "LONG-FLOAT-EPSILON" "LONG-FLOAT-NEGATIVE-EPSILON" + "LONG-SITE-NAME" "LOOP" "LOOP-FINISH" "LOWER-CASE-P" "MACHINE-INSTANCE" "MACHINE-TYPE" + "MACHINE-VERSION" "MACRO-FUNCTION" "MACROEXPAND" "MACROEXPAND-1" "MACROLET" "MAKE-ARRAY" + "MAKE-BROADCAST-STREAM" "MAKE-CONCATENATED-STREAM" "MAKE-CONDITION" + "MAKE-DISPATCH-MACRO-CHARACTER" "MAKE-ECHO-STREAM" "MAKE-HASH-TABLE" "MAKE-INSTANCE" + "MAKE-INSTANCES-OBSOLETE" "MAKE-LIST" "MAKE-LOAD-FORM" "MAKE-LOAD-FORM-SAVING-SLOTS" + "MAKE-METHOD" "MAKE-PACKAGE" "MAKE-PATHNAME" "MAKE-RANDOM-STATE" "MAKE-SEQUENCE" + "MAKE-STRING" "MAKE-STRING-INPUT-STREAM" "MAKE-STRING-OUTPUT-STREAM" "MAKE-SYMBOL" + "MAKE-SYNONYM-STREAM" "MAKE-TWO-WAY-STREAM" "MAKUNBOUND" "MAP" "MAP-INTO" "MAPC" "MAPCAN" + "MAPCAR" "MAPCON" "MAPHASH" "MAPL" "MAPLIST" "MASK-FIELD" "MAX" "MEMBER" "MEMBER-IF" + "MEMBER-IF-NOT" "MERGE" "MERGE-PATHNAMES" "METHOD" "METHOD-COMBINATION" + "METHOD-COMBINATION-ERROR" "METHOD-QUALIFIERS" "MIN" "MINUSP" "MISMATCH" "MOD" + "MOST-NEGATIVE-DOUBLE-FLOAT" "MOST-NEGATIVE-FIXNUM" "MOST-NEGATIVE-LONG-FLOAT" + "MOST-NEGATIVE-SHORT-FLOAT" "MOST-NEGATIVE-SINGLE-FLOAT" "MOST-POSITIVE-DOUBLE-FLOAT" + "MOST-POSITIVE-FIXNUM" "MOST-POSITIVE-LONG-FLOAT" "MOST-POSITIVE-SHORT-FLOAT" + "MOST-POSITIVE-SINGLE-FLOAT" "MUFFLE-WARNING" "MULTIPLE-VALUE-BIND" "MULTIPLE-VALUE-CALL" + "MULTIPLE-VALUE-LIST" "MULTIPLE-VALUE-PROG1" "MULTIPLE-VALUE-SETQ" "MULTIPLE-VALUES-LIMIT" + "NAME-CHAR" "NAMESTRING" "NBUTLAST" "NCONC" "NEXT-METHOD-P" "NIL" "NINTERSECTION" "NINTH" + "NO-APPLICABLE-METHOD" "NO-NEXT-METHOD" "NOT" "NOTANY" "NOTEVERY" "NOTINLINE" "NRECONC" + "NREVERSE" "NSET-DIFFERENCE" "NSET-EXCLUSIVE-OR" "NSTRING-CAPITALIZE" "NSTRING-DOWNCASE" + "NSTRING-UPCASE" "NSUBLIS" "NSUBST" "NSUBST-IF" "NSUBST-IF-NOT" "NSUBSTITUTE" + "NSUBSTITUTE-IF" "NSUBSTITUTE-IF-NOT" "NTH" "NTH-VALUE" "NTHCDR" "NULL" "NUMBER" "NUMBERP" + "NUMERATOR" "NUNION" "ODDP" "OPEN" "OPEN-STREAM-P" "OPTIMIZE" "OR" "OTHERWISE" + "OUTPUT-STREAM-P" "PACKAGE" "PACKAGE-ERROR" "PACKAGE-ERROR-PACKAGE" "PACKAGE-NAME" + "PACKAGE-NICKNAMES" "PACKAGE-SHADOWING-SYMBOLS" "PACKAGE-USE-LIST" "PACKAGE-USED-BY-LIST" + "PACKAGEP" "PAIRLIS" "PARSE-ERROR" "PARSE-INTEGER" "PARSE-NAMESTRING" "PATHNAME" + "PATHNAME-DEVICE" "PATHNAME-DIRECTORY" "PATHNAME-HOST" "PATHNAME-MATCH-P" "PATHNAME-NAME" + "PATHNAME-TYPE" "PATHNAME-VERSION" "PATHNAMEP" "PEEK-CHAR" "PHASE" "PI" "PLUSP" "POP" + "POSITION" "POSITION-IF" "POSITION-IF-NOT" "PPRINT" "PPRINT-DISPATCH" + "PPRINT-EXIT-IF-LIST-EXHAUSTED" "PPRINT-FILL" "PPRINT-INDENT" "PPRINT-LINEAR" + "PPRINT-LOGICAL-BLOCK" "PPRINT-NEWLINE" "PPRINT-POP" "PPRINT-TAB" "PPRINT-TABULAR" "PRIN1" + "PRIN1-TO-STRING" "PRINC" "PRINC-TO-STRING" "PRINT" "PRINT-NOT-READABLE" + "PRINT-NOT-READABLE-OBJECT" "PRINT-OBJECT" "PRINT-UNREADABLE-OBJECT" "PROBE-FILE" + "PROCLAIM" "PROG" "PROG*" "PROG1" "PROG2" "PROGN" "PROGRAM-ERROR" "PROGV" "PROVIDE" + "PSETF" "PSETQ" "PUSH" "PUSHNEW" "QUOTE" "RANDOM" "RANDOM-STATE" "RANDOM-STATE-P" "RASSOC" + "RASSOC-IF" "RASSOC-IF-NOT" "RATIO" "RATIONAL" "RATIONALIZE" "RATIONALP" "READ" + "READ-BYTE" "READ-CHAR" "READ-CHAR-NO-HANG" "READ-DELIMITED-LIST" "READ-FROM-STRING" + "READ-LINE" "READ-PRESERVING-WHITESPACE" "READ-SEQUENCE" "READER-ERROR" "READTABLE" + "READTABLE-CASE" "READTABLEP" "REAL" "REALP" "REALPART" "REDUCE" "REINITIALIZE-INSTANCE" + "REM" "REMF" "REMHASH" "REMOVE" "REMOVE-DUPLICATES" "REMOVE-IF" "REMOVE-IF-NOT" + "REMOVE-METHOD" "REMPROP" "RENAME-FILE" "RENAME-PACKAGE" "REPLACE" "REQUIRE" "REST" + "RESTART" "RESTART-BIND" "RESTART-CASE" "RESTART-NAME" "RETURN" "RETURN-FROM" "REVAPPEND" + "REVERSE" "ROOM" "ROTATEF" "ROUND" "ROW-MAJOR-AREF" "RPLACA" "RPLACD" "SAFETY" "SATISFIES" + "SBIT" "SCALE-FLOAT" "SCHAR" "SEARCH" "SECOND" "SEQUENCE" "SERIOUS-CONDITION" "SET" + "SET-DIFFERENCE" "SET-DISPATCH-MACRO-CHARACTER" "SET-EXCLUSIVE-OR" "SET-MACRO-CHARACTER" + "SET-PPRINT-DISPATCH" "SET-SYNTAX-FROM-CHAR" "SETF" "SETQ" "SEVENTH" "SHADOW" + "SHADOWING-IMPORT" "SHARED-INITIALIZE" "SHIFTF" "SHORT-FLOAT" "SHORT-FLOAT-EPSILON" + "SHORT-FLOAT-NEGATIVE-EPSILON" "SHORT-SITE-NAME" "SIGNAL" "SIGNED-BYTE" "SIGNUM" + "SIMPLE-ARRAY" "SIMPLE-BASE-STRING" "SIMPLE-BIT-VECTOR" "SIMPLE-BIT-VECTOR-P" + "SIMPLE-CONDITION" "SIMPLE-CONDITION-FORMAT-ARGUMENTS" "SIMPLE-CONDITION-FORMAT-CONTROL" + "SIMPLE-ERROR" "SIMPLE-STRING" "SIMPLE-STRING-P" "SIMPLE-TYPE-ERROR" "SIMPLE-VECTOR" + "SIMPLE-VECTOR-P" "SIMPLE-WARNING" "SIN" "SINGLE-FLOAT" "SINGLE-FLOAT-EPSILON" + "SINGLE-FLOAT-NEGATIVE-EPSILON" "SINH" "SIXTH" "SLEEP" "SLOT-BOUNDP" "SLOT-EXISTS-P" + "SLOT-MAKUNBOUND" "SLOT-MISSING" "SLOT-UNBOUND" "SLOT-VALUE" "SOFTWARE-TYPE" + "SOFTWARE-VERSION" "SOME" "SORT" "SPACE" "SPECIAL" "SPECIAL-OPERATOR-P" "SPEED" "SQRT" + "STABLE-SORT" "STANDARD" "STANDARD-CHAR" "STANDARD-CHAR-P" "STANDARD-CLASS" + "STANDARD-GENERIC-FUNCTION" "STANDARD-METHOD" "STANDARD-OBJECT" "STEP" "STORAGE-CONDITION" + "STORE-VALUE" "STREAM" "STREAM-ELEMENT-TYPE" "STREAM-ERROR" "STREAM-ERROR-STREAM" + "STREAM-EXTERNAL-FORMAT" "STREAMP" "STRING" "STRING-CAPITALIZE" "STRING-DOWNCASE" + "STRING-EQUAL" "STRING-GREATERP" "STRING-LEFT-TRIM" "STRING-LESSP" "STRING-NOT-EQUAL" + "STRING-NOT-GREATERP" "STRING-NOT-LESSP" "STRING-RIGHT-TRIM" "STRING-STREAM" "STRING-TRIM" + "STRING-UPCASE" "STRING/=" "STRING<" "STRING<=" "STRING=" "STRING>" "STRING>=" "STRINGP" + "STRUCTURE" "STRUCTURE-CLASS" "STRUCTURE-OBJECT" "STYLE-WARNING" "SUBLIS" "SUBSEQ" + "SUBSETP" "SUBST" "SUBST-IF" "SUBST-IF-NOT" "SUBSTITUTE" "SUBSTITUTE-IF" + "SUBSTITUTE-IF-NOT" "SUBTYPEP" "SVREF" "SXHASH" "SYMBOL" "SYMBOL-FUNCTION" + "SYMBOL-MACROLET" "SYMBOL-NAME" "SYMBOL-PACKAGE" "SYMBOL-PLIST" "SYMBOL-VALUE" "SYMBOLP" + "SYNONYM-STREAM" "SYNONYM-STREAM-SYMBOL" "T" "TAGBODY" "TAILP" "TAN" "TANH" "TENTH" + "TERPRI" "THE" "THIRD" "THROW" "TIME" "TRACE" "TRANSLATE-LOGICAL-PATHNAME" + "TRANSLATE-PATHNAME" "TREE-EQUAL" "TRUENAME" "TRUNCATE" "TWO-WAY-STREAM" + "TWO-WAY-STREAM-INPUT-STREAM" "TWO-WAY-STREAM-OUTPUT-STREAM" "TYPE" "TYPE-ERROR" + "TYPE-ERROR-DATUM" "TYPE-ERROR-EXPECTED-TYPE" "TYPE-OF" "TYPECASE" "TYPEP" "UNBOUND-SLOT" + "UNBOUND-SLOT-INSTANCE" "UNBOUND-VARIABLE" "UNDEFINED-FUNCTION" "UNEXPORT" "UNINTERN" + "UNION" "UNLESS" "UNREAD-CHAR" "UNSIGNED-BYTE" "UNTRACE" "UNUSE-PACKAGE" "UNWIND-PROTECT" + "UPDATE-INSTANCE-FOR-DIFFERENT-CLASS" "UPDATE-INSTANCE-FOR-REDEFINED-CLASS" + "UPGRADED-ARRAY-ELEMENT-TYPE" "UPGRADED-COMPLEX-PART-TYPE" "UPPER-CASE-P" "USE-PACKAGE" + "USE-VALUE" "USER-HOMEDIR-PATHNAME" "VALUES" "VALUES-LIST" "VARIABLE" "VECTOR" + "VECTOR-POP" "VECTOR-PUSH" "VECTOR-PUSH-EXTEND" "VECTORP" "WARN" "WARNING" "WHEN" + "WILD-PATHNAME-P" "WITH-ACCESSORS" "WITH-COMPILATION-UNIT" "WITH-CONDITION-RESTARTS" + "WITH-HASH-TABLE-ITERATOR" "WITH-INPUT-FROM-STRING" "WITH-OPEN-FILE" "WITH-OPEN-STREAM" + "WITH-OUTPUT-TO-STRING" "WITH-PACKAGE-ITERATOR" "WITH-SIMPLE-RESTART" "WITH-SLOTS" + "WITH-STANDARD-IO-SYNTAX" "WRITE" "WRITE-BYTE" "WRITE-CHAR" "WRITE-LINE" "WRITE-SEQUENCE" + "WRITE-STRING" "WRITE-TO-STRING" "Y-OR-N-P" "YES-OR-NO-P" "ZEROP")) + + (defvar *export-from-glisp* + '( + "DEFSUBST" + "G/MAKE-STRING" + "MP/MAKE-LOCK" + "MP/WITH-LOCK" + "WITH-TIMEOUT" + "OPEN-INET-SOCKET" + ;; util.lisp : + "ALWAYS" + "CL-BYTE-STREAM" + "CL-CHAR-STREAM" + "CL-STREAM" + "COMPOSE" + "CURRY" + "FALSE" + "FORCE" + "G/CLOSE" + "G/FINISH-OUTPUT" + "G/PEEK-CHAR" + "G/READ-BYTE" + "G/READ-BYTE-SEQUENCE" + "G/READ-CHAR" + "G/READ-CHAR-SEQUENCE" + "G/READ-LINE" + "G/READ-LINE*" + "G/UNREAD-BYTE" + "G/UNREAD-CHAR" + "G/WRITE-BYTE" + "G/WRITE-BYTE-SEQUENCE" + "G/WRITE-CHAR" + "G/WRITE-STRING" + "GSTREAM" + "MAP-ARRAY" + "MAPFCAR" + "MAX*" + "MAXF" + "MIN*" + "MINF" + "MULTIPLE-VALUE-OR" + "MULTIPLE-VALUE-SOME" + "NCONCF" + "NEQ" + "PROMISE" + "RCURRY" + "SANIFY-STRING" + "SHOW" + "SPLIT-BY" + "SPLIT-BY-IF" + "SPLIT-BY-MEMBER" + "SPLIT-STRING" + "STRING-BEGIN-EQUAL" + "TRUE" + "UNTIL" + "USE-BYTE-FOR-CHAR-STREAM-FLAVOUR" + "USE-CHAR-FOR-BYTE-STREAM-FLAVOUR" + "WHILE" + "WHITE-SPACE-P" + + "CL-BYTE-STREAM->GSTREAM" + "CL-CHAR-STREAM->GSTREAM" + "G/OPEN-INET-SOCKET" + "ACCEPT-CONNECTION" + + "FIND-TEMPORARY-FILE" + "DELETE-TEMPORARY-FILE" + "WITH-TEMPORARY-FILE" + + "SET-EQUAL" + "MAYBE-PARSE-INTEGER" + "NOP" + "WITH-STRUCTURE-SLOTS" + + "COMPILE-FUNCALL" + "FUNCALL*" + "MAPC*" + "VREDUCE*" + "LREDUCE*" + "WITH-UNIQUE-NAMES" + + ;; runes.lisp + "RUNE" + "ROD" + "SIMPLE-ROD" + "%RUNE" + "ROD-CAPITALIZE" + "CODE-RUNE" + "RUNE-CODE" + "RUNE-DOWNCASE" + "RUNE-UPCASE" + "ROD-DOWNCASE" + "ROD-UPCASE" + "WHITE-SPACE-RUNE-P" + "DIGIT-RUNE-P" + "RUNE=" + "RUNE<=" + "RUNE>=" + "RUNE-EQUAL" + "RUNEP" + "SLOOPY-ROD-P" + "ROD=" + "ROD-EQUAL" + "MAKE-ROD" + "CHAR-RUNE" + "RUNE-CHAR" + "ROD-STRING" + "STRING-ROD" + "ROD-SUBSEQ" + + "G/MAKE-HASH-TABLE" + "G/HASHGET" + "G/CLRHASH" + "STIR-HASH-CODES" + "HASH-SEQUENCE" + "HASH/STRING-EQUAL" + "MAKE-STRING-EQUAL-HASH-TABLE" + + "PRIMEP" + + ;; match.lisp + "DEFINE-MATCH-MACRO" + "IF-MATCH" + "GSTREAM-AS-STRING" + )) + + (defparameter *packages* + #-GCL '(:common-lisp) + #+GCL '(:lisp :pcl) ) + + (defparameter *gray-symbols* + '("FUNDAMENTAL-STREAM" + "FUNDAMENTAL-INPUT-STREAM" + "FUNDAMENTAL-OUTPUT-STREAM" + "FUNDAMENTAL-CHARACTER-STREAM" + "FUNDAMENTAL-BINARY-STREAM" + "FUNDAMENTAL-CHARACTER-INPUT-STREAM" + "FUNDAMENTAL-CHARACTER-OUTPUT-STREAM" + "FUNDAMENTAL-BINARY-INPUT-STREAM" + + "STREAM-READ-CHAR" + "STREAM-UNREAD-CHAR" + "STREAM-READ-CHAR-NO-HANG" + "STREAM-PEEK-CHAR" + "STREAM-LISTEN" + "STREAM-READ-LINE" + "STREAM-CLEAR-INPUT" + + "STREAM-WRITE-CHAR" + "STREAM-LINE-COLUMN" + "STREAM-START-LINE-P" + "STREAM-WRITE-STRING" + "STREAM-TERPRI" + "STREAM-FRESH-LINE" + "STREAM-FINISH-OUTPUT" + "STREAM-FORCE-OUTPUT" + "STREAM-ADVANCE-TO-COLUMN" + + "STREAM-READ-BYTE" + "STREAM-WRITE-BYTE" )) + + (defparameter *gray-packages* + `( + #+:CLISP ,@'(:lisp) + #+:CMU ,@'(:ext) + #+:sbcl ,@'(:sb-gray) + #+:ALLEGRO ,@'(:common-lisp :excl :stream) + #+:HARLEQUIN-COMMON-LISP ,@'(:stream) + #+:OPENMCL ,@'(:ccl) + )) + + (defun seek-symbol (name packages) + ;; Seek the a symbol named 'name' in `packages' + (or (some #'(lambda (p) + (multiple-value-bind (sym res) (find-symbol name p) + (if (eql res :external) + (list sym) + nil))) + packages) + (progn (format T "~&There is no ~A in ~A." name packages) + (finish-output) + nil))) + + (defun dump-defpackage (&aux imports export-ansi export-gray) + (labels ((grok (symbols packages) + (let ((res nil)) + (dolist (nam symbols) + (let ((sym (seek-symbol nam packages))) + (when sym + (push (car sym) res) + (cond ((multiple-value-bind (sym2 res) (find-symbol nam :glisp) + (and sym2 (eq res :external))) + ;; + (format T "~&;; ~S is pacthed." sym) ) + (t + (setf sym (car sym)) + ;; CLISP has no (:import ..) ARG! + (push `(:import-from + ,(package-name (symbol-package sym)) + ,(symbol-name sym)) + imports)))))) + res))) + (setf export-ansi (grok *all-ansi-symbols* *packages*)) + (setf export-gray (grok *gray-symbols* *gray-packages*)) + `(progn + (defpackage "GLISP" (:use) + ,@imports + (:export + ,@(mapcar #'symbol-name export-ansi) + ,@(mapcar #'symbol-name export-gray) + ,@*export-from-glisp*)) + (defpackage "GLUSER" + (:use "GLISP")) ))) + + (defmacro define-glisp-package () + (dump-defpackage)) + ) + +(define-glisp-package) + diff --git a/glisp/runes.lisp b/glisp/runes.lisp new file mode 100644 index 0000000..8d8f55e --- /dev/null +++ b/glisp/runes.lisp @@ -0,0 +1,412 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unicode strings (called RODs) +;;; Created: 1999-05-25 22:29 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-15 GB - ROD=, ROD-EQUAL +;; RUNE<=, RUNE>= +;; MAKE-ROD, ROD-SUBSEQ +;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD +;; new functions +;; - Added rune reader +;; + +(in-package :GLISP) + +(deftype rune () '(unsigned-byte 16)) +(deftype rod () '(array rune (*))) +(deftype simple-rod () '(simple-array rune (*))) + +(defsubst rune (rod index) + (aref rod index)) + +(defun (setf rune) (new rod index) + (setf (aref rod index) new)) + +(defsubst %rune (rod index) + (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index))) + +(defsubst (setf %rune) (new rod index) + (setf (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index)) new)) + +(defun rod-capitalize (rod) + (warn "~S is not implemented." 'rod-capitalize) + rod) + +(defsubst code-rune (x) x) +(defsubst rune-code (x) x) + +(defsubst rune= (x y) + (= x y)) + +(defun rune-downcase (rune) + (cond ((<= #x0041 rune #x005a) (+ rune #x20)) + ((= rune #x00d7) rune) + ((<= #x00c0 rune #x00de) (+ rune #x20)) + (t rune))) + +(defsubst rune-upcase (rune) + (cond ((<= #x0061 rune #x007a) (- rune #x20)) + ((= rune #x00f7) rune) + ((<= #x00e0 rune #x00fe) (- rune #x20)) + (t rune))) + +(defun rune-upper-case-letter-p (rune) + (or (<= #x0041 rune #x005a) (<= #x00c0 rune #x00de))) + +(defun rune-lower-case-letter-p (rune) + (or (<= #x0061 rune #x007a) (<= #x00e0 rune #x00fe) + (= rune #x00d7))) + + +(defun rune-equal (x y) + (rune= (rune-upcase x) (rune-upcase y))) + +(defun rod-downcase (rod) + ;; FIXME + (register-rod + (map '(simple-array (unsigned-byte 16) (*)) #'rune-downcase rod))) + +(defun rod-upcase (rod) + ;; FIXME + (register-rod + (map '(simple-array (unsigned-byte 16) (*)) #'rune-upcase rod))) + +(defsubst white-space-rune-p (char) + (or (= char 9) ;TAB + (= char 10) ;Linefeed + (= char 13) ;Carriage Return + (= char 32))) ;Space + +(defsubst digit-rune-p (char &optional (radix 10)) + (cond ((<= #.(char-code #\0) char #.(char-code #\9)) + (and (< (- char #.(char-code #\0)) radix) + (- char #.(char-code #\0)))) + ((<= #.(char-code #\A) char #.(char-code #\Z)) + (and (< (- char #.(char-code #\A) -10) radix) + (- char #.(char-code #\A) -10))) + ((<= #.(char-code #\a) char #.(char-code #\z)) + (and (< (- char #.(char-code #\a) -10) radix) + (- char #.(char-code #\a) -10))) )) + +(defun rod (x) + (cond ((stringp x) (register-rod (map 'rod #'char-code x))) + ((symbolp x) (rod (string x))) + ((characterp x) (rod (string x))) + ((vectorp x) (register-rod (coerce x 'rod))) + ((integerp x) (register-rod (map 'rod #'identity (list x)))) + (t (error "Cannot convert ~S to a ~S" x 'rod)))) + +(defun runep (x) + (and (integerp x) + (<= 0 x #xFFFF))) + +(defun sloopy-rod-p (x) + (and (not (stringp x)) + (vectorp x) + (every #'runep x))) + +(defun rod= (x y) + (and (= (length x) (length y)) + (dotimes (i (length x) t) + (unless (rune= (rune x i) (rune y i)) + (return nil))))) + +(defun rod-equal (x y) + (and (= (length x) (length y)) + (dotimes (i (length x) t) + (unless (rune-equal (rune x i) (rune y i)) + (return nil))))) + +(defsubst make-rod (size) + (let ((res (make-array size :element-type 'rune))) + (register-rod res) + res)) + +(defun char-rune (char) + (code-rune (char-code char))) + +(defun rune-char (rune &optional (default #\?)) + #+CMU + (if (< rune 256) (code-char rune) default) + #-CMU + (or (code-char rune) default)) + +(defun rod-string (rod &optional (default-char #\?)) + (map 'string (lambda (x) (rune-char x default-char)) rod)) + +(defun string-rod (string) + (let* ((n (length string)) + (res (make-rod n))) + (dotimes (i n) + (setf (%rune res i) (char-rune (char string i)))) + res)) + +;;;; +;;;; RUNE Reader +;;;; + +;; Portable implementation of WHITE-SPACE-P with regard to the current +;; read table -- this is bit tricky. + +(defun rt-white-space-p (char) + (let ((stream (make-string-input-stream (string char)))) + (eq :eof (peek-char t stream nil :eof)))) + +(defun read-rune-name (input) + ;; the first char is unconditionally read + (let ((char0 (read-char input t nil t))) + (when (char= char0 #\\) + (setf char0 (read-char input t nil t))) + (with-output-to-string (res) + (write-char char0 res) + (do ((ch (peek-char nil input nil :eof t) (peek-char nil input nil :eof t))) + ((or (eq ch :eof) + (rt-white-space-p ch) + (multiple-value-bind (function non-terminating-p) (get-macro-character ch) + (and function (not non-terminating-p))))) + (write-char ch res) + (read-char input))))) ;consume this character + +(defun iso-10646-char-code (char) + (char-code char)) + +(defvar *rune-names* (make-hash-table :test #'equal) + "Hashtable, which maps all known rune names to rune codes; + Names are stored in uppercase.") + +(defun define-rune-name (name code) + (setf (gethash (string-upcase name) *rune-names*) code) + name) + +(defun lookup-rune-name (name) + (gethash (string-upcase name) *rune-names*)) + +(define-rune-name "null" #x0000) +(define-rune-name "space" #x0020) +(define-rune-name "newline" #x000A) +(define-rune-name "return" #x000D) +(define-rune-name "tab" #x0009) +(define-rune-name "page" #x000C) + +;; and just for fun: +(define-rune-name "euro" #x20AC) + +;; ASCII control characters +(define-rune-name "nul" #x0000) ;null +(define-rune-name "soh" #x0001) ;start of header +(define-rune-name "stx" #x0002) ;start of text +(define-rune-name "etx" #x0003) ;end of text +(define-rune-name "eot" #x0004) ;end of transmission +(define-rune-name "enq" #x0005) ; +(define-rune-name "ack" #x0006) ;acknowledge +(define-rune-name "bel" #x0007) ;bell +(define-rune-name "bs" #x0008) ;backspace +(define-rune-name "ht" #x0009) ;horizontal tab +(define-rune-name "lf" #X000A) ;line feed, new line +(define-rune-name "vt" #X000B) ;vertical tab +(define-rune-name "ff" #x000C) ;form feed +(define-rune-name "cr" #x000D) ;carriage return +(define-rune-name "so" #x000E) ;shift out +(define-rune-name "si" #x000F) ;shift in +(define-rune-name "dle" #x0010) ;device latch enable ? +(define-rune-name "dc1" #x0011) ;device control 1 +(define-rune-name "dc2" #x0012) ;device control 2 +(define-rune-name "dc3" #x0013) ;device control 3 +(define-rune-name "dc4" #x0014) ;device control 4 +(define-rune-name "nak" #x0015) ;negative acknowledge +(define-rune-name "syn" #x0016) ; +(define-rune-name "etb" #x0017) ; +(define-rune-name "can" #x0018) ; +(define-rune-name "em" #x0019) ;end of message +(define-rune-name "sub" #x001A) ; +(define-rune-name "esc" #x001B) ;escape +(define-rune-name "fs" #x001C) ;field separator ? +(define-rune-name "gs" #x001D) ;group separator +(define-rune-name "rs" #x001E) ; +(define-rune-name "us" #x001F) ; + +(define-rune-name "del" #x007F) ;delete + +;; iso-latin +(define-rune-name "nbsp" #x00A0) ;non breakable space +(define-rune-name "shy" #x00AD) ;soft hyphen + +(defun rune-from-read-name (name) + (cond ((= (length name) 1) + (iso-10646-char-code (char name 0))) + ((and (= (length name) 2) + (char= (char name 0) #\\)) + (iso-10646-char-code (char name 1))) + ((and (>= (length name) 3) + (char-equal (char name 0) #\u) + (char-equal (char name 1) #\+) + (every (lambda (x) (digit-char-p x 16)) (subseq name 2))) + (parse-integer name :start 2 :radix 16)) + ((lookup-rune-name name)) + (t + (error "Meaningless rune name ~S." name)))) + +(defun rune-reader (stream subchar arg) + subchar arg + (values (rune-from-read-name (read-rune-name stream)))) + +(set-dispatch-macro-character #\# #\/ 'rune-reader) + +;;;; + +(defun rune<= (rune &rest more-runes) + (apply #'<= rune more-runes)) + +(defun rune>= (rune &rest more-runes) + (apply #'>= rune more-runes)) + +(defun rodp (object) + (typep object 'rod)) + +(defun really-rod-p (object) + (and (typep object 'rod) + (really-really-rod-p object))) + +(defun rod-subseq (source start &optional (end (length source))) + (unless (rodp source) + (error "~S is not of type ~S." source 'rod)) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type rod source) + (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (%rune source (the fixnum (+ i start)))))))) + +(defun rod-subseq* (source start &optional (end (length source))) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (aref source (the fixnum (+ i start)))))))) + +;;; Support for telling ROD and arrays apart: + +#+CMU +(progn + (defvar *rod-hash-table* + (make-array 5003 :initial-element nil))) + +(defun register-rod (rod) + #+CMU + (unless (really-really-rod-p rod) + (push (ext:make-weak-pointer rod) + (aref *rod-hash-table* (mod (cl::pointer-hash rod) + (length *rod-hash-table*))))) + rod) + +(defun really-really-rod-p (rod) + #+CMU + (find rod (aref *rod-hash-table* (mod (cl::pointer-hash rod) + (length *rod-hash-table*))) + :key #'ext:weak-pointer-value)) + +#+CMU +(progn + (defun rod-hash-table-rehash () + (let* ((n 5003) + (new (make-array n :initial-element nil))) + (loop for bucket across *rod-hash-table* do + (loop for item in bucket do + (let ((v (ext:weak-pointer-value item))) + (when v + (push item (aref new (mod (cl::pointer-hash v) n))))))) + (setf *rod-hash-table* new))) + + (defun rod-hash-after-gc-hook () + ;; hmm interesting question: should we rehash? + (rod-hash-table-rehash)) + + (pushnew 'rod-hash-after-gc-hook extensions:*after-gc-hooks*) ) + +;;; ROD ext syntax + +(defun rod-reader (stream subchar arg) + (declare (ignore arg)) + (rod + (with-output-to-string (bag) + (do ((c (read-char stream t nil t) + (read-char stream t nil t))) + ((char= c subchar)) + (cond ((char= c #\\) + (setf c (read-char stream t nil t)))) + (princ c bag))))) + +(defun rod-printer (stream rod) + (princ #\# stream) + (princ #\" stream) + (loop for x across rod do + (cond ((or (rune= x #.(char-code #\\)) + (rune= x #.(char-code #\"))) + (princ #\\ stream) + (princ (code-char x) stream)) + ((< x char-code-limit) + (princ (code-char x) stream)) + (t + (format stream "\\u~4,'0X" x)))) + (princ #\" stream)) + +(set-pprint-dispatch '(satisfies really-rod-p) #'rod-printer) + +(set-dispatch-macro-character #\# #\" 'rod-reader) + +#|| +(defun longish-array-p (arr) + (and (arrayp arr) + (> (array-total-size arr) 10))) + +(set-pprint-dispatch '(satisfies longish-array-p) + #'(lambda (stream object) + (let ((*print-array* nil) + (*print-pretty* nil)) + (prin1 object stream)))) +||# \ No newline at end of file diff --git a/glisp/syntax.lisp b/glisp/syntax.lisp new file mode 100644 index 0000000..d66ce6c --- /dev/null +++ b/glisp/syntax.lisp @@ -0,0 +1,190 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unicode strings (called RODs) +;;; Created: 1999-05-25 22:29 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-15 GB - ROD=, ROD-EQUAL +;; RUNE<=, RUNE>= +;; MAKE-ROD, ROD-SUBSEQ +;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD +;; new functions +;; - Added rune reader +;; + +(in-package :glisp) + +;;;; +;;;; RUNE Reader +;;;; + +;; Portable implementation of WHITE-SPACE-P with regard to the current +;; read table -- this is bit tricky. + +(defun rt-white-space-p (char) + (let ((stream (make-string-input-stream (string char)))) + (eq :eof (peek-char t stream nil :eof)))) + +(defun read-rune-name (input) + ;; the first char is unconditionally read + (let ((char0 (read-char input t nil t))) + (when (char= char0 #\\) + (setf char0 (read-char input t nil t))) + (with-output-to-string (res) + (write-char char0 res) + (do ((ch (peek-char nil input nil :eof t) (peek-char nil input nil :eof t))) + ((or (eq ch :eof) + (rt-white-space-p ch) + (multiple-value-bind (function non-terminating-p) (get-macro-character ch) + (and function (not non-terminating-p))))) + (write-char ch res) + (read-char input))))) ;consume this character + +(defun iso-10646-char-code (char) + (char-code char)) + +(defvar *rune-names* (make-hash-table :test #'equal) + "Hashtable, which maps all known rune names to rune codes; + Names are stored in uppercase.") + +(defun define-rune-name (name code) + (setf (gethash (string-upcase name) *rune-names*) code) + name) + +(defun lookup-rune-name (name) + (gethash (string-upcase name) *rune-names*)) + +(define-rune-name "null" #x0000) +(define-rune-name "space" #x0020) +(define-rune-name "newline" #x000A) +(define-rune-name "return" #x000D) +(define-rune-name "tab" #x0009) +(define-rune-name "page" #x000C) + +;; and just for fun: +(define-rune-name "euro" #x20AC) + +;; ASCII control characters +(define-rune-name "nul" #x0000) ;null +(define-rune-name "soh" #x0001) ;start of header +(define-rune-name "stx" #x0002) ;start of text +(define-rune-name "etx" #x0003) ;end of text +(define-rune-name "eot" #x0004) ;end of transmission +(define-rune-name "enq" #x0005) ; +(define-rune-name "ack" #x0006) ;acknowledge +(define-rune-name "bel" #x0007) ;bell +(define-rune-name "bs" #x0008) ;backspace +(define-rune-name "ht" #x0009) ;horizontal tab +(define-rune-name "lf" #X000A) ;line feed, new line +(define-rune-name "vt" #X000B) ;vertical tab +(define-rune-name "ff" #x000C) ;form feed +(define-rune-name "cr" #x000D) ;carriage return +(define-rune-name "so" #x000E) ;shift out +(define-rune-name "si" #x000F) ;shift in +(define-rune-name "dle" #x0010) ;device latch enable ? +(define-rune-name "dc1" #x0011) ;device control 1 +(define-rune-name "dc2" #x0012) ;device control 2 +(define-rune-name "dc3" #x0013) ;device control 3 +(define-rune-name "dc4" #x0014) ;device control 4 +(define-rune-name "nak" #x0015) ;negative acknowledge +(define-rune-name "syn" #x0016) ; +(define-rune-name "etb" #x0017) ; +(define-rune-name "can" #x0018) ; +(define-rune-name "em" #x0019) ;end of message +(define-rune-name "sub" #x001A) ; +(define-rune-name "esc" #x001B) ;escape +(define-rune-name "fs" #x001C) ;field separator ? +(define-rune-name "gs" #x001D) ;group separator +(define-rune-name "rs" #x001E) ; +(define-rune-name "us" #x001F) ; + +(define-rune-name "del" #x007F) ;delete + +;; iso-latin +(define-rune-name "nbsp" #x00A0) ;non breakable space +(define-rune-name "shy" #x00AD) ;soft hyphen + +(defun rune-from-read-name (name) + (cond ((= (length name) 1) + (iso-10646-char-code (char name 0))) + ((and (= (length name) 2) + (char= (char name 0) #\\)) + (iso-10646-char-code (char name 1))) + ((and (>= (length name) 3) + (char-equal (char name 0) #\u) + (char-equal (char name 1) #\+) + (every (lambda (x) (digit-char-p x 16)) (subseq name 2))) + (parse-integer name :start 2 :radix 16)) + ((lookup-rune-name name)) + (t + (error "Meaningless rune name ~S." name)))) + +(defun rune-reader (stream subchar arg) + subchar arg + (values (rune-from-read-name (read-rune-name stream)))) + +(set-dispatch-macro-character #\# #\/ 'rune-reader) + +;;; ROD ext syntax + +(defun rod-reader (stream subchar arg) + (declare (ignore arg)) + (rod + (with-output-to-string (bag) + (do ((c (read-char stream t nil t) + (read-char stream t nil t))) + ((char= c subchar)) + (cond ((char= c #\\) + (setf c (read-char stream t nil t)))) + (princ c bag))))) + +(defun rod-printer (stream rod) + (princ #\# stream) + (princ #\" stream) + (loop for x across rod do + (cond ((or (rune= x #.(char-code #\\)) + (rune= x #.(char-code #\"))) + (princ #\\ stream) + (princ (code-char x) stream)) + ((< x char-code-limit) + (princ (code-char x) stream)) + (t + (format stream "\\u~4,'0X" x)))) + (princ #\" stream)) + +(set-pprint-dispatch '(satisfies really-rod-p) #'rod-printer) + +(set-dispatch-macro-character #\# #\" 'rod-reader) + +#|| +(defun longish-array-p (arr) + (and (arrayp arr) + (> (array-total-size arr) 10))) + +(set-pprint-dispatch '(satisfies longish-array-p) + #'(lambda (stream object) + (let ((*print-array* nil) + (*print-pretty* nil)) + (prin1 object stream)))) +||# diff --git a/glisp/util.lisp b/glisp/util.lisp new file mode 100644 index 0000000..eb65f00 --- /dev/null +++ b/glisp/util.lisp @@ -0,0 +1,1113 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Some common utilities for the Closure browser +;;; Created: 1997-12-27 +;;; Author: Gilbert Baumann +;;; License: GPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1997-1999 by Gilbert Baumann + +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2 of the License, or +;;; (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-24 GB = fixed MULTIPLE-VALUE-OR it now takes any number of +;; subforms +;; + +(in-package :GLISP) + +(defun neq (x y) (not (eq x y))) + +(define-compiler-macro neq (x y) + `(not (eq ,x ,y))) + +;;; -------------------------------------------------------------------------------- +;;; Meta functions + +(defun curry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append args more)))) + +(defun rcurry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append more args)))) + +(defun compose (f g) + #'(lambda (&rest args) + (funcall f (apply g args)))) + +(defun always (value) + #'(lambda (&rest args) + (declare (ignore args)) + value)) + +(defun true (&rest x) + (declare (ignore x)) + t) + +(defun false (&rest x) + (declare (ignore x)) + nil) + +;;; -------------------------------------------------------------------------------- +;;; Promises + +(defstruct (promise (:print-function print-promise)) + forced? value fun) + +(defun print-promise (self sink depth) + (declare (ignore depth)) + (if (promise-forced? self) + (format sink "#<~S ~S ~S>" 'promise :forced (promise-value self)) + (format sink "#<~S ~S>" 'promise :lazy))) + +(defmacro promise (form) + `(make-promise :forced? nil :fun #'(lambda () ,form))) + +(defun force (x) + (if (promise-forced? x) + (promise-value x) + (setf (promise-forced? x) t + (promise-value x) (funcall (promise-fun x))))) + +;;; -------------------------------------------------------------------------------- +;;; Some additional f macros + +(define-modify-macro maxf (&rest nums) max) +(define-modify-macro minf (&rest nums) min) +(define-modify-macro nconcf (&rest args) nconc) + +;; Man sollte mal ein generelles f macro definieren, in etwa so +;; (funcallf #'nconc x 10) + +;;; Modifizierte Version von max / min. + +(defun max* (&rest nums) + (reduce (lambda (x y) + (cond ((null x) y) + ((null y) x) + (t (max x y)))) + nums :initial-value nil)) + +(defun min* (&rest nums) + (reduce (lambda (x y) + (cond ((null x) y) + ((null y) x) + (t (min x y)))) + nums :initial-value nil)) + +;;; -------------------------------------------------------------------------------- +;;; Debuging aids + +(defmacro show (&rest exprs) + `(format T "~&** [~S]~{~#[~:; ~] ~A = ~S~}." ',(current-function-name) + (list ,@(mapcan (lambda (x) + (list (let ((*print-case* :downcase)) + (prin1-to-string x)) + x)) + exprs)))) + +#+ALLEGRO +(defun current-function-name () + (car COMPILER::.FUNCTIONS-DEFINED.)) + +#-ALLEGRO +(defun current-function-name () + 'ANONYMOUS) + +;;; -------------------------------------------------------------------------------- +;;; Multiple values + +(defmacro multiple-value-or (&rest xs) + (cond ((null xs) + nil) + ((null (cdr xs)) + (car xs)) + (t + (let ((g (gensym))) + `(LET ((,g (MULTIPLE-VALUE-LIST ,(car xs)))) + (IF (CAR ,g) + (VALUES-LIST ,g) + (MULTIPLE-VALUE-OR ,@(cdr xs)))))))) + +(defun multiple-value-some (predicate &rest sequences) + (values-list + (apply #'some (lambda (&rest args) + (let ((res (multiple-value-list (apply predicate args)))) + (if (car res) + res + nil))) + sequences))) + +;;; -------------------------------------------------------------------------------- +;;; while and until + +(defmacro while (test &body body) + `(until (not ,test) ,@body)) + +(defmacro until (test &body body) + `(do () (,test) ,@body)) + +;;; -------------------------------------------------------------------------------- +;;; Sequences + +(defun split-by-if (predicate seq &key (start 0) (nuke-empty-p nil)) + (let ((p0 (position-if predicate seq :start start))) + (if p0 + (if (and nuke-empty-p (= start p0)) + (split-by-if predicate seq :start (+ p0 1) :nuke-empty-p nuke-empty-p) + (cons (subseq seq start p0) + (split-by-if predicate seq :start (+ p0 1) :nuke-empty-p nuke-empty-p))) + (if (and nuke-empty-p (= start (length seq))) + nil + (list (subseq seq start)))))) + +(defun split-by (item &rest args) + (apply #'split-by-if (curry #'eql item) args)) + +(defun split-by-member (items &rest args) + (apply #'split-by-if (rcurry #'member items) args)) + +;;; -------------------------------------------------------------------------------- +;;; Strings + +(defun white-space-p (ch) + ;;(declare #.cl-user:+optimize-very-fast-trusted+) + (or (eq ch #\Return) + (eq ch #\Newline) + (eq ch #\Space) + (eq ch #\Tab) + (eq ch #\Page))) + +(define-compiler-macro white-space-p (ch) + `(member ,ch '(#\Return #\Newline #\Space #\Tab #\Page)) ) + +(defun sanify-string (string &optional (begin? t) (end? t) + (start 0)) + (let ((i (position-if #'white-space-p string :start start))) + (cond (i + (let ((j (position-if-not #'white-space-p string :start i))) + (if j + (concatenate 'string (subseq string start i) + (if (and (= i start) begin?) "" " ") + (sanify-string string nil end? j)) + (concatenate 'string (subseq string start i) + (if (not end?) " " ""))))) + (t (subseq string start))))) + +(defun sanify-rod (string &optional (begin? t) (end? t) (start 0)) + (let ((i (position-if #'white-space-rune-p string :start start))) + (cond (i + (let ((j (position-if-not #'white-space-rune-p string :start i))) + (if j + (concatenate 'rod (subseq string start i) + (if (and (= i start) begin?) '#() '#(32)) + (sanify-rod string nil end? j)) + (concatenate 'rod (subseq string start i) + (if (not end?) '#(32) '#()))))) + (t (subseq string start))))) + +(defun split-string (bag string) + (setq string (string-trim bag string)) + (cond ((= (length string) 0) nil) + (t + (let ((p (position bag string :test #'(lambda (x y) (member y x))))) + (if p + (cons (subseq string 0 p) (split-string bag (subseq string p))) + (list string))) ))) + +(defun string-begin-equal (a b) + "Returns non-NIL if the beginning of 'a' matches 'b'" + (and (>= (length a) (length b)) + (string-equal a b :end1 (length b))) ) + +(defun string-begin= (a b) + "Returns non-NIL if the beginning of 'a' matches 'b'" + (and (>= (length a) (length b)) + (string= a b :end1 (length b))) ) + + +;;; ------------------------------------------------------------------------------------------ +;;; Futures +;;; + +#|| +(defstruct (future (:print-function print-future)) + (read-lock (mp/make-lock)) + (guess-lock (mp/make-lock)) + value) + +(defun print-future (self sink depth) + (if (future-guess-lock self) + (format sink "#<~S unpredicted>" (type-of self)) + (if (and *print-level* (>= depth *print-level*)) + (format sink "#<~S predicted as ...>" (type-of self)) + (format sink "#<~S predicted as ~S>" (type-of self) (future-value self))))) + +(defun future () + (let ((res (make-future))) + (mp/seize-lock (future-guess-lock res)) + res)) + +(defun guess (future) + (mp/with-lock ((future-read-lock future)) + (let ((lock (future-guess-lock future))) + (when lock + (mp/seize-lock lock)) + (future-value future)))) + +(defun predict (future value) + (setf (future-value future) value) + (let ((lock (future-guess-lock future))) + (setf (future-guess-lock future) nil) + (mp/release-lock lock)) + value) + +;;; Future lists + +(defun fcar (x) (car (guess x))) +(defun fcdr (x) (cdr (guess x))) +(defun fnull (x) (null (guess x))) +(defun fendp (x) (endp (guess x))) + +(defmacro doflist ((var list &optional res) &body body) + (let ((q (make-symbol "Q"))) + `(do ((,q ,list (fcdr ,q))) + ((fendp ,q) ,res) + (let ((,var (fcar ,q))) + ,@body)))) + +(defun mapfcar (fun flist) + (cond ((fendp flist) nil) + ((cons (funcall fun (fcar flist)) (mapfcar fun (fcdr flist)))))) + +||# + +;; Example: + +;; (setq f (future)) + +;; Thread 1: +;; (doflist (k f) (print k)) + +;; Thread 2: +;; (setq f (cdr (predict f (cons 'foo (future))))) +;; (setq f (cdr (predict f (cons 'bar (future))))) +;; (predict f nil) +;; + +;;;; ----------------------------------------------------------------------------------------- +;;;; Homebrew stream classes +;;;; + +;; I am really tired of standard Common Lisp streams and thier incompatible implementations. + +;; A gstream is an objects with obeys to the following protocol: + +;; g/read-byte stream &optional (eof-error-p t) eof-value +;; g/unread-byte byte stream +;; g/read-char stream &optional (eof-error-p t) eof-value +;; g/unread-char char stream +;; g/write-char char stream +;; g/write-byte byte stream +;; g/finish-output stream +;; g/close stream &key abort + +;; Additionally the follwing generic functions are implemented based +;; on the above protocol and may be reimplemented for any custom +;; stream class for performance. + +;; g/write-string string stream &key start end +;; g/read-line stream &optional (eof-error-p t) eof-value +;; g/read-line* stream &optional (eof-error-p t) eof-value +;; g/read-byte-sequence sequence stream &key start end +;; g/read-char-sequence sequence stream &key start end +;; g/write-byte-sequence sequence stream &key start end +;; g/write-char-sequence sequence stream &key start end + + +;; The following classes exists + +;; gstream +;; use-char-for-byte-stream-flavour +;; use-byte-for-char-stream-flavour +;; cl-stream +;; cl-byte-stream +;; cl-char-stream + +(defclass gstream () ()) + +;;; use-char-for-byte-stream-flavour + +(defclass use-char-for-byte-stream-flavour () ()) + +(defmethod g/read-byte ((self use-char-for-byte-stream-flavour) &optional (eof-error-p t) eof-value) + (let ((r (g/read-char self eof-error-p :eof))) + (if (eq r :eof) + eof-value + (char-code r)))) + +(defmethod g/unread-byte (byte (self use-char-for-byte-stream-flavour)) + (g/unread-char (or (and #+CMU (<= byte char-code-limit) (code-char byte)) + (error "Cannot stuff ~D. into a character." byte)) + self)) + +(defmethod g/write-byte (byte (self use-char-for-byte-stream-flavour)) + (g/write-char (or (and #+CMU (<= byte char-code-limit) (code-char byte)) + (error "Cannot stuff ~D. into a character." byte)) + self)) + +;;; use-byte-for-char-stream-flavour + +(defclass use-byte-for-char-stream-flavour () ()) + +(defmethod g/read-char ((self use-byte-for-char-stream-flavour) &optional (eof-error-p t) eof-value) + (let ((byte (g/read-byte self eof-error-p :eof))) + (if (eq byte :eof) + eof-value + (let ((res (and #+CMU (<= byte char-code-limit) (code-char byte)))) + (or res + (error "The byte ~D. could not been represented as character in your LISP implementation." byte)))))) + +(defmethod g/unread-char (char (self use-byte-for-char-stream-flavour)) + (g/unread-byte (char-code char) self)) + +(defmethod g/write-char (char (self use-byte-for-char-stream-flavour)) + (g/write-byte (char-code char) self)) + +;;; ------------------------------------------------------------ +;;; Streams made up out of Common Lisp streams + +;;; cl-stream + +(defclass cl-stream (gstream) + ((cl-stream :initarg :cl-stream))) + +(defmethod g/finish-output ((self cl-stream)) + (with-slots (cl-stream) self + (finish-output cl-stream))) + +(defmethod g/close ((self cl-stream) &key abort) + (with-slots (cl-stream) self + (close cl-stream :abort abort))) + +;;; cl-byte-stream + +(defclass cl-byte-stream (use-byte-for-char-stream-flavour cl-stream) + ((lookahead :initform nil))) + +(defmethod g/read-byte ((self cl-byte-stream) &optional (eof-error-p t) eof-value) + (with-slots (cl-stream lookahead) self + (if lookahead + (prog1 lookahead + (setf lookahead nil)) + (read-byte cl-stream eof-error-p eof-value)))) + +(defmethod g/unread-byte (byte (self cl-byte-stream)) + (with-slots (cl-stream lookahead) self + (if lookahead + (error "You cannot unread twice.") + (setf lookahead byte)))) + +(defmethod g/write-byte (byte (self cl-byte-stream)) + (with-slots (cl-stream) self + (write-byte byte cl-stream))) + +(defmethod g/read-byte-sequence (sequence (input cl-byte-stream) &key (start 0) (end (length sequence))) + (with-slots (cl-stream) input + (read-byte-sequence sequence cl-stream :start start :end end))) + +(defmethod g/write-byte-sequence (sequence (sink cl-byte-stream) &key (start 0) (end (length sequence))) + (with-slots (cl-stream) sink + (cl:write-sequence sequence cl-stream :start start :end end))) + +;;; cl-char-stream + +(defclass cl-char-stream (use-char-for-byte-stream-flavour cl-stream) + ()) + +(defmethod g/read-char ((self cl-char-stream) &optional (eof-error-p t) eof-value) + (with-slots (cl-stream) self + (read-char cl-stream eof-error-p eof-value))) + +(defmethod g/unread-char (char (self cl-char-stream)) + (with-slots (cl-stream) self + (unread-char char cl-stream))) + +(defmethod g/write-char (char (self cl-char-stream)) + (with-slots (cl-stream) self + (write-char char cl-stream))) + +;;; ------------------------------------------------------------ +;;; General or fall back stream methods + +(defmethod g/write-string (string (stream t) &key (start 0) (end (length string))) + (do ((i start (+ i 1))) + ((>= i end)) + (g/write-char (char string i) stream))) + +(defmethod g/read-line ((stream t) &optional (eof-error-p t) eof-value) + (let ((res nil)) + (do ((c (g/read-char stream eof-error-p :eof) + (g/read-char stream nil :eof))) + ((or (eq c :eof) (char= c #\newline)) + (cond ((eq c :eof) + (values (if (null res) eof-value (coerce (nreverse res) 'string)) + t)) + (t + (values (coerce (nreverse res) 'string) + nil)))) + (push c res)))) + +(defmethod g/read-line* ((stream t) &optional (eof-error-p t) eof-value) + ;; Like read-line, but accepts CRNL, NL, CR as line termination + (let ((res nil)) + (do ((c (g/read-char stream eof-error-p :eof) + (g/read-char stream nil :eof))) + ((or (eq c :eof) (char= c #\newline) (char= c #\return)) + (cond ((eq c :eof) + (values (if (null res) eof-value (coerce (nreverse res) 'string)) + t)) + (t + (when (char= c #\return) + (let ((d (g/read-char stream nil :eof))) + (unless (or (eq d :eof) (char= d #\newline)) + (g/unread-char d stream)))) + (values (coerce (nreverse res) 'string) + nil)))) + (push c res)))) + +(defmethod g/read-byte-sequence (sequence (input t) &key (start 0) (end (length sequence))) + (let ((i start) c) + (loop + (when (>= i end) + (return i)) + (setf c (g/read-byte input nil :eof)) + (when (eq c :eof) + (return i)) + (setf (elt sequence i) c) + (incf i)))) + +(defmethod g/read-char-sequence (sequence (input t) &key (start 0) (end (length sequence))) + (let ((i start) c) + (loop + (when (>= i end) + (return i)) + (setf c (g/read-char input nil :eof)) + (when (eq c :eof) + (return i)) + (setf (elt sequence i) c) + (incf i)))) + +(defmethod g/write-byte-sequence (sequence (sink t) &key (start 0) (end (length sequence))) + (do ((i start (+ i 1))) + ((>= i end) i) + (g/write-byte (aref sequence i) sink))) + +;;; ---------------------------------------------------------------------------------------------------- +;;; Vector streams +;;; + +;; Output + +(defclass vector-output-stream (use-byte-for-char-stream-flavour) + ((buffer :initarg :buffer))) + +(defun g/make-vector-output-stream (&key (initial-size 100)) + (make-instance 'vector-output-stream + :buffer (make-array initial-size :element-type '(unsigned-byte 8) + :fill-pointer 0 + :adjustable t))) + +(defmethod g/close ((self vector-output-stream) &key abort) + (declare (ignorable self abort)) + nil) + +(defmethod g/finish-output ((self vector-output-stream)) + nil) + +(defmethod g/write-byte (byte (self vector-output-stream)) + (with-slots (buffer) self + (vector-push-extend byte buffer 100))) + +(defmethod g/write-byte-sequence (sequence (self vector-output-stream) &key (start 0) (end (length sequence))) + (with-slots (buffer) self + (adjust-array buffer (+ (length buffer) (- end start))) + (replace buffer sequence :start1 (length buffer) :start2 start :end2 end) + (setf (fill-pointer buffer) (+ (length buffer) (- end start))) + end)) + +;;; ---------------------------------------------------------------------------------------------------- +;;; Echo streams + +#|| +(defclass echo-stream (use-byte-for-char-stream-flavour) + ((echoed-to :initarg :echoed-to))) + +(defun g/make-echo-stream (echoed-to) + (make-instance 'echo-stream :echoed-to echoed-to)) +||# + +#|| + +Hmm unter PCL geht das nicht ;-( + +(defmethod g/read-byte ((stream stream) &optional (eof-error-p t) eof-value) + (read-byte stream eof-error-p eof-value)) + +(defmethod g/read-char ((stream stream) &optional (eof-error-p t) eof-value) + (read-char stream eof-error-p eof-value)) + +(defmethod g/unread-char (char (stream stream)) + (unread-char char stream)) + +(defmethod g/write-char (char (stream stream)) + (write-char char stream)) + +(defmethod g/write-byte (byte (stream stream)) + (write-byte byte stream)) + +(defmethod g/finish-output ((stream stream)) + (finish-output stream)) + +(defmethod g/close ((stream stream) &key abort) + (close stream :abort abort)) + +||# + +;;;; ---------------------------------------------------------------------------------------------------- + +#|| +(let ((null (make-symbol "NULL"))) + + (defstruct (future (:print-function print-future)) + (value null) + (awaited-by nil)) + + (defun print-future (self sink depth) + (if (eq (future-value self) null) + (format sink "#<~S unpredicted>" (type-of self)) + (if (and *print-level* (>= depth *print-level*)) + (format sink "#<~S predicted as ...>" (type-of self)) + (format sink "#<~S predicted as ~S>" (type-of self) (future-value self))))) + + (defun future () + (make-future)) + + (defun guess (future) + (when (eq (future-value future) null) + (setf (future-awaited-by future) (mp/current-process)) + (mp/process-wait "Awaiting future" (lambda () (not (eq (future-value future) null)))) + (setf (future-awaited-by future) nil)) + (future-value future)) + + (defun predict (future value) + (setf (future-value future) value) + (let ((aw (future-awaited-by future))) + (when aw (mp/process-allow-schedule aw))) + value) + ) +||# + +(defun map-array (fun array &rest make-array-options) + (let ((res (apply #'make-array (array-dimensions array) make-array-options))) + (dotimes (i (array-total-size array)) + (setf (row-major-aref res i) (funcall fun (row-major-aref array i)))) + res)) + +;;---------------------------------------------------------------------------------------------------- + +(defun g/peek-char (&optional (peek-type nil) (source *standard-input*) + (eof-error-p T) eof-value) + (cond ((eq peek-type T) + (do ((ch (g/read-char source eof-error-p '%the-eof-object%) + (g/read-char source eof-error-p '%the-eof-object%))) + ((or (eq ch '%the-eof-object%) + (not (white-space-p ch))) + (cond ((eq ch '%the-eof-object%) eof-value) + (t (g/unread-char ch source) ch)) ))) + ((eq peek-type NIL) + (let ((ch (g/read-char source eof-error-p '%the-eof-object%))) + (cond ((eq ch '%the-eof-object%) eof-value) + (t (g/unread-char ch source) + ch)))) + ((characterp peek-type) + (do ((ch (g/read-char source eof-error-p '%the-eof-object%) + (g/read-char source eof-error-p '%the-eof-object%))) + ((or (eq ch '%the-eof-object%) (eql ch peek-type)) + (cond ((eq ch '%the-eof-object%) eof-value) + (t (g/unread-char ch source) ch)) )) ) )) + + + +(defun cl-byte-stream->gstream (stream) + (make-instance 'cl-byte-stream :cl-stream stream)) + +(defun cl-char-stream->gstream (stream) + (make-instance 'cl-char-stream :cl-stream stream)) + +(defun g/open-inet-socket (&rest args) + (multiple-value-bind (stream kind) (apply #'open-inet-socket args) + (ecase kind + #-CMU + (:char (cl-char-stream->gstream stream)) + (:byte (cl-byte-stream->gstream stream)) ))) + +#|| +(defun g/open-inet-socket-ssl (host port) + (multiple-value-bind (stream) (gluser::make-ssl-client-socket host port) + (cl-byte-stream->gstream stream))) +||# + +(defun accept-connection (socket) + (multiple-value-bind (stream kind) (accept-connection/low socket) + (ecase kind + (:char (cl-char-stream->gstream stream)) + (:byte (cl-byte-stream->gstream stream)) ))) + + +;;; ---------------------------------------------------------------------------------------------------- + +(defvar *all-temporary-files* nil + "List of all temporary files.") + +(defun find-temporary-file (&key (type nil)) + (let ((temp-dir "/tmp/*") ;since Motif is only available on unix, we subtly assume a unix host. + (stream nil)) + (labels ((invent-name () + (merge-pathnames (make-pathname + :type type + :name + (let ((*print-base* 35)) + (format nil "ws_~S" (random (expt 36 7))))) + temp-dir))) + (unwind-protect + (do ((name (invent-name) (invent-name))) + ((setq stream (open name :direction :output :if-exists nil)) + (push name *all-temporary-files*) ;remember this file + name)) + (when stream + (close stream)) )))) + +(defun delete-temporary-file (filename) + (setf *all-temporary-files* (delete filename *all-temporary-files*)) + (ignore-errors (delete-file filename))) + +(defmacro with-temporary-file ((name-var &key type) &body body) + (let ((name (gensym))) + `(let* ((,name (find-temporary-file :type ,type)) + (,name-var ,name)) + (unwind-protect + (progn ,@body) + (when (open ,name :direction :probe) + (delete-temporary-file ,name)))) )) + +;;;; + +(defun set-equal (x y &rest options) + (null (apply #'set-exclusive-or x y options))) + +;;;; + +(defun maybe-parse-integer (string &key (radix 10)) + (cond ((not (stringp string)) nil) + (t + (let ((len (length string))) + (cond ((= len 0) nil) + (t + (let ((start 0) + (vz +1) + (res 0)) + (cond ((and (> len 1) (char= (char string 0) #\+)) + (incf start)) + ((and (> len 1) (char= (char string 0) #\-)) + (setf vz -1) + (incf start))) + (do ((i start (+ i 1))) + ((= i len) (* vz res)) + (let ((d (digit-char-p (char string i) radix))) + (if d + (setf res (+ (* radix res) d)) + (return nil))))))))))) + +;;; + +(defun nop (&rest ignore) + (declare (ignore ignore)) + nil) + +(defmacro with-structure-slots ((type &rest slots) obj &body body) + ;; Something like 'with-slots' but for structures. Assumes that the structure + ;; slot accessors have the default name. Note that the structure type must + ;; been provided. + (let ((obj-var (make-symbol "OBJ"))) + `(LET ((,obj-var ,obj)) + (SYMBOL-MACROLET ,(mapcar (lambda (slot) + (list slot + `(,(intern (concatenate 'string (symbol-name type) "-" (symbol-name slot)) + (symbol-package type)) + ,obj-var))) + slots) + ,@body)))) + +;;;; ---------------------------------------------------------------------------------------------------- + +;; Wir helfen den Compiler mal etwas auf die Spruenge ... +(defun compile-funcall (fn args) + (cond ((eq fn '#'identity) + (car args)) + ((eq fn '#'nop) + `(progn ,args nil)) + ((and (consp fn) (eq (car fn) 'function)) + `(,(cadr fn) .,args)) + ((and (consp fn) (eq (car fn) 'lambda)) + `(,fn .,args)) + ((and (consp fn) (eq (car fn) 'curry)) + (compile-funcall (cadr fn) (append (cddr fn) args))) + ((and (consp fn) (eq (car fn) 'rcurry)) + (compile-funcall (cadr fn) (append args (cddr fn)))) + (t + (warn "Unable to inline funcall to ~S." fn) + `(funcall ,fn .,args)) )) + +(defmacro funcall* (fn &rest args) + (compile-funcall fn args)) + +;; Ich mag mapc viel lieber als dolist, nur viele Compiler optimieren +;; das nicht, deswegen das Macro hier. Einige Compiler haben auch kein +;; DEFINE-COMPILER-MACRO :-( + +(defmacro mapc* (fn list) + (let ((g (gensym))) + `(dolist (,g ,list) + ,(compile-funcall fn (list g))))) + +;; Das gleiche mit REDUCE und MAPCAR. + +;; REDUCE arbeitet sowohl fuer Vectoren als auch fuer Listen. Wir +;; haben allerdings leider keinen vernuenftigen Zugriff auf +;; Deklarationen; Man koennte mit TYPEP herangehen und hoffen, dass +;; der Compiler das optimiert, ich fuerchte aber dass das nicht +;; funktionieren wird. Und CLISP verwirft Deklarationen ja total. Also +;; zwei Versionen: LREDUCE* und VREDUCE* + +(defmacro vreduce* (fun seq &rest rest &key (key '#'identity) from-end start end + (initial-value nil initial-value?)) + (declare (ignore rest)) + (let (($start (make-symbol "start")) + ($end (make-symbol "end")) + ($i (make-symbol "i")) + ($accu (make-symbol "accu")) + ($seq (make-symbol "seq"))) + (cond (from-end + (cond (initial-value? + `(LET* ((,$seq ,seq) + (,$start ,(or start 0)) + (,$end ,(or end `(LENGTH ,$seq))) + (,$accu ,initial-value)) + (DECLARE (TYPE FIXNUM ,$start ,$end)) + (DO ((,$i (- ,$end 1) (THE FIXNUM (- ,$i 1)))) + ((< ,$i ,$start) ,$accu) + (DECLARE (TYPE FIXNUM ,$i)) + (SETF ,$accu (FUNCALL* ,fun (FUNCALL* ,key (AREF ,$seq ,$i)) ,$accu)) ))) + (t + `(LET* ((,$seq ,seq) + (,$start ,(or start 0)) + (,$end ,(or end `(LENGTH ,$seq)))) + (DECLARE (TYPE FIXNUM ,$start ,$end)) + (COND ((= 0 (- ,$end ,$start)) + (FUNCALL* ,fun)) + (T + (LET ((,$accu (FUNCALL* ,key (AREF ,$seq (- ,$end 1))))) + (DO ((,$i (- ,$end 2) (THE FIXNUM (- ,$i 1)))) + ((< ,$i ,$start) ,$accu) + (DECLARE (TYPE FIXNUM ,$i)) + (SETF ,$accu (FUNCALL* ,fun (FUNCALL* ,key (AREF ,$seq ,$i)) ,$accu)))))))) )) + (t + (cond (initial-value? + `(LET* ((,$seq ,seq) + (,$start ,(or start 0)) + (,$end ,(or end `(LENGTH ,$seq))) + (,$accu ,initial-value)) + (DECLARE (TYPE FIXNUM ,$start ,$end)) + (DO ((,$i ,$start (THE FIXNUM (+ ,$i 1)))) + ((>= ,$i ,$end) ,$accu) + (DECLARE (TYPE FIXNUM ,$i)) + (SETF ,$accu (FUNCALL* ,fun ,$accu (FUNCALL* ,key (AREF ,$seq ,$i)))) ))) + (t + `(let* ((,$seq ,seq) + (,$start ,(or start 0)) + (,$end ,(or end `(LENGTH ,$seq)))) + (DECLARE (TYPE FIXNUM ,$start ,$end)) + (COND ((= 0 (- ,$end ,$start)) + (FUNCALL* ,fun)) + (T + (LET ((,$accu (FUNCALL* ,key (AREF ,$seq ,$start)))) + (DO ((,$i (+ ,$start 1) (+ ,$i 1))) + ((>= ,$i ,$end) ,$accu) + (DECLARE (TYPE FIXNUM ,$i)) + (SETF ,$accu (FUNCALL* ,fun ,$accu (FUNCALL* ,key (AREF ,$seq ,$i))))))))))))))) + +(defmacro lreduce* (fun seq &rest rest &key (key '#'identity) from-end start end + (initial-value nil initial-value?)) + (cond ((or start end from-end) + `(reduce ,fun ,seq .,rest)) + (t + (cond (initial-value? + (let (($accu (make-symbol "accu")) + ($k (make-symbol "k"))) + `(LET* ((,$accu ,initial-value)) + (DOLIST (,$k ,seq ,$accu) + (SETF ,$accu (FUNCALL* ,fun ,$accu (FUNCALL* ,key ,$k))))))) + (t + (let (($accu (make-symbol "accu")) + ($seq (make-symbol "seq")) + ($k (make-symbol "k"))) + `(LET* ((,$seq ,seq)) + (IF (NULL ,$seq) + (FUNCALL* ,fun) + (LET ((,$accu (FUNCALL* ,key (CAR ,$seq)))) + (DOLIST (,$k (CDR ,$seq) ,$accu) + (SETF ,$accu (FUNCALL* ,fun ,$accu (FUNCALL* ,key ,$k)))))))) ))) )) + + +;;; Wenn wir so weiter machen, koennen wir bald gleich unseren eigenen +;;; Compiler schreiben ;-) + +#|| +(defmacro lreduce* (fun seq &rest x &key key &allow-other-keys) + (let ((q (copy-list x))) + (remf q :key) + (cond (key + `(reduce ,fun (map 'vector ,key ,seq) .,q)) + (t + `(reduce ,fun ,seq .,q))))) + +(defmacro vreduce* (fun seq &rest x &key key &allow-other-keys) + (let ((q (copy-list x))) + (remf q :key) + (cond (key + `(reduce ,fun (map 'vector ,key ,seq) .,q)) + (t + `(reduce ,fun ,seq .,q))))) + +||# + +;; Stolen from Eclipse (http://elwoodcorp.com/eclipse/unique.htm + +(defmacro with-unique-names ((&rest names) &body body) + `(let (,@(mapcar (lambda (x) (list x `(gensym ',(concatenate 'string (symbol-name x) "-")))) names)) + .,body)) + + +(defun gstream-as-string (gstream &optional (buffer-size 4096)) + (let ((buffer (g/make-string buffer-size :adjustable t))) + (do* ((i 0 j) + (j (g/read-char-sequence buffer gstream :start 0 :end buffer-size) + (g/read-char-sequence buffer gstream :start i :end (+ i buffer-size)) )) + ((= j i) (subseq buffer 0 j)) + (adjust-array buffer (list (+ j buffer-size))) ))) + +;;;; Generic hash tables + +;; TODO: +;; - automatic size adjustment +;; - sensible printer +;; - make-load-form?! + +(defstruct g/hash-table + hash-function ;hash function + compare-function ;predicate to test for equality + table ;simple vector of chains + size ;size of hash table + (nitems 0)) ;number of items + +(defun g/make-hash-table (&key (size 100) (hash-function #'sxhash) (compare-function #'eql)) + "Creates a generic hashtable; + `size' is the default size of the table. + `hash-function' (default #'sxhash) is a specific hash function + `compare-function' (default #'eql) is a predicate to test for equality." + (setf size (nearest-greater-prime size)) + (make-g/hash-table :hash-function hash-function + :compare-function compare-function + :table (make-array size :initial-element nil) + :size size + :nitems 0)) + +(defun g/hashget (hashtable key &optional (default nil)) + "Looks up the key `key' in the generic hash table `hashtable'. + Returns three values: + value - value, which as associated with the key, or `default' is no value + present. + successp - true, iff the key was found. + key - the original key in the hash table." + ;; -> value ; successp ; key + (let ((j (mod (funcall (g/hash-table-hash-function hashtable) key) + (g/hash-table-size hashtable)))) + (let ((q (assoc key (aref (g/hash-table-table hashtable) j) + :test (g/hash-table-compare-function hashtable)))) + (if q + (values (cdr q) t (car q)) + (values default nil))))) + +(defun (setf g/hashget) (new-value hashtable key &optional (default nil)) + (declare (ignore default)) + (let ((j (mod (funcall (g/hash-table-hash-function hashtable) key) + (g/hash-table-size hashtable)))) + (let ((q (assoc key (aref (g/hash-table-table hashtable) j) + :test (g/hash-table-compare-function hashtable)))) + (cond ((not (null q)) + (setf (cdr q) new-value)) + (t + (push (cons key new-value) + (aref (g/hash-table-table hashtable) j)) + (incf (g/hash-table-nitems hashtable)))))) + new-value) + +(defun resize-hash-table (hashtable new-size) + "Adjust the size of a generic hash table. (the size is round to the next greater prime number)." + (setf new-size (nearest-greater-prime new-size)) + (let ((new-table (make-array new-size :initial-element nil))) + (dotimes (i (g/hash-table-size hashtable)) + (dolist (k (aref (g/hash-table-table hashtable) i)) + (push k (aref new-table + (mod (funcall (g/hash-table-hash-function hashtable) (car k)) + new-size))))) + (setf (g/hash-table-table hashtable) new-table + (g/hash-table-size hashtable) new-size) + hashtable)) + +(defun g/clrhash (hashtable) + "Clears a generic hash table." + (dotimes (i (g/hash-table-size hashtable)) + (setf (aref (g/hash-table-table hashtable) i) nil)) + (setf (g/hash-table-nitems hashtable) nil) + hashtable) + +;; hash code utilities + +(defconstant +fixnum-bits+ + (1- (integer-length most-positive-fixnum)) + "Pessimistic approximation of the number of bits of fixnums.") + +(defconstant +fixnum-mask+ + (1- (expt 2 +fixnum-bits+)) + "Pessimistic approximation of the largest bit-mask, still being a fixnum.") + +(defun stir-hash-codes (a b) + "Stirs two hash codes together; always returns a fixnum. + When applied sequenitally the first argument should be used as accumulator." + ;; ich mach das mal wie Bruno + (logand +fixnum-mask+ + (logxor (logior (logand +fixnum-mask+ (ash a 5)) + (logand +fixnum-mask+ (ash a (- 5 +fixnum-bits+)))) + b))) + +(defun hash-sequence (sequence hash-function &optional (accu 0)) + "Applies the hash function `hash-function' to each element of `sequence' and + stirs the resulting hash codes together using STIR-HASH-CODE starting from + `accu'." + (map nil (lambda (item) + (setf accu (stir-hash-codes accu (funcall hash-function item)))) + sequence) + accu) + +;; some specific hash functions + +(defun hash/string-equal (string) + "Hash function compatible with STRING-EQUAL." + (hash-sequence string (lambda (char) + (sxhash (char-upcase char))))) + +;; some specific hash tables + +(defun make-string-equal-hash-table (&rest options) + "Constructs a new generic hash table using STRING-EQUAL as predicate." + (apply #'g/make-hash-table + :hash-function #'hash/string-equal + :compare-function #'string-equal + options)) + +;; prime numbers + +(defun primep (n) + "Returns true, iff `n' is prime." + (and (> n 2) + (do ((i 2 (+ i 1))) + ((> (* i i) n) t) + (cond ((zerop (mod n i)) (return nil)))))) + +(defun nearest-greater-prime (n) + "Returns the smallest prime number no less than `n'." + (cond ((primep n) n) + ((nearest-greater-prime (+ n 1))))) + + +;;; + +(defun grind-documentation-string (string &optional (sink *standard-output*)) + ;; some people say: + ;; (defun foo () + ;; "This function + ;; frobinates its two arguments.") + ;; some say: + ;; (defun foo () + ;; "This function + ;; frobinates its two arguments.") + ;; instead. + (let ((min-indention nil)) + ;; We sort this out by finding the minimum indent in all but the first line. + (with-input-from-string (in string) + (read-line in nil nil) ;ignore first line + (do ((x (read-line in nil nil) (read-line in nil nil))) + ((null x)) + (let ((p (position-if-not (curry #'char= #\space) x))) + (when p + (setf min-indention (min* min-indention p)))))) + (setf min-indention (or min-indention 0)) + ;; Now we could dump the string + (with-input-from-string (in string) + ;; first line goes unindented + (let ((x (read-line in nil nil))) + (when x + (fresh-line sink) + (write-string x sink))) + (do ((x (read-line in nil nil) (read-line in nil nil))) + ((null x)) + (terpri sink) + (when (< min-indention (length x)) + (write-string x sink :start min-indention))))) + (values)) + +(defun ap (&rest strings) + "A new apropos." + (let ((res nil)) + (do-all-symbols (symbol) + (unless (member symbol res) + (when (every (lambda (string) + (search string (symbol-name symbol))) + strings) + (push symbol res)))) + (dolist (k res) + (print k) + (when (fboundp k) + (princ ", function")) + (when (boundp k) + (princ ", variable")) + ))) + diff --git a/mlisp-patch.diff b/mlisp-patch.diff new file mode 100644 index 0000000..7133101 --- /dev/null +++ b/mlisp-patch.diff @@ -0,0 +1,68 @@ +--- orig/xml/xml-name-rune-p.lisp ++++ mod/xml/xml-name-rune-p.lisp +@@ -206,13 +206,15 @@ + (setf (aref r i) 1))))) ) + + `(progn +- (DEFSUBST NAME-RUNE-P (RUNE) +- (AND (<= 0 RUNE ,*max*) +- (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3))) +- (= 1 (SBIT ',(predicate-to-bv #'name-rune-p) +- (THE FIXNUM RUNE)))))) +- (DEFSUBST NAME-START-RUNE-P (RUNE) +- (AND (<= 0 RUNE ,*MAX*) +- (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3))) +- (= 1 (SBIT ',(predicate-to-bv #'name-start-rune-p) +- (THE FIXNUM RUNE)))))))) )))) +\ No newline at end of file ++ (defsubst name-rune-p (rune) ++ (let ((code (rune-code rune))) ++ (and (<= 0 code ,*max*) ++ (locally (declare (optimize (safety 0) (speed 3))) ++ (= 1 (sbit ',(predicate-to-bv #'name-rune-p) ++ (the fixnum code))))))) ++ (defsubst name-start-rune-p (rune) ++ (let ((code (rune-code rune))) ++ (and (<= 0 code ,*max*) ++ (locally (declare (optimize (safety 0) (speed 3))) ++ (= 1 (sbit ',(predicate-to-bv #'name-start-rune-p) ++ (the fixnum code))))))))) )))) + + +--- orig/xml/xml-parse.lisp ++++ mod/xml/xml-parse.lisp +@@ -2470,20 +2469,20 @@ + (let ((input-var (gensym)) + (collect (gensym)) + (c (gensym))) +- `(LET ((,input-var ,input)) +- (MULTIPLE-VALUE-BIND (,res ,res-start ,res-end) +- (WITH-RUNE-COLLECTOR/RAW (,collect) +- (LOOP +- (LET ((,c (PEEK-RUNE ,input-var))) +- (COND ((EQ ,c :EOF) ++ `(let ((,input-var ,input)) ++ (multiple-value-bind (,res ,res-start ,res-end) ++ (with-rune-collector/raw (,collect) ++ (loop ++ (let ((,c (peek-rune ,input-var))) ++ (cond ((eq ,c :eof) + ;; xxx error message +- (RETURN)) +- ((FUNCALL ,predicate ,c) +- (RETURN)) ++ (return)) ++ ((funcall ,predicate ,c) ++ (return)) + (t + (,collect ,c) +- (CONSUME-RUNE ,input-var)))))) +- (LOCALLY ++ (consume-rune ,input-var)))))) ++ (locally + ,@body))))) + + (defun read-name-token (input) + + + diff --git a/runes/COPYING b/runes/COPYING new file mode 100644 index 0000000..243648d --- /dev/null +++ b/runes/COPYING @@ -0,0 +1,521 @@ +Preamble to the Gnu Lesser General Public License + +Copyright (c) 2000 Franz Incorporated, Berkeley, CA 94704 + +The concept of the GNU Lesser General Public License version 2.1 +("LGPL") has been adopted to govern the use and distribution of +above-mentioned application. However, the LGPL uses terminology that is +more appropriate for a program written in C than one written in +Lisp. Nevertheless, the LGPL can still be applied to a Lisp program if +certain clarifications are made. This document details those +clarifications. Accordingly, the license for the open-source Lisp +applications consists of this document plus the LGPL. Wherever there is +a conflict between this document and the LGPL, this document takes +precedence over the LGPL. + +A "Library" in Lisp is a collection of Lisp functions, data and foreign +modules. The form of the Library can be Lisp source code (for processing +by an interpreter) or object code (usually the result of compilation of +source code or built with some other mechanisms). Foreign modules are +object code in a form that can be linked into a Lisp executable. When we +speak of functions we do so in the most general way to include, in +addition, methods and unnamed functions. Lisp "data" is also a general +term that includes the data structures resulting from defining Lisp +classes. A Lisp application may include the same set of Lisp objects as +does a Library, but this does not mean that the application is +necessarily a "work based on the Library" it contains. + +The Library consists of everything in the distribution file set before +any modifications are made to the files. If any of the functions or +classes in the Library are redefined in other files, then those +redefinitions ARE considered a work based on the Library. If additional +methods are added to generic functions in the Library, those additional +methods are NOT considered a work based on the Library. If Library +classes are subclassed, these subclasses are NOT considered a work based +on the Library. If the Library is modified to explicitly call other +functions that are neither part of Lisp itself nor an available add-on +module to Lisp, then the functions called by the modified Library ARE +considered a work based on the Library. The goal is to ensure that the +Library will compile and run without getting undefined function errors. + +It is permitted to add proprietary source code to the Library, but it +must be done in a way such that the Library will still run without that +proprietary code present. Section 5 of the LGPL distinguishes between +the case of a library being dynamically linked at runtime and one being +statically linked at build time. Section 5 of the LGPL states that the +former results in an executable that is a "work that uses the Library." +Section 5 of the LGPL states that the latter results in one that is a +"derivative of the Library", which is therefore covered by the +LGPL. Since Lisp only offers one choice, which is to link the Library +into an executable at build time, we declare that, for the purpose +applying the LGPL to the Library, an executable that results from +linking a "work that uses the Library" with the Library is considered a +"work that uses the Library" and is therefore NOT covered by the LGPL. + +Because of this declaration, section 6 of LGPL is not applicable to the +Library. However, in connection with each distribution of this +executable, you must also deliver, in accordance with the terms and +conditions of the LGPL, the source code of Library (or your derivative +thereof) that is incorporated into this executable. + +End of Document +------------------------------------------------------------------------ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + diff --git a/runes/characters.lisp b/runes/characters.lisp new file mode 100644 index 0000000..a8fa7e9 --- /dev/null +++ b/runes/characters.lisp @@ -0,0 +1,149 @@ +;;; copyright (c) 2004 knowledgeTools Int. GmbH +;;; Author of this version: David Lichteblau +;;; +;;; derived from runes.lisp, (c) copyright 1998,1999 by Gilbert Baumann +;;; +;;; License: LLGPL (See file COPYING for details). +;;; +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(in-package :runes) + +(deftype rune () 'base-char) +(deftype rod () 'base-string) +(deftype simple-rod () 'simple-string) + +(defsubst rune (rod index) + (char rod index)) + +(defun (setf rune) (new rod index) + (setf (char rod index) new)) + +(defsubst %rune (rod index) + (aref (the simple-string rod) (the fixnum index))) + +(defsubst (setf %rune) (new rod index) + (setf (aref (the simple-string rod) (the fixnum index)) new)) + +(defun rod-capitalize (rod) + (string-upcase rod)) + +(defsubst code-rune (x) (code-char x)) +(defsubst rune-code (x) (char-code x)) + +(defsubst rune= (x y) + (char= x y)) + +(defun rune-downcase (rune) + (char-downcase rune)) + +(defsubst rune-upcase (rune) + (char-upcase rune)) + +(defun rune-upper-case-letter-p (rune) + (upper-case-p rune)) + +(defun rune-lower-case-letter-p (rune) + (lower-case-p rune)) + +(defun rune-equal (x y) + (char-equal x y)) + +(defun rod-downcase (rod) + (string-downcase rod)) + +(defun rod-upcase (rod) + (string-upcase rod)) + +(defsubst white-space-rune-p (char) + (or (char= char #\tab) + (char= char #.(code-char 10)) ;Linefeed + (char= char #.(code-char 13)) ;Carriage Return + (char= char #\space))) + +(defsubst digit-rune-p (char &optional (radix 10)) + (digit-char-p char radix)) + +(defun rod (x) + (cond + ((stringp x) x) + ((symbolp x) (string x)) + ((characterp x) (string x)) + ((vectorp x) (coerce x 'string)) + ((integerp x) (string (code-char x))) + (t (error "Cannot convert ~S to a ~S" x 'rod)))) + +(defun runep (x) + (characterp x)) + +(defun sloopy-rod-p (x) + (stringp x)) + +(defun rod= (x y) + (string= x y)) + +(defun rod-equal (x y) + (string-equal x y)) + +(defsubst make-rod (size) + (make-string size)) + +(defun char-rune (char) + char) + +(defun rune-char (rune &optional default) + (declare (ignore default)) + rune) + +(defun rod-string (rod &optional (default-char #\?)) + (declare (ignore default-char)) + rod) + +(defun string-rod (string) + string) + +;;;; + +(defun rune<= (rune &rest more-runes) + (loop + for (a b) on (cons rune more-runes) + while b + always (char<= a b))) + +(defun rune>= (rune &rest more-runes) + (loop + for (a b) on (cons rune more-runes) + while b + always (char>= a b))) + +(defun rodp (object) + (stringp object)) + +(defun really-rod-p (object) + (stringp object)) + +(defun rod-subseq (source start &optional (end (length source))) + (unless (stringp source) + (error "~S is not of type ~S." source 'rod)) + (subseq source start end)) + +(defun rod-subseq* (source start &optional (end (length source))) + (rod-subseq source start end)) + +(defun rod< (rod1 rod2) + (string< rod1 rod2)) diff --git a/runes/dep-acl.lisp b/runes/dep-acl.lisp new file mode 100644 index 0000000..5bbda45 --- /dev/null +++ b/runes/dep-acl.lisp @@ -0,0 +1,42 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: ACL-4.3 dependent stuff + fixups +;;; Created: 1999-05-25 22:33 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; ACL is incapable to define compiler macros on (setf foo) +;; Unfortunately it is also incapable to declaim such functions inline. +;; So we revoke the DEFUN hack from dep-gcl here. + +(defmacro runes::defsubst (fun args &body body) + (if (and (consp fun) (eq (car fun) 'setf)) + (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")") + (symbol-package (cadr fun))))) + `(progn + (defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap)) + (runes::defsubst ,fnam ,args .,body))) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args .,body) + .args.))))) diff --git a/runes/dep-acl5.lisp b/runes/dep-acl5.lisp new file mode 100644 index 0000000..64534d9 --- /dev/null +++ b/runes/dep-acl5.lisp @@ -0,0 +1,59 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: ACL-5.0 dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;;; Changes +;;; ======= + +;;; When Who What +;;; --------------------------------------------------------------------------- +;;; 2002-01-04 GB spend BLOCK for DEFSUBST +;;; 1999-08-31 SES Stig Erik Sandø +;;; +;;; Changed #+allegro-v5.0 to +;;; #+(and allegro-version>= (version>= 5)) +;;; + +;; ACL is incapable to define compiler macros on (setf foo) +;; Unfortunately it is also incapable to declaim such functions inline. +;; So we revoke the DEFUN hack from dep-gcl here. + +(defmacro runes::defsubst (fun args &body body) + (if (and (consp fun) (eq (car fun) 'setf)) + (let ((fnam (intern (concatenate 'string "(SETF " (symbol-name (cadr fun)) ")") + (symbol-package (cadr fun))))) + `(progn + (defsetf ,(cadr fun) (&rest ap) (new-value) (list* ',fnam new-value ap)) + (runes::defsubst ,fnam ,args .,body))) + (labels ((declp (x) + (and (consp x) (eq (car x) 'declare)))) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args + ,@(remove-if-not #'declp body) + (block ,fun + ,@(remove-if #'declp body))) + .args.)))))) diff --git a/runes/dep-clisp.lisp b/runes/dep-clisp.lisp new file mode 100644 index 0000000..2d9216b --- /dev/null +++ b/runes/dep-clisp.lisp @@ -0,0 +1,59 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CLISP dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(in-package :CL-USER) + +(eval-when (compile load eval) + (if (fboundp 'cl::define-compiler-macro) + (pushnew 'define-compiler-macro *features*))) + +(setq lisp:*load-paths* '(#P"./")) + +#+DEFINE-COMPILER-MACRO +(cl:define-compiler-macro ldb (bytespec value &whole whole) + (let (pos size) + (cond ((and (consp bytespec) + (= (length bytespec) 3) + (eq (car bytespec) 'byte) + (constantp (setq size (second bytespec))) + (constantp (setq pos (third bytespec)))) + `(logand ,(if (eql pos 0) value `(ash ,value (- ,pos))) + (1- (ash 1 ,size)))) + (t + whole)))) + +#-DEFINE-COMPILER-MACRO +(progn + (export 'runes::define-compiler-macro :runes) + (defmacro runes::define-compiler-macro (name args &body body) + (declare (ignore args body)) + `(progn + ',name))) + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/runes/dep-cmucl-dtc.lisp b/runes/dep-cmucl-dtc.lisp new file mode 100644 index 0000000..2e080c3 --- /dev/null +++ b/runes/dep-cmucl-dtc.lisp @@ -0,0 +1,30 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CMUCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/runes/dep-cmucl.lisp b/runes/dep-cmucl.lisp new file mode 100644 index 0000000..2e080c3 --- /dev/null +++ b/runes/dep-cmucl.lisp @@ -0,0 +1,30 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: CMUCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/runes/dep-openmcl.lisp b/runes/dep-openmcl.lisp new file mode 100644 index 0000000..3ff2c6f --- /dev/null +++ b/runes/dep-openmcl.lisp @@ -0,0 +1,16 @@ +;;;; dep-openmcl.lisp +;;;; +;;;; This file is part of the CXML parser, released under (L)LGPL. +;;;; See file COPYING for details. +;;;; +;;;; (c) copyright 1999 by Gilbert Baumann + +(defmacro runes::defsubst (fun args &body body) + (if (consp fun) + `(defun ,fun ,args + ,@body) + `(progn + (defun ,fun ,args .,body) + (define-compiler-macro ,fun (&rest .args.) + (cons '(lambda ,args .,body) + .args.))))) diff --git a/runes/dep-sbcl.lisp b/runes/dep-sbcl.lisp new file mode 100644 index 0000000..9431fb3 --- /dev/null +++ b/runes/dep-sbcl.lisp @@ -0,0 +1,30 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: SBCL dependent stuff + fixups +;;; Created: 1999-05-25 22:32 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +(defmacro runes::defsubst (name args &body body) + `(progn + (declaim (inline ,name)) + (defun ,name ,args .,body))) diff --git a/runes/encodings-data.lisp b/runes/encodings-data.lisp new file mode 100644 index 0000000..e29a683 --- /dev/null +++ b/runes/encodings-data.lisp @@ -0,0 +1,568 @@ +(in-package :encoding) + +(progn + (add-name :us-ascii "ANSI_X3.4-1968") + (add-name :us-ascii "iso-ir-6") + (add-name :us-ascii "ANSI_X3.4-1986") + (add-name :us-ascii "ISO_646.irv:1991") + (add-name :us-ascii "ASCII") + (add-name :us-ascii "ISO646-US") + (add-name :us-ascii "US-ASCII") + (add-name :us-ascii "us") + (add-name :us-ascii "IBM367") + (add-name :us-ascii "cp367") + (add-name :us-ascii "csASCII") + + (add-name :iso-8859-1 "ISO_8859-1:1987") + (add-name :iso-8859-1 "iso-ir-100") + (add-name :iso-8859-1 "ISO_8859-1") + (add-name :iso-8859-1 "ISO-8859-1") + (add-name :iso-8859-1 "latin1") + (add-name :iso-8859-1 "l1") + (add-name :iso-8859-1 "IBM819") + (add-name :iso-8859-1 "CP819") + (add-name :iso-8859-1 "csISOLatin1") + + (add-name :iso-8859-2 "ISO_8859-2:1987") + (add-name :iso-8859-2 "iso-ir-101") + (add-name :iso-8859-2 "ISO_8859-2") + (add-name :iso-8859-2 "ISO-8859-2") + (add-name :iso-8859-2 "latin2") + (add-name :iso-8859-2 "l2") + (add-name :iso-8859-2 "csISOLatin2") + + (add-name :iso-8859-3 "ISO_8859-3:1988") + (add-name :iso-8859-3 "iso-ir-109") + (add-name :iso-8859-3 "ISO_8859-3") + (add-name :iso-8859-3 "ISO-8859-3") + (add-name :iso-8859-3 "latin3") + (add-name :iso-8859-3 "l3") + (add-name :iso-8859-3 "csISOLatin3") + + (add-name :iso-8859-4 "ISO_8859-4:1988") + (add-name :iso-8859-4 "iso-ir-110") + (add-name :iso-8859-4 "ISO_8859-4") + (add-name :iso-8859-4 "ISO-8859-4") + (add-name :iso-8859-4 "latin4") + (add-name :iso-8859-4 "l4") + (add-name :iso-8859-4 "csISOLatin4") + + (add-name :iso-8859-6 "ISO_8859-6:1987") + (add-name :iso-8859-6 "iso-ir-127") + (add-name :iso-8859-6 "ISO_8859-6") + (add-name :iso-8859-6 "ISO-8859-6") + (add-name :iso-8859-6 "ECMA-114") + (add-name :iso-8859-6 "ASMO-708") + (add-name :iso-8859-6 "arabic") + (add-name :iso-8859-6 "csISOLatinArabic") + + (add-name :iso-8859-7 "ISO_8859-7:1987") + (add-name :iso-8859-7 "iso-ir-126") + (add-name :iso-8859-7 "ISO_8859-7") + (add-name :iso-8859-7 "ISO-8859-7") + (add-name :iso-8859-7 "ELOT_928") + (add-name :iso-8859-7 "ECMA-118") + (add-name :iso-8859-7 "greek") + (add-name :iso-8859-7 "greek8") + (add-name :iso-8859-7 "csISOLatinGreek") + + (add-name :iso-8859-8 "ISO_8859-8:1988") + (add-name :iso-8859-8 "iso-ir-138") + (add-name :iso-8859-8 "ISO_8859-8") + (add-name :iso-8859-8 "ISO-8859-8") + (add-name :iso-8859-8 "hebrew") + (add-name :iso-8859-8 "csISOLatinHebrew") + + (add-name :iso-8859-5 "ISO_8859-5:1988") + (add-name :iso-8859-5 "iso-ir-144") + (add-name :iso-8859-5 "ISO_8859-5") + (add-name :iso-8859-5 "ISO-8859-5") + (add-name :iso-8859-5 "cyrillic") + (add-name :iso-8859-5 "csISOLatinCyrillic") + + (add-name :iso-8859-9 "ISO_8859-9:1989") + (add-name :iso-8859-9 "iso-ir-148") + (add-name :iso-8859-9 "ISO_8859-9") + (add-name :iso-8859-9 "ISO-8859-9") + (add-name :iso-8859-9 "latin5") + (add-name :iso-8859-9 "l5") + (add-name :iso-8859-9 "csISOLatin5") + + (add-name :iso-8859-15 "ISO_8859-15") + (add-name :iso-8859-15 "ISO-8859-15") + + (add-name :iso-8859-14 "ISO_8859-14") + (add-name :iso-8859-14 "ISO-8859-14") + + (add-name :koi8-r "KOI8-R") + (add-name :koi8-r "csKOI8R") + + (add-name :utf-8 "UTF-8") + + (add-name :utf-16 "UTF-16") + + (add-name :ucs-4 "ISO-10646-UCS-4") + (add-name :ucs-4 "UCS-4") + + (add-name :ucs-2 "ISO-10646-UCS-2") + (add-name :ucs-2 "UCS-2") ) + + +(progn + (define-encoding :iso-8859-1 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-1))) + + (define-encoding :iso-8859-2 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-2))) + + (define-encoding :iso-8859-3 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-3))) + + (define-encoding :iso-8859-4 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-4))) + + (define-encoding :iso-8859-5 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-5))) + + (define-encoding :iso-8859-6 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-6))) + + (define-encoding :iso-8859-7 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-7))) + + (define-encoding :iso-8859-8 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-8))) + + (define-encoding :iso-8859-14 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-14))) + + (define-encoding :iso-8859-15 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-15))) + + (define-encoding :koi8-r + (make-simple-8-bit-encoding + :charset (find-charset :koi8-r))) + + (define-encoding :utf-8 :utf-8) + ) + +(progn + (define-8-bit-charset :iso-8859-1 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) + + (define-8-bit-charset :iso-8859-2 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0104 #x02D8 #x0141 #x00A4 #x013D #x015A #x00A7 + #| #o25x |# #x00A8 #x0160 #x015E #x0164 #x0179 #x00AD #x017D #x017B + #| #o26x |# #x00B0 #x0105 #x02DB #x0142 #x00B4 #x013E #x015B #x02C7 + #| #o27x |# #x00B8 #x0161 #x015F #x0165 #x017A #x02DD #x017E #x017C + #| #o30x |# #x0154 #x00C1 #x00C2 #x0102 #x00C4 #x0139 #x0106 #x00C7 + #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x011A #x00CD #x00CE #x010E + #| #o32x |# #x0110 #x0143 #x0147 #x00D3 #x00D4 #x0150 #x00D6 #x00D7 + #| #o33x |# #x0158 #x016E #x00DA #x0170 #x00DC #x00DD #x0162 #x00DF + #| #o34x |# #x0155 #x00E1 #x00E2 #x0103 #x00E4 #x013A #x0107 #x00E7 + #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x011B #x00ED #x00EE #x010F + #| #o36x |# #x0111 #x0144 #x0148 #x00F3 #x00F4 #x0151 #x00F6 #x00F7 + #| #o37x |# #x0159 #x016F #x00FA #x0171 #x00FC #x00FD #x0163 #x02D9) + + (define-8-bit-charset :iso-8859-3 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0126 #x02D8 #x00A3 #x00A4 #xFFFF #x0124 #x00A7 + #| #o25x |# #x00A8 #x0130 #x015E #x011E #x0134 #x00AD #xFFFF #x017B + #| #o26x |# #x00B0 #x0127 #x00B2 #x00B3 #x00B4 #x00B5 #x0125 #x00B7 + #| #o27x |# #x00B8 #x0131 #x015F #x011F #x0135 #x00BD #xFFFF #x017C + #| #o30x |# #x00C0 #x00C1 #x00C2 #xFFFF #x00C4 #x010A #x0108 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #xFFFF #x00D1 #x00D2 #x00D3 #x00D4 #x0120 #x00D6 #x00D7 + #| #o33x |# #x011C #x00D9 #x00DA #x00DB #x00DC #x016C #x015C #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #xFFFF #x00E4 #x010B #x0109 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #xFFFF #x00F1 #x00F2 #x00F3 #x00F4 #x0121 #x00F6 #x00F7 + #| #o37x |# #x011D #x00F9 #x00FA #x00FB #x00FC #x016D #x015D #x02D9) + + (define-8-bit-charset :iso-8859-4 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0104 #x0138 #x0156 #x00A4 #x0128 #x013B #x00A7 + #| #o25x |# #x00A8 #x0160 #x0112 #x0122 #x0166 #x00AD #x017D #x00AF + #| #o26x |# #x00B0 #x0105 #x02DB #x0157 #x00B4 #x0129 #x013C #x02C7 + #| #o27x |# #x00B8 #x0161 #x0113 #x0123 #x0167 #x014A #x017E #x014B + #| #o30x |# #x0100 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x012E + #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x0116 #x00CD #x00CE #x012A + #| #o32x |# #x0110 #x0145 #x014C #x0136 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x0172 #x00DA #x00DB #x00DC #x0168 #x016A #x00DF + #| #o34x |# #x0101 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x012F + #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x0117 #x00ED #x00EE #x012B + #| #o36x |# #x0111 #x0146 #x014D #x0137 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x0173 #x00FA #x00FB #x00FC #x0169 #x016B #x02D9) + + (define-8-bit-charset :iso-8859-5 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0401 #x0402 #x0403 #x0404 #x0405 #x0406 #x0407 + #| #o25x |# #x0408 #x0409 #x040A #x040B #x040C #x00AD #x040E #x040F + #| #o26x |# #x0410 #x0411 #x0412 #x0413 #x0414 #x0415 #x0416 #x0417 + #| #o27x |# #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E #x041F + #| #o30x |# #x0420 #x0421 #x0422 #x0423 #x0424 #x0425 #x0426 #x0427 + #| #o31x |# #x0428 #x0429 #x042A #x042B #x042C #x042D #x042E #x042F + #| #o32x |# #x0430 #x0431 #x0432 #x0433 #x0434 #x0435 #x0436 #x0437 + #| #o33x |# #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E #x043F + #| #o34x |# #x0440 #x0441 #x0442 #x0443 #x0444 #x0445 #x0446 #x0447 + #| #o35x |# #x0448 #x0449 #x044A #x044B #x044C #x044D #x044E #x044F + #| #o36x |# #x2116 #x0451 #x0452 #x0453 #x0454 #x0455 #x0456 #x0457 + #| #o37x |# #x0458 #x0459 #x045A #x045B #x045C #x00A7 #x045E #x045F) + + (define-8-bit-charset :iso-8859-6 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0660 #x0661 #x0662 #x0663 #x0664 #x0665 #x0666 #x0667 + #| #o07x |# #x0668 #x0669 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #xFFFF #xFFFF #xFFFF #x00A4 #xFFFF #xFFFF #xFFFF + #| #o25x |# #xFFFF #xFFFF #xFFFF #xFFFF #x060C #x00AD #xFFFF #xFFFF + #| #o26x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o27x |# #xFFFF #xFFFF #xFFFF #x061B #xFFFF #xFFFF #xFFFF #x061F + #| #o30x |# #xFFFF #x0621 #x0622 #x0623 #x0624 #x0625 #x0626 #x0627 + #| #o31x |# #x0628 #x0629 #x062A #x062B #x062C #x062D #x062E #x062F + #| #o32x |# #x0630 #x0631 #x0632 #x0633 #x0634 #x0635 #x0636 #x0637 + #| #o33x |# #x0638 #x0639 #x063A #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o34x |# #x0640 #x0641 #x0642 #x0643 #x0644 #x0645 #x0646 #x0647 + #| #o35x |# #x0648 #x0649 #x064A #x064B #x064C #x064D #x064E #x064F + #| #o36x |# #x0650 #x0651 #x0652 #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o37x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) + + (define-8-bit-charset :iso-8859-7 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x02BD #x02BC #x00A3 #xFFFF #xFFFF #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #xFFFF #x00AB #x00AC #x00AD #xFFFF #x2015 + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x0384 #x0385 #x0386 #x00B7 + #| #o27x |# #x0388 #x0389 #x038A #x00BB #x038C #x00BD #x038E #x038F + #| #o30x |# #x0390 #x0391 #x0392 #x0393 #x0394 #x0395 #x0396 #x0397 + #| #o31x |# #x0398 #x0399 #x039A #x039B #x039C #x039D #x039E #x039F + #| #o32x |# #x03A0 #x03A1 #xFFFF #x03A3 #x03A4 #x03A5 #x03A6 #x03A7 + #| #o33x |# #x03A8 #x03A9 #x03AA #x03AB #x03AC #x03AD #x03AE #x03AF + #| #o34x |# #x03B0 #x03B1 #x03B2 #x03B3 #x03B4 #x03B5 #x03B6 #x03B7 + #| #o35x |# #x03B8 #x03B9 #x03BA #x03BB #x03BC #x03BD #x03BE #x03BF + #| #o36x |# #x03C0 #x03C1 #x03C2 #x03C3 #x03C4 #x03C5 #x03C6 #x03C7 + #| #o37x |# #x03C8 #x03C9 #x03CA #x03CB #x03CC #x03CD #x03CE #xFFFF) + + (define-8-bit-charset :iso-8859-8 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #xFFFF #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00D7 #x00AB #x00AC #x00AD #x00AE #x203E + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00F7 #x00BB #x00BC #x00BD #x00BE #xFFFF + #| #o30x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o31x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o32x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o33x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #x2017 + #| #o34x |# #x05D0 #x05D1 #x05D2 #x05D3 #x05D4 #x05D5 #x05D6 #x05D7 + #| #o35x |# #x05D8 #x05D9 #x05DA #x05DB #x05DC #x05DD #x05DE #x05DF + #| #o36x |# #x05E0 #x05E1 #x05E2 #x05E3 #x05E4 #x05E5 #x05E6 #x05E7 + #| #o37x |# #x05E8 #x05E9 #x05EA #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) + + (define-8-bit-charset :iso-8859-9 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x011E #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x0130 #x015E #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x011F #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x0131 #x015F #x00FF) + + (define-8-bit-charset :iso-8859-14 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x1E02 #x1E03 #x00A3 #x010A #x010B #x1E0A #x00A7 + #| #o25x |# #x1E80 #x00A9 #x1E82 #x1E0B #x1EF2 #x00AD #x00AE #x0178 + #| #o26x |# #x1E1E #x1E1F #x0120 #x0121 #x1E40 #x1E41 #x00B6 #x1E56 + #| #o27x |# #x1E81 #x1E57 #x1E83 #x1E60 #x1EF3 #x1E84 #x1E85 #x1E61 + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x0174 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x1E6A + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x0176 #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x0175 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x1E6B + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x0177 #x00FF) + + (define-8-bit-charset :iso-8859-15 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x20AC #x00A5 #x0160 #x00A7 + #| #o25x |# #x0161 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x017D #x00B5 #x00B6 #x00B7 + #| #o27x |# #x017E #x00B9 #x00BA #x00BB #x0152 #x0153 #x0178 #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) + + (define-8-bit-charset :koi8-r + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #x2500 #x2502 #x250C #x2510 #x2514 #x2518 #x251C #x2524 + #| #o21x |# #x252C #x2534 #x253C #x2580 #x2584 #x2588 #x258C #x2590 + #| #o22x |# #x2591 #x2592 #x2593 #x2320 #x25A0 #x2219 #x221A #x2248 + #| #o23x |# #x2264 #x2265 #x00A0 #x2321 #x00B0 #x00B2 #x00B7 #x00F7 + #| #o24x |# #x2550 #x2551 #x2552 #x0451 #x2553 #x2554 #x2555 #x2556 + #| #o25x |# #x2557 #x2558 #x2559 #x255A #x255B #x255C #x255D #x255E + #| #o26x |# #x255F #x2560 #x2561 #x0401 #x2562 #x2563 #x2564 #x2565 + #| #o27x |# #x2566 #x2567 #x2568 #x2569 #x256A #x256B #x256C #x00A9 + #| #o30x |# #x044E #x0430 #x0431 #x0446 #x0434 #x0435 #x0444 #x0433 + #| #o31x |# #x0445 #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E + #| #o32x |# #x043F #x044F #x0440 #x0441 #x0442 #x0443 #x0436 #x0432 + #| #o33x |# #x044C #x044B #x0437 #x0448 #x044D #x0449 #x0447 #x044A + #| #o34x |# #x042E #x0410 #x0411 #x0426 #x0414 #x0415 #x0424 #x0413 + #| #o35x |# #x0425 #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E + #| #o36x |# #x041F #x042F #x0420 #x0421 #x0422 #x0423 #x0416 #x0412 + #| #o37x |# #x042C #x042B #x0417 #x0428 #x042D #x0429 #x0427 #x042A) + ) + diff --git a/runes/encodings.lisp b/runes/encodings.lisp new file mode 100644 index 0000000..0982caa --- /dev/null +++ b/runes/encodings.lisp @@ -0,0 +1,347 @@ +(in-package :encoding) + +;;;; --------------------------------------------------------------------------- +;;;; Encoding names +;;;; + +(defvar *names* (make-hash-table :test #'eq)) + +(defun canon-name (string) + (with-output-to-string (bag) + (map nil (lambda (ch) + (cond ((char= ch #\_) (write-char #\- bag)) + (t (write-char (char-upcase ch) bag)))) + string))) + +(defun canon-name-2 (string) + (with-output-to-string (bag) + (map nil (lambda (ch) + (cond ((char= ch #\_)) + ((char= ch #\-)) + (t (write-char (char-upcase ch) bag)))) + string))) + +(defmethod encoding-names ((encoding symbol)) + (gethash encoding *names*)) + +(defmethod (setf encoding-names) (new-value (encoding symbol)) + (setf (gethash encoding *names*) new-value)) + +(defun add-name (encoding name) + (pushnew (canon-name name) (encoding-names encoding) :test #'string=)) + +(defun resolve-name (string) + (cond ((symbolp string) + string) + (t + (setq string (canon-name string)) + (or + (block nil + (maphash (lambda (x y) + (when (member string y :test #'string=) + (return x))) + *names*) + nil) + (block nil + (maphash (lambda (x y) + (when (member string y + :test #'(lambda (x y) + (string= (canon-name-2 x) + (canon-name-2 y)))) + (return x))) + *names*) + nil))))) + +;;;; --------------------------------------------------------------------------- +;;;; Encodings +;;;; + +(defvar *encodings* (make-hash-table :test #'eq)) + +(defmacro define-encoding (name init-form) + `(progn + (setf (gethash ',name *encodings*) + (list nil (lambda () ,init-form))) + ',name)) + +(defun find-encoding (name) + (let ((x (gethash (resolve-name name) *encodings*))) + (and x + (or (first x) + (setf (first x) (funcall (second x))))))) + +(defclass encoding () ()) + +(defclass simple-8-bit-encoding (encoding) + ((table :initarg :table))) + +(defun make-simple-8-bit-encoding (&key charset) + (make-instance 'simple-8-bit-encoding + :table (coerce (to-unicode-table charset) '(simple-array (unsigned-byte 16) (256))))) + +;;;;;;; + +(defmacro fx-op (op &rest xs) + `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) +(defmacro fx-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) + +(defmacro %+ (&rest xs) `(fx-op + ,@xs)) +(defmacro %- (&rest xs) `(fx-op - ,@xs)) +(defmacro %* (&rest xs) `(fx-op * ,@xs)) +(defmacro %/ (&rest xs) `(fx-op floor ,@xs)) +(defmacro %and (&rest xs) `(fx-op logand ,@xs)) +(defmacro %ior (&rest xs) `(fx-op logior ,@xs)) +(defmacro %xor (&rest xs) `(fx-op logxor ,@xs)) +(defmacro %ash (&rest xs) `(fx-op ash ,@xs)) +(defmacro %mod (&rest xs) `(fx-op mod ,@xs)) + +(defmacro %= (&rest xs) `(fx-pred = ,@xs)) +(defmacro %<= (&rest xs) `(fx-pred <= ,@xs)) +(defmacro %>= (&rest xs) `(fx-pred >= ,@xs)) +(defmacro %< (&rest xs) `(fx-pred < ,@xs)) +(defmacro %> (&rest xs) `(fx-pred > ,@xs)) + +(defmethod decode-sequence ((encoding (eql :utf-16-big-endian)) + in in-start in-end out out-start out-end eof?) + ;; -> new wptr, new rptr + (let ((wptr out-start) + (rptr in-start)) + (loop + (when (%= wptr out-end) + (return)) + (when (>= (%+ rptr 1) in-end) + (return)) + (let ((hi (aref in rptr)) + (lo (aref in (%+ 1 rptr)))) + (setf rptr (%+ 2 rptr)) + (setf (aref out wptr) (logior (ash hi 8) lo)) + (setf wptr (%+ 1 wptr)))) + (values wptr rptr))) + +(defmethod decode-sequence ((encoding (eql :utf-16-little-endian)) + in in-start in-end out out-start out-end eof?) + ;; -> new wptr, new rptr + (let ((wptr out-start) + (rptr in-start)) + (loop + (when (%= wptr out-end) + (return)) + (when (>= (%+ rptr 1) in-end) + (return)) + (let ((lo (aref in (%+ 0 rptr))) + (hi (aref in (%+ 1 rptr)))) + (setf rptr (%+ 2 rptr)) + (setf (aref out wptr) (logior (ash hi 8) lo)) + (setf wptr (%+ 1 wptr)))) + (values wptr rptr))) + +(defmethod decode-sequence ((encoding (eql :utf-8)) + in in-start in-end out out-start out-end eof?) + (declare (optimize (speed 3) (safety 0)) + (type (simple-array (unsigned-byte 8) (*)) in) + (type (simple-array (unsigned-byte 16) (*)) out) + (type fixnum in-start in-end out-start out-end)) + (let ((wptr out-start) + (rptr in-start) + byte0) + (macrolet ((put (x) + `((lambda (x) + (cond ((or (<= #xD800 x #xDBFF) + (<= #xDC00 x #xDFFF)) + (error "Encoding UTF-16 in UTF-8? : #x~x." x))) + '(unless (data-char-p x) + (error "#x~x is not a data character." x)) + ;;(fresh-line) + ;;(prin1 x) (princ "-> ") + (cond ((%> x #xFFFF) + (setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10)) + (aref out (%+ 1 wptr)) (%ior #xDC00 (%and x #x3FF))) + (setf wptr (%+ wptr 2))) + (t + (setf (aref out wptr) x) + (setf wptr (%+ wptr 1))))) + ,x)) + (put1 (x) + `(progn + (setf (aref out wptr) ,x) + (setf wptr (%+ wptr 1))))) + (loop + (when (%= (+ wptr 1) out-end) (return)) + (when (%>= rptr in-end) (return)) + (setq byte0 (aref in rptr)) + (cond ((= byte0 #x0D) + ;; CR handling + ;; we need to know the following character + (cond ((>= (%+ rptr 1) in-end) + ;; no characters in buffer + (cond (eof? + ;; at EOF, pass it as NL + (put #x0A) + (setf rptr (%+ rptr 1))) + (t + ;; demand more characters + (return)))) + ((= (aref in (%+ rptr 1)) #x0A) + ;; we see CR NL, so forget this CR and the next NL will be + ;; inserted literally + (setf rptr (%+ rptr 1))) + (t + ;; singleton CR, pass it as NL + (put #x0A) + (setf rptr (%+ rptr 1))))) + + ((%<= #|#b00000000|# byte0 #b01111111) + (put1 byte0) + (setf rptr (%+ rptr 1))) + + ((%<= #|#b10000000|# byte0 #b10111111) + (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0) + (setf rptr (%+ rptr 1))) + + ((%<= #|#b11000000|# byte0 #b11011111) + (cond ((< (%+ rptr 2) in-end) + (put + (dpb (ldb (byte 5 0) byte0) (byte 5 6) + (dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0) + 0))) + (setf rptr (%+ rptr 2))) + (t + (return)))) + + ((%<= #|#b11100000|# byte0 #b11101111) + (cond ((< (%+ rptr 3) in-end) + (put + (dpb (ldb (byte 4 0) byte0) (byte 4 12) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 0) + 0)))) + (setf rptr (%+ rptr 3))) + (t + (return)))) + + ((%<= #|#b11110000|# byte0 #b11110111) + (cond ((< (%+ rptr 4) in-end) + (put + (dpb (ldb (byte 3 0) byte0) (byte 3 18) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 0) + 0))))) + (setf rptr (%+ rptr 4))) + (t + (return)))) + + ((%<= #|#b11111000|# byte0 #b11111011) + (cond ((< (%+ rptr 5) in-end) + (put + (dpb (ldb (byte 2 0) byte0) (byte 2 24) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 0) + 0)))))) + (setf rptr (%+ rptr 5))) + (t + (return)))) + + ((%<= #|#b11111100|# byte0 #b11111101) + (cond ((< (%+ rptr 6) in-end) + (put + (dpb (ldb (byte 1 0) byte0) (byte 1 30) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 18) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 5 rptr))) (byte 6 0) + 0))))))) + (setf rptr (%+ rptr 6))) + (t + (return)))) + + (t + (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) + (values wptr rptr)) ) + +(defmethod encoding-p ((object (eql :utf-16-little-endian))) t) +(defmethod encoding-p ((object (eql :utf-16-big-endian))) t) +(defmethod encoding-p ((object (eql :utf-8))) t) + +(defmethod encoding-p ((object encoding)) t) + +(defmethod decode-sequence ((encoding simple-8-bit-encoding) + in in-start in-end + out out-start out-end + eof?) + (declare (optimize (speed 3) (safety 0)) + (type (simple-array (unsigned-byte 8) (*)) in) + (type (simple-array (unsigned-byte 16) (*)) out) + (type fixnum in-start in-end out-start out-end)) + (let ((wptr out-start) + (rptr in-start) + (byte 0) + (table (slot-value encoding 'table))) + (declare (type fixnum wptr rptr) + (type (unsigned-byte 8) byte) + (type (simple-array (unsigned-byte 16) (*)) table)) + (loop + (when (%= wptr out-end) (return)) + (when (%>= rptr in-end) (return)) + (setq byte (aref in rptr)) + (cond ((= byte #x0D) + ;; CR handling + ;; we need to know the following character + (cond ((>= (%+ rptr 1) in-end) + ;; no characters in buffer + (cond (eof? + ;; at EOF, pass it as NL + (setf (aref out wptr) #x0A) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))) + (t + ;; demand more characters + (return)))) + ((= (aref in (%+ rptr 1)) #x0A) + ;; we see CR NL, so forget this CR and the next NL will be + ;; inserted literally + (setf rptr (%+ rptr 1))) + (t + ;; singleton CR, pass it as NL + (setf (aref out wptr) #x0A) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))))) + + (t + (setf (aref out wptr) (aref table byte)) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))) )) + (values wptr rptr))) + +;;;; --------------------------------------------------------------------------- +;;;; Character sets +;;;; + +(defvar *charsets* (make-hash-table :test #'eq)) + +(defclass 8-bit-charset () + ((name :initarg :name) + (to-unicode-table + :initarg :to-unicode-table + :reader to-unicode-table))) + +(defmacro define-8-bit-charset (name &rest codes) + (assert (= 256 (length codes))) + `(progn + (setf (gethash ',name *charsets*) + (make-instance '8-bit-charset + :name ',name + :to-unicode-table + ',(make-array 256 + :element-type '(unsigned-byte 16) + :initial-contents codes))) + ',name)) + +(defun find-charset (name) + (or (gethash name *charsets*) + (error "There is no character set named ~S." name))) + diff --git a/runes/package.lisp b/runes/package.lisp new file mode 100644 index 0000000..921c457 --- /dev/null +++ b/runes/package.lisp @@ -0,0 +1,50 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CL-USER; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Generating a sane DEFPACKAGE for RUNES +;;; Created: 1999-05-25 +;;; Author: Gilbert Baumann +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1999,2000 by Gilbert Baumann + +(in-package :cl-user) + +(defpackage :runes + (:use :cl) + (:export #:defsubst + + ;; util.lisp : + #:compose + #:curry + #:rcurry + #:until + #:while + + ;; runes.lisp + #:rune + #:rod + #:simple-rod + #:%rune + #:rod-capitalize + #:code-rune + #:rune-code + #:rune-downcase + #:rune-upcase + #:rod-downcase + #:rod-upcase + #:white-space-rune-p + #:digit-rune-p + #:rune= + #:rune<= + #:rune>= + #:rune-equal + #:runep + #:sloopy-rod-p + #:rod= + #:rod-equal + #:make-rod + #:char-rune + #:rune-char + #:rod-string + #:string-rod + #:rod-subseq + #:rod<)) diff --git a/runes/runes.lisp b/runes/runes.lisp new file mode 100644 index 0000000..7aed6d0 --- /dev/null +++ b/runes/runes.lisp @@ -0,0 +1,273 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unicode strings (called RODs) +;;; Created: 1999-05-25 22:29 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-15 GB - ROD=, ROD-EQUAL +;; RUNE<=, RUNE>= +;; MAKE-ROD, ROD-SUBSEQ +;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD +;; new functions +;; - Added rune reader +;; + +(in-package :runes) + +(deftype rune () '(unsigned-byte 16)) +(deftype rod () '(array rune (*))) +(deftype simple-rod () '(simple-array rune (*))) + +(defsubst rune (rod index) + (aref rod index)) + +(defun (setf rune) (new rod index) + (setf (aref rod index) new)) + +(defsubst %rune (rod index) + (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index))) + +(defsubst (setf %rune) (new rod index) + (setf (aref (the (simple-array (unsigned-byte 16) (*)) rod) (the fixnum index)) new)) + +(defun rod-capitalize (rod) + (warn "~S is not implemented." 'rod-capitalize) + rod) + +(defsubst code-rune (x) x) +(defsubst rune-code (x) x) + +(defsubst rune= (x y) + (= x y)) + +(defun rune-downcase (rune) + (cond ((<= #x0041 rune #x005a) (+ rune #x20)) + ((= rune #x00d7) rune) + ((<= #x00c0 rune #x00de) (+ rune #x20)) + (t rune))) + +(defsubst rune-upcase (rune) + (cond ((<= #x0061 rune #x007a) (- rune #x20)) + ((= rune #x00f7) rune) + ((<= #x00e0 rune #x00fe) (- rune #x20)) + (t rune))) + +(defun rune-upper-case-letter-p (rune) + (or (<= #x0041 rune #x005a) (<= #x00c0 rune #x00de))) + +(defun rune-lower-case-letter-p (rune) + (or (<= #x0061 rune #x007a) (<= #x00e0 rune #x00fe) + (= rune #x00d7))) + + +(defun rune-equal (x y) + (rune= (rune-upcase x) (rune-upcase y))) + +(defun rod-downcase (rod) + ;; FIXME + (register-rod + (map '(simple-array (unsigned-byte 16) (*)) #'rune-downcase rod))) + +(defun rod-upcase (rod) + ;; FIXME + (register-rod + (map '(simple-array (unsigned-byte 16) (*)) #'rune-upcase rod))) + +(defsubst white-space-rune-p (char) + (or (= char 9) ;TAB + (= char 10) ;Linefeed + (= char 13) ;Carriage Return + (= char 32))) ;Space + +(defsubst digit-rune-p (char &optional (radix 10)) + (cond ((<= #.(char-code #\0) char #.(char-code #\9)) + (and (< (- char #.(char-code #\0)) radix) + (- char #.(char-code #\0)))) + ((<= #.(char-code #\A) char #.(char-code #\Z)) + (and (< (- char #.(char-code #\A) -10) radix) + (- char #.(char-code #\A) -10))) + ((<= #.(char-code #\a) char #.(char-code #\z)) + (and (< (- char #.(char-code #\a) -10) radix) + (- char #.(char-code #\a) -10))) )) + +(defun rod (x) + (cond ((stringp x) (register-rod (map 'rod #'char-code x))) + ((symbolp x) (rod (string x))) + ((characterp x) (rod (string x))) + ((vectorp x) (register-rod (coerce x 'rod))) + ((integerp x) (register-rod (map 'rod #'identity (list x)))) + (t (error "Cannot convert ~S to a ~S" x 'rod)))) + +(defun runep (x) + (and (integerp x) + (<= 0 x #xFFFF))) + +(defun sloopy-rod-p (x) + (and (not (stringp x)) + (vectorp x) + (every #'runep x))) + +(defun rod= (x y) + (and (= (length x) (length y)) + (dotimes (i (length x) t) + (unless (rune= (rune x i) (rune y i)) + (return nil))))) + +(defun rod-equal (x y) + (and (= (length x) (length y)) + (dotimes (i (length x) t) + (unless (rune-equal (rune x i) (rune y i)) + (return nil))))) + +(defsubst make-rod (size) + (let ((res (make-array size :element-type 'rune))) + (register-rod res) + res)) + +(defun char-rune (char) + (code-rune (char-code char))) + +(defun rune-char (rune &optional (default #\?)) + #+CMU + (if (< rune 256) (code-char rune) default) + #-CMU + (or (code-char rune) default)) + +(defun rod-string (rod &optional (default-char #\?)) + (map 'string (lambda (x) (rune-char x default-char)) rod)) + +(defun string-rod (string) + (let* ((n (length string)) + (res (make-rod n))) + (dotimes (i n) + (setf (%rune res i) (char-rune (char string i)))) + res)) + +;;;; + +(defun rune<= (rune &rest more-runes) + (apply #'<= rune more-runes)) + +(defun rune>= (rune &rest more-runes) + (apply #'>= rune more-runes)) + +(defun rodp (object) + (typep object 'rod)) + +(defun really-rod-p (object) + (and (typep object 'rod) + (really-really-rod-p object))) + +(defun rod-subseq (source start &optional (end (length source))) + (unless (rodp source) + (error "~S is not of type ~S." source 'rod)) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type rod source) + (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (%rune source (the fixnum (+ i start)))))))) + +(defun rod-subseq* (source start &optional (end (length source))) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (aref source (the fixnum (+ i start)))))))) + +;;; Support for telling ROD and arrays apart: + +#+CMU +(progn + (defvar *rod-hash-table* + (make-array 5003 :initial-element nil))) + +(defun register-rod (rod) + #+CMU + (unless (really-really-rod-p rod) + (push (ext:make-weak-pointer rod) + (aref *rod-hash-table* (mod (cl::pointer-hash rod) + (length *rod-hash-table*))))) + rod) + +(defun really-really-rod-p (rod) + #+CMU + (find rod (aref *rod-hash-table* (mod (cl::pointer-hash rod) + (length *rod-hash-table*))) + :key #'ext:weak-pointer-value)) + +#+CMU +(progn + (defun rod-hash-table-rehash () + (let* ((n 5003) + (new (make-array n :initial-element nil))) + (loop for bucket across *rod-hash-table* do + (loop for item in bucket do + (let ((v (ext:weak-pointer-value item))) + (when v + (push item (aref new (mod (cl::pointer-hash v) n))))))) + (setf *rod-hash-table* new))) + + (defun rod-hash-after-gc-hook () + ;; hmm interesting question: should we rehash? + (rod-hash-table-rehash)) + + (pushnew 'rod-hash-after-gc-hook extensions:*after-gc-hooks*) ) + +(defun rod< (rod1 rod2) + (do ((i 0 (+ i 1))) + (nil) + (cond ((= i (length rod1)) + (return t)) + ((= i (length rod2)) + (return nil)) + ((< (aref rod1 i) (aref rod2 i)) + (return t)) + ((> (aref rod1 i) (aref rod2 i)) + (return nil))))) diff --git a/runes/syntax.lisp b/runes/syntax.lisp new file mode 100644 index 0000000..1c35251 --- /dev/null +++ b/runes/syntax.lisp @@ -0,0 +1,196 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unicode strings (called RODs) +;;; Created: 1999-05-25 22:29 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1998,1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-15 GB - ROD=, ROD-EQUAL +;; RUNE<=, RUNE>= +;; MAKE-ROD, ROD-SUBSEQ +;; CHAR-RUNE, RUNE-CHAR, ROD-STRING, STRING-ROD +;; new functions +;; - Added rune reader +;; + +(in-package :runes) + +;;;; +;;;; RUNE Reader +;;;; + +;; Portable implementation of WHITE-SPACE-P with regard to the current +;; read table -- this is bit tricky. + +(defun rt-white-space-p (char) + (let ((stream (make-string-input-stream (string char)))) + (eq :eof (peek-char t stream nil :eof)))) + +(defun read-rune-name (input) + ;; the first char is unconditionally read + (let ((char0 (read-char input t nil t))) + (when (char= char0 #\\) + (setf char0 (read-char input t nil t))) + (with-output-to-string (res) + (write-char char0 res) + (do ((ch (peek-char nil input nil :eof t) (peek-char nil input nil :eof t))) + ((or (eq ch :eof) + (rt-white-space-p ch) + (multiple-value-bind (function non-terminating-p) (get-macro-character ch) + (and function (not non-terminating-p))))) + (write-char ch res) + (read-char input))))) ;consume this character + +(defun iso-10646-char-code (char) + (char-code char)) + +(defvar *rune-names* (make-hash-table :test #'equal) + "Hashtable, which maps all known rune names to rune codes; + Names are stored in uppercase.") + +(defun define-rune-name (name code) + (setf (gethash (string-upcase name) *rune-names*) code) + name) + +(defun lookup-rune-name (name) + (gethash (string-upcase name) *rune-names*)) + +(define-rune-name "null" #x0000) +(define-rune-name "space" #x0020) +(define-rune-name "newline" #x000A) +(define-rune-name "return" #x000D) +(define-rune-name "tab" #x0009) +(define-rune-name "page" #x000C) + +;; and just for fun: +(define-rune-name "euro" #x20AC) + +;; ASCII control characters +(define-rune-name "nul" #x0000) ;null +(define-rune-name "soh" #x0001) ;start of header +(define-rune-name "stx" #x0002) ;start of text +(define-rune-name "etx" #x0003) ;end of text +(define-rune-name "eot" #x0004) ;end of transmission +(define-rune-name "enq" #x0005) ; +(define-rune-name "ack" #x0006) ;acknowledge +(define-rune-name "bel" #x0007) ;bell +(define-rune-name "bs" #x0008) ;backspace +(define-rune-name "ht" #x0009) ;horizontal tab +(define-rune-name "lf" #X000A) ;line feed, new line +(define-rune-name "vt" #X000B) ;vertical tab +(define-rune-name "ff" #x000C) ;form feed +(define-rune-name "cr" #x000D) ;carriage return +(define-rune-name "so" #x000E) ;shift out +(define-rune-name "si" #x000F) ;shift in +(define-rune-name "dle" #x0010) ;device latch enable ? +(define-rune-name "dc1" #x0011) ;device control 1 +(define-rune-name "dc2" #x0012) ;device control 2 +(define-rune-name "dc3" #x0013) ;device control 3 +(define-rune-name "dc4" #x0014) ;device control 4 +(define-rune-name "nak" #x0015) ;negative acknowledge +(define-rune-name "syn" #x0016) ; +(define-rune-name "etb" #x0017) ; +(define-rune-name "can" #x0018) ; +(define-rune-name "em" #x0019) ;end of message +(define-rune-name "sub" #x001A) ; +(define-rune-name "esc" #x001B) ;escape +(define-rune-name "fs" #x001C) ;field separator ? +(define-rune-name "gs" #x001D) ;group separator +(define-rune-name "rs" #x001E) ; +(define-rune-name "us" #x001F) ; + +(define-rune-name "del" #x007F) ;delete + +;; iso-latin +(define-rune-name "nbsp" #x00A0) ;non breakable space +(define-rune-name "shy" #x00AD) ;soft hyphen + +(defun rune-from-read-name (name) + (code-rune + (cond ((= (length name) 1) + (iso-10646-char-code (char name 0))) + ((and (= (length name) 2) + (char= (char name 0) #\\)) + (iso-10646-char-code (char name 1))) + ((and (>= (length name) 3) + (char-equal (char name 0) #\u) + (char-equal (char name 1) #\+) + (every (lambda (x) (digit-char-p x 16)) (subseq name 2))) + (parse-integer name :start 2 :radix 16)) + ((lookup-rune-name name)) + (t + (error "Meaningless rune name ~S." name))))) + +(defun rune-reader (stream subchar arg) + subchar arg + (values (rune-from-read-name (read-rune-name stream)))) + +(set-dispatch-macro-character #\# #\/ 'rune-reader) + +;;; ROD ext syntax + +(defun rod-reader (stream subchar arg) + (declare (ignore arg)) + (rod + (with-output-to-string (bag) + (do ((c (read-char stream t nil t) + (read-char stream t nil t))) + ((char= c subchar)) + (cond ((char= c #\\) + (setf c (read-char stream t nil t)))) + (princ c bag))))) + +#-rune-is-character +(defun rod-printer (stream rod) + (princ #\# stream) + (princ #\" stream) + (loop for x across rod do + (cond ((or (rune= x #.(char-rune #\\)) + (rune= x #.(char-rune #\"))) + (princ #\\ stream) + (princ (code-char x) stream)) + ((< x char-code-limit) + (princ (code-char x) stream)) + (t + (format stream "\\u~4,'0X" x)))) + (princ #\" stream)) + +#-rune-is-character +(set-pprint-dispatch '(satisfies really-rod-p) #'rod-printer) + +(set-dispatch-macro-character #\# #\" 'rod-reader) + +#|| +(defun longish-array-p (arr) + (and (arrayp arr) + (> (array-total-size arr) 10))) + +(set-pprint-dispatch '(satisfies longish-array-p) + #'(lambda (stream object) + (let ((*print-array* nil) + (*print-pretty* nil)) + (prin1 object stream)))) +||# diff --git a/runes/util.lisp b/runes/util.lisp new file mode 100644 index 0000000..60cd74c --- /dev/null +++ b/runes/util.lisp @@ -0,0 +1,73 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Some common utilities for the Closure browser +;;; Created: 1997-12-27 +;;; Author: Gilbert Baumann +;;; License: LLGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; (c) copyright 1997-1999 by Gilbert Baumann + +;;; This code is free software; you can redistribute it and/or modify it +;;; under the terms of the version 2.1 of the GNU Lesser General Public +;;; License as published by the Free Software Foundation, as clarified +;;; by the "Preamble to the Gnu Lesser General Public License" found in +;;; the file COPYING. +;;; +;;; This code is distributed in the hope that it will be useful, +;;; but without any warranty; without even the implied warranty of +;;; merchantability or fitness for a particular purpose. See the GNU +;;; Lesser General Public License for more details. +;;; +;;; Version 2.1 of the GNU Lesser General Public License is in the file +;;; COPYING that was distributed with this file. If it is not present, +;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until +;;; superseded by a newer version) or write to the Free Software +;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +;; Changes +;; +;; When Who What +;; ---------------------------------------------------------------------------- +;; 1999-08-24 GB = fixed MULTIPLE-VALUE-OR it now takes any number of +;; subforms +;; + +(in-package :runes) + +;;; -------------------------------------------------------------------------------- +;;; Meta functions + +(defun curry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append args more)))) + +(defun rcurry (fun &rest args) + #'(lambda (&rest more) + (apply fun (append more args)))) + +(defun compose (f g) + #'(lambda (&rest args) + (funcall f (apply g args)))) + +;;; -------------------------------------------------------------------------------- +;;; while and until + +(defmacro while (test &body body) + `(until (not ,test) ,@body)) + +(defmacro until (test &body body) + `(do () (,test) ,@body)) + +;; prime numbers + +(defun primep (n) + "Returns true, iff `n' is prime." + (and (> n 2) + (do ((i 2 (+ i 1))) + ((> (* i i) n) t) + (cond ((zerop (mod n i)) (return nil)))))) + +(defun nearest-greater-prime (n) + "Returns the smallest prime number no less than `n'." + (cond ((primep n) n) + ((nearest-greater-prime (+ n 1))))) diff --git a/runes/xstream.lisp b/runes/xstream.lisp new file mode 100644 index 0000000..9032a7b --- /dev/null +++ b/runes/xstream.lisp @@ -0,0 +1,391 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: runes; readtable: runes; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Fast streams +;;; Created: 1999-07-17 +;;; Author: Gilbert Baumann +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :runes) + +;;; API +;; +;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding +;; [function] +;; MAKE-ROD-XSTREAM rod &key name [function] +;; CLOSE-XSTREAM xstream [function] +;; XSTREAM-P object [function] +;; +;; READ-RUNE xstream [macro] +;; PEEK-RUNE xstream [macro] +;; FREAD-RUNE xstream [function] +;; FPEEK-RUNE xstream [function] +;; CONSUME-RUNE xstream [macro] +;; UNREAD-RUNE rune xstream [function] +;; +;; XSTREAM-NAME xstream [accessor] +;; XSTREAM-POSITION xstream [function] +;; XSTREAM-LINE-NUMBER xstream [function] +;; XSTREAM-COLUMN-NUMBER xstream [function] +;; XSTREAM-PLIST xstream [accessor] +;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*] +;; SET-TO-FULL-SPEED xstream [function] + +;; [*] switching the encoding on the fly is only possible when the +;; stream's buffer is empty; therefore to be able to switch the +;; encoding, while some runes are already read, set the stream's speed +;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM) +;; and later set it to full speed. (The encoding of the runes +;; sequence, you fetch off with READ-RUNE is always UTF-16 though). +;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the +;; speed up to a full buffer length. + +;; An encoding is simply something, which provides the DECODE-SEQUENCE +;; method. + +;;; Controller protocol +;; +;; READ-OCTECTS sequence os-stream start end -> first-non-written +;; XSTREAM/CLOSE os-stream +;; + +(eval-when (eval compile load) + (defparameter *fast* '(optimize (speed 3) (safety 0))) + ;;(defparameter *fast* '(optimize (speed 2) (safety 3))) + ) + +;; Let us first define fast fixnum arithmetric get rid of type +;; checks. (After all we know what we do here). + +(defmacro fx-op (op &rest xs) + `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) +(defmacro fx-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) + +(defmacro %+ (&rest xs) `(fx-op + ,@xs)) +(defmacro %= (&rest xs) `(fx-pred = ,@xs)) + +(deftype buffer-index () + `(unsigned-byte ,(integer-length array-total-size-limit))) + +(deftype buffer-byte () + `(unsigned-byte 16)) + +(deftype octet () + `(unsigned-byte 8)) + +;; The usage of a special marker for EOF is experimental and +;; considered unhygenic. + +(defconstant +end+ #xFFFF + "Special marker inserted into stream buffers to indicate end of buffered data.") + +(defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte)) +(defvar +null-octet-buffer+ (make-array 0 :element-type 'octet)) + +(defstruct (xstream + (:constructor make-xstream/low) + (:copier nil) + (:print-function print-xstream)) + + ;;; Read buffer + + ;; the buffer itself + (buffer +null-buffer+ + :type (simple-array buffer-byte (*))) + ;; points to the next element of `buffer' containing the next rune + ;; about to be read. + (read-ptr 0 :type buffer-index) + ;; points to the first element of `buffer' not containing a rune to + ;; be read. + (fill-ptr 0 :type buffer-index) + + ;;; OS buffer + + ;; a scratch pad for READ-SEQUENCE + (os-buffer +null-octet-buffer+ + :type (simple-array octet (*))) + + ;; `os-left-start', `os-left-end' designate a region of os-buffer, + ;; which still contains some undecoded data. This is needed because + ;; of the DECODE-SEQUENCE protocol + (os-left-start 0 :type buffer-index) + (os-left-end 0 :type buffer-index) + + ;; How much to read each time + (speed 0 :type buffer-index) + + ;; Some stream object obeying to a certain protcol + os-stream + + ;; The external format + ;; (some object offering the ENCODING protocol) + (encoding :utf-8) + + ;;A STREAM-NAME object + (name nil) + + ;; a plist a struct keeps the hack away + (plist nil) + + ;; Stream Position + (line-number 1 :type integer) ;current line number + (line-start 0 :type integer) ;stream position the current line starts at + (buffer-start 0 :type integer) ;stream position the current buffer starts at + + ;; There is no need to maintain a column counter for each character + ;; read, since we can easily compute it from `line-start' and + ;; `buffer-start'. + ) + +(defmacro read-rune (input) + "Read a single rune off the xstream `input'. In case of end of file :EOF + is returned." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (setf (xstream-read-ptr input) (%+ rp 1)) + (cond ((%= ch +end+) + (the (or (member :eof) rune) + (xstream-underflow input))) + ((%= ch #x000A) ;line break + (account-for-line-break input) + (code-rune ch)) + (t + (code-rune ch)))))) + ,input)) + +(defmacro peek-rune (input) + "Peek a single rune off the xstream `input'. In case of end of file :EOF + is returned." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (cond ((%= ch +end+) + (prog1 + (the (or (member :eof) rune) (xstream-underflow input)) + (setf (xstream-read-ptr input) 0))) + (t + (code-rune ch)))))) + ,input)) + +(defmacro consume-rune (input) + "Like READ-RUNE, but does not actually return the read rune." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (setf (xstream-read-ptr input) (%+ rp 1)) + (when (%= ch +end+) + (xstream-underflow input)) + (when (%= ch #x000A) ;line break + (account-for-line-break input) ))) + nil) + ,input)) + +(defsubst unread-rune (rune input) + "Unread the last recently read rune; if there wasn't such a rune, you + deserve to lose." + (declare (ignore rune)) + (decf (xstream-read-ptr input)) + (when (rune= (peek-rune input) #/u+000A) ;was it a line break? + (unaccount-for-line-break input))) + +(defun fread-rune (input) + (read-rune input)) + +(defun fpeek-rune (input) + (peek-rune input)) + +;;; Line counting + +(defun account-for-line-break (input) + (declare (type xstream input)) + (incf (xstream-line-number input)) + (setf (xstream-line-start input) + (+ (xstream-buffer-start input) (xstream-read-ptr input)))) + +(defun unaccount-for-line-break (input) + ;; incomplete! + ;; We better use a traditional lookahead technique or forbid unread-rune. + (decf (xstream-line-number input))) + +;; User API: + +(defun xstream-position (input) + (+ (xstream-buffer-start input) (xstream-read-ptr input))) + +;; xstream-line-number is structure accessor + +(defun xstream-column-number (input) + (+ (- (xstream-position input) + (xstream-line-start input)) + 1)) + +;;; Underflow + +;;(defun read-runes (sequence input)) + +(defun xstream-underflow (input) + (declare (type xstream input)) + ;; we are about to fill new data into the buffer, so we need to + ;; adjust buffer-start. + (incf (xstream-buffer-start input) + (- (xstream-fill-ptr input) 0)) + (let (n m) + ;; when there is something left in the os-buffer, we move it to + ;; the start of the buffer. + (setf m (- (xstream-os-left-end input) (xstream-os-left-start input))) + (unless (zerop m) + (replace (xstream-os-buffer input) (xstream-os-buffer input) + :start1 0 :end1 m + :start2 (xstream-os-left-start input) + :end2 (xstream-os-left-end input)) + ;; then we take care that the buffer is large enough to carry at + ;; least 100 bytes (a random number) + (unless (>= (length (xstream-os-buffer input)) 100) + (error "You lost") + ;; todo: enlarge buffer + )) + (setf n + (read-octets (xstream-os-buffer input) (xstream-os-stream input) + m (min (1- (length (xstream-os-buffer input))) + (+ m (xstream-speed input))))) + (cond ((%= n 0) + (setf (xstream-read-ptr input) 0 + (xstream-fill-ptr input) n) + (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) + :eof) + (t + (multiple-value-bind (fnw fnr) + (encoding:decode-sequence + (xstream-encoding input) + (xstream-os-buffer input) 0 n + (xstream-buffer input) 0 (1- (length (xstream-buffer input))) + (= n m)) + (setf (xstream-os-left-start input) fnr + (xstream-os-left-end input) n + (xstream-read-ptr input) 0 + (xstream-fill-ptr input) fnw) + (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) + (read-rune input)))))) + +;;; constructor + +(defun make-xstream (os-stream &key name + (speed 8192) + (initial-speed 1) + (initial-encoding :guess)) + ;; XXX if initial-speed isn't 1, encoding will me munged up + (assert (eql initial-speed 1)) + (multiple-value-bind (encoding preread) + (if (eq initial-encoding :guess) + (figure-encoding os-stream) + (values initial-encoding nil)) + (let ((osbuf (make-array speed :element-type '(unsigned-byte 8)))) + (replace osbuf preread) + (make-xstream/low + :buffer (let ((r (make-array speed :element-type 'buffer-byte))) + (setf (elt r 0) #xFFFF) + r) + :read-ptr 0 + :fill-ptr 0 + :os-buffer osbuf + :speed initial-speed + :os-stream os-stream + :os-left-start 0 + :os-left-end (length preread) + :encoding encoding + :name name)))) + +(defun make-rod-xstream (string &key name) + ;; XXX encoding is mis-handled by this kind of stream + (let ((n (length string))) + (let ((buffer (make-array (1+ n) :element-type 'buffer-byte))) + (declare (type (simple-array buffer-byte (*)) buffer)) + ;; copy the rod + (do ((i (1- n) (- i 1))) + ((< i 0)) + (declare (type fixnum i)) + (setf (aref buffer i) (rune-code (%rune string i)))) + (setf (aref buffer n) +end+) + ;; + (make-xstream/low :buffer buffer + :read-ptr 0 + :fill-ptr n + ;; :os-buffer nil + :speed 1 + :os-stream nil + :name name)))) + +(defmethod figure-encoding ((stream null)) + (values :utf-8 nil)) + +(defmethod figure-encoding ((stream stream)) + (let ((c0 (read-byte stream nil :eof))) + (cond ((eq c0 :eof) + (values :utf-8 nil)) + (t + (let ((c1 (read-byte stream nil :eof))) + (cond ((eq c1 :eof) + (values :utf-8 (list c0))) + (t + (cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil)) + ((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil)) + (t + (values :utf-8 (list c0 c1))))))))))) + +;;; misc + +(defun close-xstream (input) + (xstream/close (xstream-os-stream input))) + +(defun set-to-full-speed (xstream) + (setf (xstream-speed xstream) (length (xstream-os-buffer xstream)))) + +;;; controller implementations + +(defmethod read-octets (sequence (stream stream) start end) + (#+CLISP lisp:read-byte-sequence + #-CLISP read-sequence + sequence stream :start start :end end)) + +(defmethod read-octets (sequence (stream null) start end) + (declare (ignore sequence start end)) + 0) + +(defmethod xstream/close ((stream stream)) + (close stream)) + +(defmethod xstream/close ((stream null)) + nil) diff --git a/test/domtest.lisp b/test/domtest.lisp new file mode 100644 index 0000000..596fd3a --- /dev/null +++ b/test/domtest.lisp @@ -0,0 +1,634 @@ +(defpackage :domtest + (:use :cl :xml) + (:export #:run-all-tests)) +(defpackage :domtest-tests + (:use)) +(in-package :domtest) + + +;;;; allgemeine Hilfsfunktionen + +(defmacro string-case (keyform &rest clauses) + (let ((key (gensym "key"))) + `(let ((,key ,keyform)) + (declare (ignorable ,key)) + (cond + ,@(loop + for (keys . forms) in clauses + for test = (etypecase keys + (string `(string= ,key ,keys)) + (sequence `(find ,key ',keys :test 'string=)) + ((eql t) t)) + collect + `(,test ,@forms)))))) + +(defun rcurry (function &rest args) + (lambda (&rest more-args) + (apply function (append more-args args)))) + +(defmacro for ((&rest clauses) &rest body-forms) + `(%for ,clauses (progn ,@body-forms))) + +(defmacro for* ((&rest clauses) &rest body-forms) + `(%for* ,clauses (progn ,@body-forms))) + +(defmacro %for ((&rest clauses) body-form &rest finally-forms) + (for-aux 'for clauses body-form finally-forms)) + +(defmacro %for* ((&rest clauses) body-form &rest finally-forms) + (for-aux 'for* clauses body-form finally-forms)) + +(defmacro for-finish () + '(loop-finish)) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun for-aux (kind clauses body-form finally-forms) + ` (loop ,@ (loop for firstp = t then nil + for %clauses = clauses then (rest %clauses) + for clause = (first %clauses) then (first %clauses) + while (and %clauses (listp clause)) + append (cons (ecase kind + (for (if firstp 'as 'and)) + (for* 'as)) + (if (= 2 (length clause)) + (list (first clause) '= (second clause)) + clause)) + into result + finally (return (append result %clauses))) + do (progn ,body-form) + finally (progn ,@finally-forms)))) + + +;;;; spezielle Hilfsfunktionen + +(defun tag-name (elt) + (runes:rod-string (dom:tag-name elt))) + +(defmacro with-attributes ((&rest attributes) element &body body) + (let ((e (gensym "element"))) + `(let* ((,e ,element) + ,@(mapcar (lambda (var) + `(,var (dom:get-attribute ,e ,(symbol-name var)))) + attributes)) + ,@body))) + +(defun map-child-elements (result-type fn element &key name) + (remove '#1=#:void + (map result-type + (lambda (node) + (if (and (eq (dom:node-type node) :element) + (or (null name) + (equal (tag-name node) name))) + (funcall fn node) + '#1#)) + (dom:child-nodes element)))) + +(defmacro do-child-elements ((var element &key name) &body body) + `(block nil + (map-child-elements nil (lambda (,var) ,@body) ,element :name ,name))) + +(defun find-child-element (name element) + (do-child-elements (child element :name name) + (return child))) + +(defun %intern (name) + (unless (stringp name) + (setf name (runes:rod-string name))) + (if (zerop (length name)) + nil + (intern name :domtest-tests))) + +(defun replace-studly-caps (str) + (unless (stringp str) + (setf str (runes:rod-string str))) + ;; s/([A-Z][a-z])/-\1/ + (with-output-to-string (out) + (with-input-from-string (in str) + (for ((c = (read-char in nil nil)) + (previous = nil then c) + (next = (peek-char nil in nil nil)) + :while c) + (when (and previous + (upper-case-p c) next (lower-case-p next) + (not (lower-case-p previous))) + (write-char #\- out)) + (write-char (char-downcase c) out) + (when (and (lower-case-p c) next (upper-case-p next)) + (write-char #\- out)))))) + +(defun intern-dom (name) + (intern (replace-studly-caps name) :dom)) + +(defun child-elements (element) + (map-child-elements 'list #'identity element)) + +(defun parse-java-literal (str) + (unless (stringp str) + (setf str (runes:rod-string str))) + (cond + ((zerop (length str)) nil) + ((equal str "true") + t) + ((equal str "false") + nil) + ((digit-char-p (char str 0)) + (parse-integer str)) + ((char= (char str 0) #\") + (runes:rod + (with-output-to-string (out) + (with-input-from-string (in str) + (read-char in) + (for ((c = (read-char in)) + :until (char= c #\")) + (if (char= c #\\) + (ecase (read-char in) + ;; ... + (#\n (write-char #\newline out))) + (write-char c out))))))) + (t + (%intern str)))) + +(defun maybe-setf (place form) + (if place + `(setf ,place ,form) + form)) + +(defun nullify (str) + (if (zerop (length str)) nil str)) + + +;;;; dom1-interfaces.xml auslesen + +(defvar *methods* '()) +(defvar *fields* '()) + +(declaim (special *directory*)) + +(defun read-members () + (let* ((pathname (merge-pathnames "patches/dom1-interfaces.xml" *directory*)) + (builder (dom:make-dom-builder)) + (library (dom:document-element (xml:parse-file pathname builder))) + (methods '()) + (fields '())) + (do-child-elements (interface library :name "interface") + (do-child-elements (method interface :name "method") + (let ((parameters (find-child-element "parameters" method))) + (push (cons (dom:get-attribute method "name") + (map-child-elements 'list + (rcurry #'dom:get-attribute "name") + parameters + :name "param")) + methods))) + (do-child-elements (attribute interface :name "attribute") + (push (dom:get-attribute attribute "name") fields))) + (values methods fields))) + + +;;;; Conditions uebersetzen + +(defun translate-condition (element) + (string-case (tag-name element) + ("equals" (translate-equals element)) + ("contentType" (translate-content-type element)) + ("hasFeature" (translate-has-feature element)) + ("implementationAttribute" (assert-have-implementation-attribute element)) + ("isNull" (translate-is-null element)) + ("not" (translate-is-null element)) + ("notNull" (translate-not-null element)) + ("or" (translate-or element)) + ("same" (translate-same element)) + (t (error "unknown condition: ~A" element)))) + +(defun equalsp (a b test) + (when (typep a 'dom-impl::named-node-map) + (setf a (dom:items a))) + (when (typep b 'dom-impl::named-node-map) + (setf b (dom:items b))) + (if (and (typep a 'sequence) (typep b 'sequence)) + (null (set-exclusive-or (coerce a 'list) (coerce b 'list) :test test)) + (funcall test a b))) + +(defun %equal (a b) + (or (equal a b) (and (runes::rodp a) (runes::rodp b) (runes:rod= a b)))) + +(defun %equalp (a b) + (or (equalp a b) (and (runes::rodp a) (runes::rodp b) (runes:rod-equal a b)))) + +(defun translate-equals (element) + (with-attributes (|actual| |expected| |ignoreCase|) element + `(equalsp ,(%intern actual) + ,(parse-java-literal expected) + ',(if (parse-java-literal |ignoreCase|) '%equal '%equal)))) + +(defun translate-same (element) + (with-attributes (|actual| |expected|) element + `(eql ,(%intern actual) ,(parse-java-literal expected)))) + +(defun translate-or (element) + `(or ,@(map-child-elements 'list #'translate-condition element))) + +(defun translate-instance-of (element) + (with-attributes (|obj| |type|) element + `(eq (dom:node-type ,(%intern |obj|)) + ',(string-case (runes:rod-string |type|) + ("Document" :document) + ("DocumentFragment" :document-fragment) + ("Text" :text) + ("Comment" :comment) + ("CDATASection" :cdata-section) + ("Attr" :attribute) + ("Element" :element) + ("DocumentType" :document-type) + ("Notation" :notation) + ("Entity" :entity) + ("EntityReference" :entity-reference) + ("ProcessingInstruction" :processing-instruction) + (t (error "unknown interface: ~A" |type|)))))) + +(defun translate-is-null (element) + (with-attributes (|obj|) element + `(null ,(%intern |obj|)))) + +(defun translate-not-null (element) + (with-attributes (|obj|) element + (%intern |obj|))) + +(defun translate-content-type (element) ;XXX verstehe ich nicht + (with-attributes (|type|) element + `(equal ,|type| "text/xml"))) + +(defun translate-uri-equals (element) + (with-attributes + (|actual| + |scheme| |path| |host| |file| |name| |query| |fragment| |isAbsolute|) + element + |isAbsolute| + `(let ((uri (net.uri:parse-uri (runes:rod-string ,(%intern |actual|))))) + (flet ((uri-directory (path) + (namestring + (make-pathname :directory (pathname-directory path)))) + (uri-file (path) + (namestring (make-pathname :name (pathname-name path) + :type (pathname-type path)))) + (uri-name (path) + (pathname-name path)) + (maybe-equal (expected actual) + (if expected + (%equal (runes::rod expected) (runes::rod actual)) + t))) + (and (maybe-equal ,(parse-java-literal |scheme|) + (net.uri:uri-scheme uri)) + (maybe-equal ,(parse-java-literal |host|) + (net.uri:uri-host uri)) + (maybe-equal ,(parse-java-literal |path|) + (uri-directory (net.uri:uri-path uri))) + (maybe-equal ,(parse-java-literal |file|) + (uri-file (net.uri:uri-path uri))) + (maybe-equal ,(parse-java-literal |name|) + (uri-name (net.uri:uri-path uri))) + (maybe-equal ,(parse-java-literal |query|) + (net.uri:uri-query uri)) + (maybe-equal ,(parse-java-literal |fragment|) + (net.uri:uri-fragment uri))))))) + + +;;;; Statements uebersetzen + +(defun translate-statement (element) + (string-case (tag-name element) + ("append" (translate-append element)) + ("assertDOMException" (translate-assert-domexception element)) + ("assertEquals" (translate-assert-equals element)) + ("assertNotNull" (translate-assert-not-null element)) + ("assertInstanceOf" (translate-assert-instance-of element)) + ("assertNull" (translate-assert-null element)) + ("assertSame" (translate-assert-same element)) + ("assertSize" (translate-assert-size element)) + ("assertTrue" (translate-assert-true element)) + ("assertFalse" (translate-assert-false element)) + ("assertURIEquals" (translate-assert-uri-equals element)) + ("for-each" (translate-for-each element)) + ("fail" (translate-fail element)) + ("hasFeature" (translate-has-feature element)) + ("if" (translate-if element)) + ("increment" (translate-unary-assignment '+ element)) + ("decrement" (translate-unary-assignment '- element)) + ("length" (translate-length element)) + ("load" (translate-load element)) + ("nodeType" (translate-node-type element)) + ("plus" (translate-binary-assignment '+ element)) + ("try" (translate-try element)) + ("while" (translate-while element)) + (t (translate-member element)))) + +(defun translate-binary-assignment (fn element) + (with-attributes (|var| |op1| |op2|) element + (maybe-setf (%intern |var|) + `(,fn ,(parse-java-literal |op1|) + ,(parse-java-literal |op2|))))) + +(defun translate-unary-assignment (fn element) + (with-attributes (|var| |value|) element + (maybe-setf (%intern |var|) + `(,fn ,(%intern |var|) ,(parse-java-literal |value|))))) + +(defun translate-load (load) + (with-attributes (|var| |href| |willBeModified|) load + (maybe-setf (%intern |var|) + `(load-file ,|href| ,(parse-java-literal |willBeModified|))))) + +(defun translate-length (load) + ;; XXX Soweit ich sehe unterscheiden die Tests nicht zwischen + ;; der Laenge von DOMString und der length()-Methode der uebrigen + ;; Interfaces. Also unterscheiden wir das erstmal manuell. + (with-attributes (|var| |obj|) load + (let ((obj (%intern |obj|))) + (maybe-setf (%intern |var|) + `(if (typep ,obj 'sequence) + (length ,obj) + (dom:length ,obj)))))) + +(defun translate-call (call method) + (let ((name (car method)) + (args (mapcar (lambda (name) + (parse-java-literal (dom:get-attribute call name))) + (cdr method)))) + (with-attributes (|var| |obj|) call + (maybe-setf (%intern |var|) + `(,(intern-dom name) ,(%intern |obj|) ,@args))))) + +(defun translate-get (call name) + (with-attributes (|var| |value| |obj|) call + (cond + ((nullify |var|) ;get + (maybe-setf (%intern |var|) `(,(intern-dom name) ,(%intern |obj|)))) + ((nullify |value|) ;set + `(setf (,(intern-dom name) ,(%intern |obj|)) + ,(parse-java-literal |value|))) + (t + (error "oops"))))) + +(defun translate-has-feature (element) + (with-attributes (|var| |feature| |version|) element + (maybe-setf (%intern |var|) + `(and (runes:rod-equal ,(parse-java-literal |feature|) #"XML") + (or (zerop (length ,(parse-java-literal |version|))) + (runes:rod-equal ,(parse-java-literal |version|) #"1.0")))))) + +(defun translate-fail (element) + (declare (ignore element)) + `(error "failed")) + +(defun translate-node-type (element) + ;; XXX Das muessten eigentlich ints sein, sind aber Keywords in CXML. + (with-attributes (|var| |obj|) element + (maybe-setf (%intern |var|) + `(ecase (dom:node-type ,(%intern |obj|)) + (:element 1) + (:attribute 2) + (:text 3) + (:cdata-section 4) + (:entity-reference 5) + (:entity 6) + (:processing-instruction 7) + (:comment 8) + (:document 9) + (:document-type 10) + (:document-fragment 11) + (:notation 12))))) + +(defun translate-member (element) + (let* ((name (dom:tag-name element)) + (method (find name *methods* :key #'car :test #'runes:rod=)) + (field (find name *fields* :test #'runes:rod=))) + (cond + (method (translate-call element method)) + (field (translate-get element field)) + (t (error "unknown element ~A" element))))) + +(defun translate-assert-equals (element) + `(assert ,(translate-equals element))) + +(defun translate-assert-same (element) + `(assert ,(translate-same element))) + +(defun translate-assert-null (element) + (with-attributes (|actual|) element + `(assert (null ,(%intern |actual|))))) + +(defun translate-assert-not-null (element) + (with-attributes (|actual|) element + `(assert ,(%intern |actual|)))) + +(defun translate-assert-size (element) + (with-attributes (|collection| |size|) element + `(let ((collection ,(%intern |collection|))) + (when (typep collection 'dom-impl::named-node-map) + (setf collection (dom:items collection))) + (assert (eql (length collection) ,(parse-java-literal |size|)))))) + +(defun translate-assert-instance-of (element) + `(assert ,(translate-instance-of element))) + +(defun translate-if (element) + (destructuring-bind (condition &rest rest) + (child-elements element) + (let (then else) + (dolist (r rest) + (when (equal (tag-name r) "else") + (setf else (child-elements r)) + (return)) + (push r then)) + `(cond + (,(translate-condition condition) + ,@(mapcar #'translate-statement (reverse then))) + (t + ,@(mapcar #'translate-statement else)))))) + +(defun translate-while (element) + (destructuring-bind (condition &rest body) + (child-elements element) + `(loop + while ,(translate-condition condition) + do (progn ,@(mapcar #'translate-statement body))))) + +(defun translate-assert-domexception (element) + (do-child-elements (c element) + (unless (equal (tag-name c) "metadata") + (return + `(block assert-domexception + (handler-bind + ((dom-impl::dom-exception + (lambda (c) + (when (eq (dom-impl::dom-exception-key c) + ,(intern (tag-name c) :keyword)) + (return-from assert-domexception))))) + ,@(translate-body c) + (error "expected exception ~A" ,(tag-name c)))))))) + +(defun translate-catch (catch return) + `(lambda (c) + ,@(map-child-elements + 'list + (lambda (exception) + `(when (eq (dom-impl::dom-exception-key c) + ,(intern (runes:rod-string (dom:get-attribute exception "code")) + :keyword)) + ,@(translate-body exception) + ,return)) + catch))) + +(defun translate-try (element) + `(block try + (handler-bind + ((dom-impl::dom-exception + ,(translate-catch + (do-child-elements (c element :name "catch") (return c)) + '(return-from try)))) + ,@(map-child-elements 'list + (lambda (c) + (if (equal (tag-name c) "catch") + nil + (translate-statement c))) + element)))) + +(defun translate-append (element) + (with-attributes (|collection| |item|) element + (let ((c (%intern |collection|)) + (i (%intern |item|))) + (maybe-setf c `(append ,c (list ,i)))))) + +(defun translate-assert-true (element) + (with-attributes (|actual|) element + `(assert ,(if (nullify |actual|) + (%intern |actual|) + (translate-condition + (do-child-elements (c element) (return c))))))) + +(defun translate-assert-false (element) + (with-attributes (|actual|) element + `(assert (not ,(%intern |actual|))))) + +(defun translate-assert-uri-equals (element) + `(assert ,(translate-uri-equals element))) + + +;;;; Tests uebersetzen + +(defun translate-body (element) + (map-child-elements 'list #'translate-statement element)) + +(defun translate-for-each (element) + (with-attributes (|collection| |member|) element + `(let ((collection ,(%intern |collection|))) + (when (typep collection 'dom-impl::named-node-map) + (setf collection (dom:items collection))) + (map nil (lambda (,(%intern |member|)) ,@(translate-body element)) + collection)))) + +(defun assert-have-implementation-attribute (element) + (let ((attribute (runes:rod-string (dom:get-attribute element "name")))) + (string-case attribute + (t + (format t "~&implementationAttribute ~A not supported, skipping test~%" + attribute) + (throw 'give-up nil))))) + +(defun slurp-test (pathname) + (unless *fields* + (multiple-value-setq (*methods* *fields*) (read-members *directory*))) + (catch 'give-up + (let* ((builder (dom:make-dom-builder)) + (test (dom:document-element (xml:parse-file pathname builder))) + title + (bindings '()) + (code '())) + (declare (ignore title)) + (do-child-elements (e test) + (string-case (tag-name e) + ("metadata" + (let ((title-element (find-child-element "title" e))) + (setf title (dom:data (dom:first-child title-element))))) + ("var" + (push (list (%intern (dom:get-attribute e "name")) + (string-case (runes:rod-string + (dom:get-attribute e "type")) + (("byte" "short" "int" "long") 0) + (t nil))) + bindings) + (do-child-elements (member e :name "member") e + (push `(setf ,(%intern (dom:get-attribute e "name")) + (append ,(%intern (dom:get-attribute e "name")) + (list + ,(parse-java-literal + (dom:data + (dom:item + (dom:child-nodes member) + 0)))))) + code))) + ("implementationAttribute" + (assert-have-implementation-attribute e)) + (t + (push (translate-statement e) code)))) + `(lambda () + (let (,@bindings) + (declare (ignorable ,@(mapcar #'car bindings))) + ,@(reverse code)))))) + +(defun load-file (name &optional will-be-modified-p) + (declare (ignore will-be-modified-p)) + (setf name (runes:rod-string name)) + (let* ((directory (merge-pathnames "tests/level1/core/files/" *directory*)) + (document + (xml:parse-file + (make-pathname :name name :type "xml" :defaults directory) + (dom:make-dom-builder)))) + document)) + +(defparameter *bad-tests* + '("hc_elementnormalize2.xml" "hc_nodereplacechildnewchildexists.xml")) + +(defun run-all-tests (*directory* &optional verbose) + (let* ((xml::*redefinition-warning* nil) + (test-directory (merge-pathnames "tests/level1/core/" *directory*)) + (all-tests (merge-pathnames "alltests.xml" test-directory)) + (builder (dom:make-dom-builder)) + (suite (dom:document-element (xml:parse-file all-tests builder))) + (n 0) + (i 0) + (ntried 0) + (nfailed 0)) + (do-child-elements (member suite) + (unless + (member (runes:rod-string (dom:get-attribute member "href")) + *bad-tests* + :test 'equal) + (incf n))) + (do-child-elements (member suite) + (let ((href (runes:rod-string (dom:get-attribute member "href")))) + (unless (member href *bad-tests* :test 'equal) + (format t "~&~D/~D ~A~%" i n href) + (let ((lisp (slurp-test (merge-pathnames href test-directory)))) + (when verbose + (print lisp)) + (when lisp + (incf ntried) + (with-simple-restart (skip-test "Skip this test") + (handler-case + (funcall (compile nil lisp)) + (serious-condition (c) + (incf nfailed) + (warn "test failed: ~A" c)))))) + (incf i)))) + (format t "~&~D/~D tests failed; ~D test~:P were skipped" + nfailed ntried (- n ntried)))) + +(defun run-test (*directory* href) + (let* ((test-directory (merge-pathnames "tests/level1/core/" *directory*)) + (lisp (slurp-test (merge-pathnames href test-directory)))) + (print lisp) + (when lisp + (funcall (compile nil lisp))))) + +#+(or) +(run-all-tests "~/src/2001/DOM-Test-Suite/") diff --git a/test/xmlconf-base.diff b/test/xmlconf-base.diff new file mode 100644 index 0000000..fb0d478 --- /dev/null +++ b/test/xmlconf-base.diff @@ -0,0 +1,53 @@ +A recent check-in to the XML-Test-Suite's metadata has broken my parser for +xmlconf.xml. Apply this patch to revert it. + +Index: oasis/oasis.xml +=================================================================== +RCS file: /sources/public/2001/XML-Test-Suite/xmlconf/oasis/oasis.xml,v +retrieving revision 1.5 +retrieving revision 1.6 +diff -u -r1.5 -r1.6 +--- oasis/oasis.xml 16 May 2002 14:46:32 -0000 1.5 ++++ oasis/oasis.xml 4 Mar 2004 18:23:37 -0000 1.6 +@@ -1,6 +1,6 @@ + + +- ++ + + +Index: xmltest/xmltest.xml +=================================================================== +RCS file: /sources/public/2001/XML-Test-Suite/xmlconf/xmltest/xmltest.xml,v +retrieving revision 1.9 +retrieving revision 1.10 +diff -u -r1.9 -r1.10 +--- xmltest/xmltest.xml 21 May 2002 19:05:57 -0000 1.9 ++++ xmltest/xmltest.xml 4 Mar 2004 18:25:11 -0000 1.10 +@@ -5,7 +5,7 @@ + All Rights Reserved. + --> + +- ++ + + + + +- ++ + + diff --git a/test/xmlconf.lisp b/test/xmlconf.lisp new file mode 100644 index 0000000..7f7c752 --- /dev/null +++ b/test/xmlconf.lisp @@ -0,0 +1,104 @@ +(defpackage xmlconf + (:use :cl :runes) + (:export #:run-all-tests)) +(in-package :xmlconf) + +(defun get-attribute (element name) + (rod-string (dom:get-attribute element name))) + +(defun relevant-test-p (test) + (and (equal (get-attribute test "TYPE") "valid") + (let ((version (get-attribute test "RECOMMENDATION"))) + (cond + ((or (equal version "") ;XXX + (equal version "XML1.0")) + (cond + ((equal (get-attribute test "NAMESPACE") "no") + (format t "~A: test applies to parsers without namespace support, skipping~%" + (get-attribute test "URI")) + nil) + (t + t))) + ((equal version "XML1.1") + ;; not supported + nil) + (t + (warn "unrecognized RECOMMENDATION value: ~S" version) + nil))))) + +(defun test-pathnames (directory test) + (let* ((sub-directory + (loop + for parent = test then (dom:parent-node parent) + for base = (get-attribute parent "xml:base") + until (plusp (length base)) + finally (return (merge-pathnames base directory)))) + (uri (get-attribute test "URI")) + (output (get-attribute test "OUTPUT"))) + (values (merge-pathnames uri sub-directory) + (when (plusp (length output)) + (merge-pathnames output sub-directory))))) + +(defun serialize-document (document) + (map 'vector #'char-code + (with-output-to-string (s) + (xml:unparse-document document s)))) + +(defun file-contents (pathname) + (with-open-file (s pathname) + (let ((result + (make-array (file-length s) :element-type '(unsigned-byte 8)))) + (read-sequence result s ) + result))) + +(defun run-all-tests (directory) + (let* ((pathname (merge-pathnames "xmlconf.xml" directory)) + (builder (dom:make-dom-builder)) + (xmlconf (xml:parse-file pathname builder)) + (ntried 0) + (nfailed 0) + (nskipped 0)) + (dom:do-node-list (test (dom:get-elements-by-tag-name xmlconf "TEST")) + (cond + ((relevant-test-p test) + (incf ntried) + (multiple-value-bind (pathname output) + (test-pathnames directory test) + (princ pathname) + (unless (probe-file pathname) + (error "file not found: ~A" pathname)) + (with-simple-restart (skip-test "Skip this test") + (handler-case + (progn + (mp:with-timeout (60) + (let ((document + (xml:parse-file pathname (dom:make-dom-builder)))) + (cond + ((null output) + (format t " ok (output not checked)~%")) + ((equalp (file-contents output) + (serialize-document document)) + (format t " ok~%")) + (t + (let ((error-output + (make-pathname :type "error" :defaults output))) + (with-open-file (s error-output + :direction :output + :if-exists :supersede) + (write-sequence (serialize-document document) s)) + (error "well-formed, but output ~S not the expected ~S~%" + error-output output))))))) + ((and serious-condition (not excl:interrupt-signal)) (c) + (incf nfailed) + (format t " FAILED:~% ~A~%[~A]~%" + c + (rod-string + (dom:data + (dom:item (dom:child-nodes test) 0))))))))) + (t + (incf nskipped)))) + (format t "~&~D/~D tests failed; ~D test~:P were skipped" + nfailed ntried nskipped))) + +#+(or) +(xmlconf::run-all-tests "/mnt/debian/space/xmlconf/") diff --git a/xml/COPYING b/xml/COPYING new file mode 100644 index 0000000..5615459 --- /dev/null +++ b/xml/COPYING @@ -0,0 +1,459 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + diff --git a/xml/catalog.lisp b/xml/catalog.lisp new file mode 100644 index 0000000..3c752e8 --- /dev/null +++ b/xml/catalog.lisp @@ -0,0 +1,161 @@ +;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*- +;;;; +;;;; This file is part of the CXML parser, released under (L)LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Developed 2004 for headcraft - http://headcraft.de/ +;;;; Copyright: David Lichteblau + +(in-package :cxml) + +;;; http://www.oasis-open.org/committees/entity/spec.html +;;; +;;; Bugs: +;;; - We validate using the Catalog DTD while parsing, which is too strict +;;; and will will fail to parse files using other parser's extensions. +;;; (Jedenfalls behauptet das die Spec.) +;;; A long-term solution might be an XML Schema validator. +;;; +;;; XXX Das mit dem :dtd geht natuerlich gar nicht. Die Option muss weg. + +(defvar *prefer* nil) +(defvar *catalog* + '(;; libxml standard + "/etc/xml/catalog" + ;; FreeBSD + "/usr/local/share/xml/catalog.ports")) + +(defparameter *catalog-dtd* nil) + +(defun parse-catalog (files) + (let ((result '())) + (loop + (let ((file (pop files))) + (unless file + (return)) + (multiple-value-bind (entries next) (parse-catalog-file file) + (setf result (append result entries)) + (setf files (append next files))))) + result)) + +(defun parse-catalog-file (uri) + (handler-case + (parse-catalog-file/strict uri) + (file-error () nil) + (parser-error () nil))) + +(defun parse-catalog-file/strict (uri) + (when (stringp uri) + (setf uri (puri:parse-uri uri))) + (unless *catalog-dtd* + (let ((cxml + (slot-value (asdf:find-system :cxml) 'asdf::relative-pathname))) + (setf *catalog-dtd* + (parse-dtd-file (merge-pathnames "catalog.dtd" cxml))))) + (with-open-stream (s (open (uri-to-pathname uri) + :element-type '(unsigned-byte 8) + :direction :input)) + (parse-stream s + (make-instance 'catalog-parser :uri uri) + :validate t + ;; XXX das geht nicht + :dtd *catalog-dtd*))) + +(defclass catalog-parser () + ((entries :initform '() :accessor entries) + (next :initform '() :accessor next) + (prefer-stack :initform (list *prefer*) :accessor prefer-stack) + (base-stack :accessor base-stack))) + +(defmethod initialize-instance :after + ((instance catalog-parser) &key uri) + (setf (base-stack instance) (list uri))) + +(defmethod prefer ((handler catalog-parser)) + (car (prefer-stack handler))) + +(defmethod base ((handler catalog-parser)) + (car (base-stack handler))) + +(defun get-attribute/lname (name attributes) + (member name attributes + :key (lambda (a) + (or (sax:attribute-local-name a) + (sax:attribute-qname a))) + :test #'rod=)) + +(defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs) + (declare (ignore uri)) + (setf lname (or lname qname)) + ;; we can dispatch on lnames only because we validate against the DTD, + ;; which disallows other namespaces. + (push (string-or (get-attribute/lname #"prefer" attrs) (prefer handler)) + (prefer-stack handler)) + (push (string-or (get-attribute/lname #"base" attrs) (base handler)) + (base-stack handler)) + (cond + ((rod= lname #"public") + (push (list :public + (get-attribute/lname #"publicId" attrs) + (puri:merge-uris + (puri:parse-uri (get-attribute/lname #"uri" attrs)) + (base handler))) + (entries handler))) + ((rod= lname #"system") + (push (list :system + (get-attribute/lname #"systemId" attrs) + (puri:merge-uris + (puri:parse-uri (get-attribute/lname #"uri" attrs)) + (base handler))) + (entries handler))) + ((rod= lname #"uri") + (push (list :uri + (get-attribute/lname #"name" attrs) + (puri:merge-uris + (puri:parse-uri (get-attribute/lname #"uri" attrs)) + (base handler))) + (entries handler))) + ((rod= lname #"rewriteSystem") + (push (list :rewrite-system + (get-attribute/lname #"systemIdStartString" attrs) + (get-attribute/lname #"rewritePrefix" attrs)) + (entries handler))) + ((rod= lname #"rewriteURI") + (push (list :rewrite-uri + (get-attribute/lname #"uriStartString" attrs) + (get-attribute/lname #"rewritePrefix" attrs)) + (entries handler))) + ((rod= lname #"delegatePublic") + (push (list :delegate-public + (get-attribute/lname #"publicIdStartString" attrs) + (puri:merge-uris + (puri:parse-uri (get-attribute/lname #"catalog" attrs)) + (base handler))) + (entries handler))) + ((rod= lname #"delegateSystem") + (push (list :delegate-system + (get-attribute/lname #"systemIdStartString" attrs) + (puri:merge-uris + (puri:parse-uri (get-attribute/lname #"catalog" attrs)) + (base handler))) + (entries handler))) + ((rod= lname #"delegateURI") + (push (list :delegate-uri + (get-attribute/lname #"uriStartString" attrs) + (puri:merge-uris + (puri:parse-uri (get-attribute/lname #"catalog" attrs)) + (base handler))) + (entries handler))) + ((rod= lname #"nextCatalog") + (push (puri:merge-uris + (puri:parse-uri (get-attribute/lname #"catalog" attrs)) + (base handler)) + (next handler))))) + +(defmethod sax:end-element ((handler catalog-parser) uri lname qname) + (declare (ignore uri lname qname)) + (pop (base-stack handler)) + (pop (prefer-stack handler))) + +(defmethod sax:end-document ((handler catalog-parser)) + (values (reverse (entries handler)) (reverse (next handler)))) diff --git a/xml/characters.lisp b/xml/characters.lisp new file mode 100644 index 0000000..b17bb6e --- /dev/null +++ b/xml/characters.lisp @@ -0,0 +1,127 @@ +;;; XXX wird derzeit in DOM:CREATE-ATTRIBUTE verwendet. Muesste aber wohl vom +;;; Parser auch schon geprueft werden. Vorher sollte man allerdings die +;;; Geschwindigkeit der Sache mal untersuchen. + +(in-package :xml) + +(defun valid-name-p (rod) + (and (not (zerop (length rod))) + (let ((initial (elt rod 0))) + (or (rune-in-range-p initial *base-char-ranges*) + (rune-in-range-p initial *ideographic-ranges*) + (eql initial #.(char-code #\_)) + (eql initial #.(char-code #\:)))) + (every #'rune-name-char-p rod))) + +(defun rune-name-char-p (rune) + (or (rune-in-range-p rune *base-char-ranges*) + (rune-in-range-p rune *ideographic-ranges*) + (eql rune #.(char-code #\.)) + (eql rune #.(char-code #\-)) + (eql rune #.(char-code #\_)) + (eql rune #.(char-code #\:)) + (rune-in-range-p rune *combining-char-ranges*) + (rune-in-range-p rune *extender-ranges*))) + +(defun rune-in-range-p (rune range) + ;; XXX FIXME, das geht doch besser + (block nil + (map nil (lambda (range) + (when (< rune (car range)) + (return nil)) + (when (<= rune (cadr range)) + (return t))) + range))) + +(defparameter *base-char-ranges* + #((#x0041 #x005A) (#x0061 #x007A) (#x00C0 #x00D6) (#x00D8 #x00F6) + (#x00F8 #x00FF) (#x0100 #x0131) (#x0134 #x013E) (#x0141 #x0148) + (#x014A #x017E) (#x0180 #x01C3) (#x01CD #x01F0) (#x01F4 #x01F5) + (#x01FA #x0217) (#x0250 #x02A8) (#x02BB #x02C1) (#x0386 #x0386) + (#x0388 #x038A) (#x038C #x038C) (#x038E #x03A1) (#x03A3 #x03CE) + (#x03D0 #x03D6) (#x03DA #x03DA) (#x03DC #x03DC) (#x03DE #x03DE) + (#x03E0 #x03E0) (#x03E2 #x03F3) (#x0401 #x040C) (#x040E #x044F) + (#x0451 #x045C) (#x045E #x0481) (#x0490 #x04C4) (#x04C7 #x04C8) + (#x04CB #x04CC) (#x04D0 #x04EB) (#x04EE #x04F5) (#x04F8 #x04F9) + (#x0531 #x0556) (#x0559 #x0559) (#x0561 #x0586) (#x05D0 #x05EA) + (#x05F0 #x05F2) (#x0621 #x063A) (#x0641 #x064A) (#x0671 #x06B7) + (#x06BA #x06BE) (#x06C0 #x06CE) (#x06D0 #x06D3) (#x06D5 #x06D5) + (#x06E5 #x06E6) (#x0905 #x0939) (#x093D #x093D) (#x0958 #x0961) + (#x0985 #x098C) (#x098F #x0990) (#x0993 #x09A8) (#x09AA #x09B0) + (#x09B2 #x09B2) (#x09B6 #x09B9) (#x09DC #x09DD) (#x09DF #x09E1) + (#x09F0 #x09F1) (#x0A05 #x0A0A) (#x0A0F #x0A10) (#x0A13 #x0A28) + (#x0A2A #x0A30) (#x0A32 #x0A33) (#x0A35 #x0A36) (#x0A38 #x0A39) + (#x0A59 #x0A5C) (#x0A5E #x0A5E) (#x0A72 #x0A74) (#x0A85 #x0A8B) + (#x0A8D #x0A8D) (#x0A8F #x0A91) (#x0A93 #x0AA8) (#x0AAA #x0AB0) + (#x0AB2 #x0AB3) (#x0AB5 #x0AB9) (#x0ABD #x0ABD) (#x0AE0 #x0AE0) + (#x0B05 #x0B0C) (#x0B0F #x0B10) (#x0B13 #x0B28) (#x0B2A #x0B30) + (#x0B32 #x0B33) (#x0B36 #x0B39) (#x0B3D #x0B3D) (#x0B5C #x0B5D) + (#x0B5F #x0B61) (#x0B85 #x0B8A) (#x0B8E #x0B90) (#x0B92 #x0B95) + (#x0B99 #x0B9A) (#x0B9C #x0B9C) (#x0B9E #x0B9F) (#x0BA3 #x0BA4) + (#x0BA8 #x0BAA) (#x0BAE #x0BB5) (#x0BB7 #x0BB9) (#x0C05 #x0C0C) + (#x0C0E #x0C10) (#x0C12 #x0C28) (#x0C2A #x0C33) (#x0C35 #x0C39) + (#x0C60 #x0C61) (#x0C85 #x0C8C) (#x0C8E #x0C90) (#x0C92 #x0CA8) + (#x0CAA #x0CB3) (#x0CB5 #x0CB9) (#x0CDE #x0CDE) (#x0CE0 #x0CE1) + (#x0D05 #x0D0C) (#x0D0E #x0D10) (#x0D12 #x0D28) (#x0D2A #x0D39) + (#x0D60 #x0D61) (#x0E01 #x0E2E) (#x0E30 #x0E30) (#x0E32 #x0E33) + (#x0E40 #x0E45) (#x0E81 #x0E82) (#x0E84 #x0E84) (#x0E87 #x0E88) + (#x0E8A #x0E8A) (#x0E8D #x0E8D) (#x0E94 #x0E97) (#x0E99 #x0E9F) + (#x0EA1 #x0EA3) (#x0EA5 #x0EA5) (#x0EA7 #x0EA7) (#x0EAA #x0EAB) + (#x0EAD #x0EAE) (#x0EB0 #x0EB0) (#x0EB2 #x0EB3) (#x0EBD #x0EBD) + (#x0EC0 #x0EC4) (#x0F40 #x0F47) (#x0F49 #x0F69) (#x10A0 #x10C5) + (#x10D0 #x10F6) (#x1100 #x1100) (#x1102 #x1103) (#x1105 #x1107) + (#x1109 #x1109) (#x110B #x110C) (#x110E #x1112) (#x113C #x113C) + (#x113E #x113E) (#x1140 #x1140) (#x114C #x114C) (#x114E #x114E) + (#x1150 #x1150) (#x1154 #x1155) (#x1159 #x1159) (#x115F #x1161) + (#x1163 #x1163) (#x1165 #x1165) (#x1167 #x1167) (#x1169 #x1169) + (#x116D #x116E) (#x1172 #x1173) (#x1175 #x1175) (#x119E #x119E) + (#x11A8 #x11A8) (#x11AB #x11AB) (#x11AE #x11AF) (#x11B7 #x11B8) + (#x11BA #x11BA) (#x11BC #x11C2) (#x11EB #x11EB) (#x11F0 #x11F0) + (#x11F9 #x11F9) (#x1E00 #x1E9B) (#x1EA0 #x1EF9) (#x1F00 #x1F15) + (#x1F18 #x1F1D) (#x1F20 #x1F45) (#x1F48 #x1F4D) (#x1F50 #x1F57) + (#x1F59 #x1F59) (#x1F5B #x1F5B) (#x1F5D #x1F5D) (#x1F5F #x1F7D) + (#x1F80 #x1FB4) (#x1FB6 #x1FBC) (#x1FBE #x1FBE) (#x1FC2 #x1FC4) + (#x1FC6 #x1FCC) (#x1FD0 #x1FD3) (#x1FD6 #x1FDB) (#x1FE0 #x1FEC) + (#x1FF2 #x1FF4) (#x1FF6 #x1FFC) (#x2126 #x2126) (#x212A #x212B) + (#x212E #x212E) (#x2180 #x2182) (#x3041 #x3094) (#x30A1 #x30FA) + (#x3105 #x312C) (#xAC00 #xD7A3))) + +(defparameter *ideographic-ranges* + #((#x4E00 #x9FA5) (#x3007 #x3007) (#x3021 #x3029))) + +(defparameter *combining-char-ranges* + #((#x0300 #x0345) (#x0360 #x0361) (#x0483 #x0486) (#x0591 #x05A1) + (#x05A3 #x05B9) (#x05BB #x05BD) (#x05BF #x05BF) (#x05C1 #x05C2) + (#x05C4 #x05C4) (#x064B #x0652) (#x0670 #x0670) (#x06D6 #x06DC) + (#x06DD #x06DF) (#x06E0 #x06E4) (#x06E7 #x06E8) (#x06EA #x06ED) + (#x0901 #x0903) (#x093C #x093C) (#x093E #x094C) (#x094D #x094D) + (#x0951 #x0954) (#x0962 #x0963) (#x0981 #x0983) (#x09BC #x09BC) + (#x09BE #x09BE) (#x09BF #x09BF) (#x09C0 #x09C4) (#x09C7 #x09C8) + (#x09CB #x09CD) (#x09D7 #x09D7) (#x09E2 #x09E3) (#x0A02 #x0A02) + (#x0A3C #x0A3C) (#x0A3E #x0A3E) (#x0A3F #x0A3F) (#x0A40 #x0A42) + (#x0A47 #x0A48) (#x0A4B #x0A4D) (#x0A70 #x0A71) (#x0A81 #x0A83) + (#x0ABC #x0ABC) (#x0ABE #x0AC5) (#x0AC7 #x0AC9) (#x0ACB #x0ACD) + (#x0B01 #x0B03) (#x0B3C #x0B3C) (#x0B3E #x0B43) (#x0B47 #x0B48) + (#x0B4B #x0B4D) (#x0B56 #x0B57) (#x0B82 #x0B83) (#x0BBE #x0BC2) + (#x0BC6 #x0BC8) (#x0BCA #x0BCD) (#x0BD7 #x0BD7) (#x0C01 #x0C03) + (#x0C3E #x0C44) (#x0C46 #x0C48) (#x0C4A #x0C4D) (#x0C55 #x0C56) + (#x0C82 #x0C83) (#x0CBE #x0CC4) (#x0CC6 #x0CC8) (#x0CCA #x0CCD) + (#x0CD5 #x0CD6) (#x0D02 #x0D03) (#x0D3E #x0D43) (#x0D46 #x0D48) + (#x0D4A #x0D4D) (#x0D57 #x0D57) (#x0E31 #x0E31) (#x0E34 #x0E3A) + (#x0E47 #x0E4E) (#x0EB1 #x0EB1) (#x0EB4 #x0EB9) (#x0EBB #x0EBC) + (#x0EC8 #x0ECD) (#x0F18 #x0F19) (#x0F35 #x0F35) (#x0F37 #x0F37) + (#x0F39 #x0F39) (#x0F3E #x0F3E) (#x0F3F #x0F3F) (#x0F71 #x0F84) + (#x0F86 #x0F8B) (#x0F90 #x0F95) (#x0F97 #x0F97) (#x0F99 #x0FAD) + (#x0FB1 #x0FB7) (#x0FB9 #x0FB9) (#x20D0 #x20DC) (#x20E1 #x20E1) + (#x302A #x302F) (#x3099 #x3099) (#x309A #x309A))) + +(defparameter *digit-ranges* + #((#x0030 #x0039) (#x0660 #x0669) (#x06F0 #x06F9) (#x0966 #x096F) + (#x09E6 #x09EF) (#x0A66 #x0A6F) (#x0AE6 #x0AEF) (#x0B66 #x0B6F) + (#x0BE7 #x0BEF) (#x0C66 #x0C6F) (#x0CE6 #x0CEF) (#x0D66 #x0D6F) + (#x0E50 #x0E59) (#x0ED0 #x0ED9) (#x0F20 #x0F29))) + +(defparameter *extender-ranges* + #((#x00B7 #x00B7) (#x02D0 #x02D0) (#x02D1 #x02D1) (#x0387 #x0387) + (#x0640 #x0640) (#x0E46 #x0E46) (#x0EC6 #x0EC6) (#x3005 #x3005) + (#x3031 #x3035) (#x309D #x309E) (#x30FC #x30FE))) diff --git a/xml/dom-builder.lisp b/xml/dom-builder.lisp new file mode 100644 index 0000000..9c803f4 --- /dev/null +++ b/xml/dom-builder.lisp @@ -0,0 +1,46 @@ +(in-package :dom-impl) + +(export 'dom-builder) + +(defclass dom-builder () + ((document :initform nil :accessor document) + (element-stack :initform '() :accessor element-stack))) + +(defmethod sax:start-document ((handler dom-builder)) + (let ((document (make-instance 'dom-impl::document)) + (doctype (make-instance 'dom-impl::document-type + :notations (make-hash-table :test #'equalp)))) + (setf (slot-value document 'dom-impl::owner) document + (slot-value document 'dom-impl::doc-type) doctype) + (setf (document handler) document) + (push document (element-stack handler)))) + +(defmethod sax:end-document ((handler dom-builder)) + (setf (slot-value (document handler) 'children ) + (nreverse (slot-value (document handler) 'children))) + (document handler)) + +(defmethod sax:start-element ((handler dom-builder) namespace-uri local-name qname attributes) + (with-slots (document element-stack) handler + (let ((element (dom:create-element document qname)) + (parent (car element-stack))) + (dolist (attr attributes) + (dom:set-attribute element (xml::attribute-qname attr) (xml::attribute-value attr))) + (setf (slot-value element 'dom-impl::parent) parent) + (push element (slot-value parent 'dom-impl::children)) + (push element element-stack)))) + +(defmethod sax:end-element ((handler dom-builder) namespace-uri local-name qname) + (let ((element (pop (element-stack handler)))) + (setf (slot-value element 'dom-impl::children) + (nreverse (slot-value element 'dom-impl::children))))) + +(defmethod sax:characters ((handler dom-builder) data) + (with-slots (document element-stack) handler + (let ((node (dom:create-text-node document data))) + (push node (slot-value (car element-stack) 'dom-impl::children))))) + +(defmethod sax:processing-instruction ((handler dom-builder) target data) + (with-slots (document element-stack) handler + (let ((node (dom:create-processing-instruction document target data))) + (push node (slot-value (car element-stack) 'dom-impl::children))))) diff --git a/xml/dom-impl.lisp b/xml/dom-impl.lisp new file mode 100644 index 0000000..02bac90 --- /dev/null +++ b/xml/dom-impl.lisp @@ -0,0 +1,512 @@ +(defpackage :dom-impl + (:use :glisp)) + +(in-package :dom-impl) + +;; Classes + +(defclass node () + ((parent :initarg :parent :initform nil) + (children :initarg :children :initform nil) + (owner :initarg :owner :initform nil))) + +(defclass document (node) + ((doc-type :initarg :doc-type :reader dom:doctype))) + +(defclass document-fragment (node) + ()) + +(defclass character-data (node) + ((data :initarg :data :reader dom:data))) + +(defclass attribute (node) + ((name :initarg :name :reader dom:name) + (value :initarg :value :reader dom:value) + (specified-p :initarg :specified-p :reader dom:specified))) + +(defclass element (node) + ((tag-name :initarg :tag-name :reader dom:tag-name) + (attributes :initarg :attributes :reader dom:attributes + :initform (make-instance 'named-node-map)))) + +(defclass text (character-data) + ()) + +(defclass comment (character-data) + ()) + +(defclass cdata-section (text) + ()) + +(defclass document-type (node) + ((name :initarg :name :reader dom:name) + (entities :initarg :entities :reader dom:entities) + (notations :initarg :notations :reader dom:notations))) + +(defclass notation (node) + ((name :initarg :name :reader dom:name) + (public-id :initarg :public-id :reader dom:public-id) + (system-id :initarg :system-id :reader dom:system-id))) + +(defclass entity (node) + ((name :initarg :name :reader dom:name) + (public-id :initarg :public-id :reader dom:public-id) + (system-id :initarg :system-id :reader dom:system-id) + (notation-name :initarg :notation-name :reader dom:notation-name))) + +(defclass entity-reference (node) + ((name :initarg :name :reader dom:name))) + +(defclass processing-instruction (node) + ((target :initarg :target :reader dom:target) + (data :initarg :data :reader dom:data))) + +(defclass named-node-map () + ((items :initarg :items :reader dom:items + :initform nil) )) + + +;;; Implementation + +;; document-fragment protocol +;; document protocol + +(defmethod dom:implementation ((document document)) + 'implementation) + +(defmethod dom:document-element ((document document)) + (dolist (k (dom:child-nodes document)) + (cond ((typep k 'element) + (return k))))) + +(defmethod dom:create-element ((document document) tag-name) + (setf tag-name (rod tag-name)) + (make-instance 'element + :tag-name tag-name + :owner document)) + +(defmethod dom:create-document-fragment ((document document)) + (make-instance 'document-fragment + :owner document)) + +(defmethod dom:create-text-node ((document document) data) + (setf data (rod data)) + (make-instance 'text + :data data + :owner document)) + +(defmethod dom:create-comment ((document document) data) + (setf data (rod data)) + (make-instance 'comment + :data data + :owner document)) + +(defmethod dom:create-cdata-section ((document document) data) + (setf data (rod data)) + (make-instance 'cdata-section + :data data + :owner document)) + +(defmethod dom:create-processing-instruction ((document document) target data) + (setf target (rod target)) + (setf data (rod data)) + (make-instance 'processing-instruction + :owner document + :target target + :data data)) + +(defmethod dom:create-attribute ((document document) name) + (setf name (rod name)) + (make-instance 'attribute + :name name + :specified-p nil ;??? + :owner document)) + +(defmethod dom:create-entity-reference ((document document) name) + (setf name (rod name)) + (make-instance 'entity-reference + :name name + :owner document)) + +(defmethod dom:get-elements-by-tag-name ((document document) tag-name) + (setf tag-name (rod tag-name)) + (let ((result nil)) + (setf tag-name (rod tag-name)) + (let ((wild-p (rod= tag-name '#.(string-rod "*")))) + (labels ((walk (n) + (when (and (dom:element-p n) + (or wild-p (tag-name-eq tag-name (dom:node-name n)))) + (push n result)) + (mapc #'walk (dom:child-nodes n)))) + (walk document) + (reverse result))))) + +;;; Node + +(defmethod dom:parent-node ((node node)) + (slot-value node 'parent)) + +(defmethod dom:child-nodes ((node node)) + (slot-value node 'children)) + +(defmethod dom:first-child ((node node)) + (car (slot-value node 'children))) + +(defmethod dom:last-child ((node node)) + (car (last (slot-value node 'children)))) + +(defmethod dom:previous-sibling ((node node)) + (with-slots (parent) node + (when parent + (with-slots (children) parent + (do ((q children (cdr q))) + ((null (cdr q)) niL) + (cond ((eq (cadr q) node) + (return (car q))))))))) + +(defmethod dom:next-sibling ((node node)) + (with-slots (parent) node + (when parent + (with-slots (children) parent + (do ((q children (cdr q))) + ((null (cdr q)) niL) + (cond ((eq (car q) node) + (return (cadr q))))))))) + +(defmethod dom:owner-document ((node node)) + (slot-value node 'owner)) + +(defun ensure-valid-insertion-request (node new-child) + (unless (can-adopt-p node new-child) + ;; HIERARCHY_REQUEST_ERR + (error "~S cannot adopt ~S." node new-child)) + (unless (eq (dom:owner-document node) + (dom:owner-document new-child)) + ;; WRONG_DOCUMENT_ERR + (error "~S cannot adopt ~S, since it was created by a different document." + node new-child)) + (with-slots (children) node + (unless (null (slot-value new-child 'parent)) + (cond ((eq (slot-value new-child 'parent) + node) + ;; remove it first + (setf children (delete new-child children))) + (t + ;; otherwise it is an error. + ;; GB_INTEGRITY_ERR + (error "~S is already adopted." new-child)))) )) + +(defmethod dom:insert-before ((node node) (new-child node) (ref-child t)) + (ensure-valid-insertion-request node new-child) + (with-slots (children) node + (cond ((eq (car children) ref-child) + (setf (slot-value new-child 'parent) node) + (setf children (cons new-child children))) + (t + (do ((q children (cdr q))) + ((null (cdr q)) + (cond ((null ref-child) + (setf (slot-value new-child 'parent) node) + (setf (cdr q) (cons new-child nil))) + (t + ;; NOT_FOUND_ERR + (error "~S is no child of ~S." ref-child node)))) + (cond ((eq (cadr q) ref-child) + (setf (slot-value new-child 'parent) node) + (setf (cdr q) (cons new-child (cdr q))) + (return)))))) + new-child)) + +(defmethod dom:insert-before ((node node) (fragment document-fragment) ref-child) + (dolist (child (dom:child-nodes fragment)) + (dom:insert-before node child ref-child)) + fragment) + +(defmethod dom:replace-child ((node node) (new-child node) (old-child node)) + (ensure-valid-insertion-request node new-child) + (with-slots (children) node + (do ((q children (cdr q))) + ((null q) + ;; NOT_FOUND_ERR + (error "~S is no child of ~S." old-child node)) + (cond ((eq (car q) old-child) + (setf (car q) new-child) + (setf (slot-value new-child 'parent) node) + (setf (slot-value old-child 'parent) nil) + (return)))) + old-child)) + +(defmethod dom:append-child ((node node) (new-child node)) + (ensure-valid-insertion-request node new-child) + (with-slots (children) node + (setf children (nconc children (list new-child))) + (setf (slot-value new-child 'parent) node) + new-child)) + +(defmethod dom:has-child-nodes ((node node)) + (not (null (slot-value node 'children)))) + +(defmethod dom:append-child ((node node) (new-child document-fragment)) + (dolist (child (dom:child-nodes new-child)) + (dom:append-child node child)) + new-child) + +;; was auf node noch implemetiert werden muss: +;; - node-type +;; - can-adopt-p +;; - ggf attributes +;; - node-name +;; - node-value + +;; node-name + +(defmethod dom:node-name ((self document)) + '#.(string-rod "#document")) + +(defmethod dom:node-name ((self document-fragment)) + '#.(string-rod "#document-fragment")) + +(defmethod dom:node-name ((self text)) + '#.(string-rod "#text")) + +(defmethod dom:node-name ((self cdata-section)) + '#.(string-rod "#cdata-section")) + +(defmethod dom:node-name ((self comment)) + '#.(string-rod "#comment")) + +(defmethod dom:node-name ((self attribute)) + (dom:name self)) + +(defmethod dom:node-name ((self element)) + (dom:tag-name self)) + +(defmethod dom:node-name ((self document-type)) + (dom:name self)) + +(defmethod dom:node-name ((self notation)) + (dom:name self)) + +(defmethod dom:node-name ((self entity)) + (dom:name self)) + +(defmethod dom:node-name ((self entity-reference)) + (dom:name self)) + +(defmethod dom:node-name ((self processing-instruction)) + (dom:target self)) + +;; node-type + +(defmethod dom:node-type ((self document)) :document) +(defmethod dom:node-type ((self document-fragment)) :document-fragment) +(defmethod dom:node-type ((self text)) :text) +(defmethod dom:node-type ((self comment)) :comment) +(defmethod dom:node-type ((self cdata-section)) :cdata-section) +(defmethod dom:node-type ((self attribute)) :attribute) +(defmethod dom:node-type ((self element)) :element) +(defmethod dom:node-type ((self document-type)) :document-type) +(defmethod dom:node-type ((self notation)) :notation) +(defmethod dom:node-type ((self entity)) :entity) +(defmethod dom:node-type ((self entity-reference)) :entity-reference) +(defmethod dom:node-type ((self processing-instruction)) :processing-instruction) + +;; node-value + +(defmethod dom:node-value ((self document)) nil) +(defmethod dom:node-value ((self document-fragment)) nil) +(defmethod dom:node-value ((self character-data)) (dom:data self)) +(defmethod dom:node-value ((self attribute)) (dom:name self)) +(defmethod dom:node-value ((self element)) nil) +(defmethod dom:node-value ((self document-type)) nil) +(defmethod dom:node-value ((self notation)) nil) +(defmethod dom:node-value ((self entity)) nil) +(defmethod dom:node-value ((self entity-reference)) nil) +(defmethod dom:node-value ((self processing-instruction)) (dom:data self)) + +;; attributes + +;; (gibt es nur auf element) + +(defmethod dom:attributes ((self node)) + nil) + +;; dann fehlt noch can-adopt und attribute conventions fuer adoption + +;;; NAMED-NODE-MAP + +(defmethod dom:get-named-item ((self named-node-map) name) + (setf name (rod name)) + (with-slots (items) self + (dolist (k items nil) + (cond ((rod= name (dom:node-name k)) + (return k)))))) + +(defmethod dom:set-named-item ((self named-node-map) arg) + (let ((name (dom:node-name arg))) + (with-slots (items) self + (dolist (k items (progn (setf items (cons arg items))nil)) + (cond ((rod= name (dom:node-name k)) + (setf items (cons arg (delete k items))) + (return k))))))) + +(defmethod dom:remove-named-item ((self named-node-map) name) + (setf name (rod name)) + (with-slots (items) self + (dolist (k items nil) + (cond ((rod= name (dom:node-name k)) + (setf items (delete k items)) + (return k)))))) + +(defmethod dom:length ((self named-node-map)) + (with-slots (items) self + (length items))) + +(defmethod dom:item ((self named-node-map) index) + (with-slots (items) self + (elt items index))) + +;;; CHARACTER-DATA + +(defmethod dom:length ((node character-data)) + (length (slot-value node 'value))) + +(defmethod dom:substring-data ((node character-data) offset count) + (subseq (slot-value node 'value) offset (+ offset count))) + +(defmethod dom:append-data ((node character-data) arg) + (setq arg (rod arg)) + (with-slots (value) node + (setf value (concatenate (type-of value) value arg))) + (values)) + +(defmethod dom:delete-data ((node character-data) offset count) + (with-slots (value) node + (let ((new (make-array (- (length value) count) :element-type (type-of value)))) + (replace new value + :start1 0 :end1 offset + :start2 0 :end2 offset) + (replace new value + :start1 offset :end1 (length new) + :start2 (+ offset count) :end2 (length value)) + (setf value new))) + (values)) + +(defmethod dom:replace-data ((node character-data) offset count arg) + (setf arg (rod arg)) + (with-slots (value) node + (replace value arg + :start1 offset :end1 (+ offset count) + :start2 0 :end2 count)) + (values)) + +;;; ATTR + +;; hmm... value muss noch entities lesen und text-nodes in die hierarchie hängen. + +(defmethod (setf dom:value) (new-value (node attribute)) + (setf (slot-value node 'value) (rod new-value))) + +;;; ELEMENT + +(defmethod dom:get-attribute-node ((element element) name) + (dom:get-named-item (dom:attributes element) name)) + +(defmethod dom:set-attribute-node ((element element) (new-attr attribute)) + (dom:set-named-item (dom:attributes element) new-attr)) + +(defmethod dom:get-attribute ((element element) name) + (let ((a (dom:get-attribute-node element name))) + (if a + (dom:value a) + nil))) + +(defmethod dom:set-attribute ((element element) name value) + (with-slots (owner) element + (dom:set-attribute-node + element (make-instance 'attribute + :owner owner + :name name + :value value + :specified-p t)) + (values))) + +(defmethod dom:remove-attribute-node ((element element) (old-attr attribute)) + (let ((res (dom:remove-named-item element (dom:name old-attr)))) + (if res + res + ;; NOT_FOUND_ERR + (error "Attribute not found.")))) + +(defmethod dom:get-elements-by-tag-name ((element element) name) + name + (error "Not implemented.")) + +(defmethod dom:normalize ((element element)) + (error "Not implemented.") ) + +;;; TEXT + +(defmethod dom:split-text ((text text) offset) + offset + (error "Not implemented.")) + +;;; COMMENT -- nix +;;; CDATA-SECTION -- nix + +;;; DOCUMENT-TYPE -- missing +;;; NOTATION -- nix +;;; ENTITY -- nix +;;; ENTITY-REFERENCE -- nix +;;; PROCESSING-INSTRUCTION -- nix + +;; Notbehelf! +(defun can-adopt-p (x y) x y t) + + +;;; predicates + +(defmethod dom:node-p ((object node)) t) +(defmethod dom:node-p ((object t)) nil) + +(defmethod dom:document-p ((object document)) t) +(defmethod dom:document-p ((object t)) nil) + +(defmethod dom:document-fragment-p ((object document-fragment)) t) +(defmethod dom:document-fragment-p ((object t)) nil) + +(defmethod dom:character-data-p ((object character-data)) t) +(defmethod dom:character-data-p ((object t)) nil) + +(defmethod dom:attribute-p ((object attribute)) t) +(defmethod dom:attribute-p ((object t)) nil) + +(defmethod dom:element-p ((object element)) t) +(defmethod dom:element-p ((object t)) nil) + +(defmethod dom:text-node-p ((object text)) t) +(defmethod dom:text-node-p ((object t)) nil) + +(defmethod dom:comment-p ((object comment)) t) +(defmethod dom:comment-p ((object t)) nil) + +(defmethod dom:cdata-section-p ((object cdata-section)) t) +(defmethod dom:cdata-section-p ((object t)) nil) + +(defmethod dom:document-type-p ((object document-type)) t) +(defmethod dom:document-type-p ((object t)) nil) + +(defmethod dom:notation-p ((object notation)) t) +(defmethod dom:notation-p ((object t)) nil) + +(defmethod dom:entity-p ((object entity)) t) +(defmethod dom:entity-p ((object t)) nil) + +(defmethod dom:entity-reference-p ((object entity-reference)) t) +(defmethod dom:entity-reference-p ((object t)) nil) + +(defmethod dom:processing-instruction-p ((object processing-instruction)) t) +(defmethod dom:processing-instruction-p ((object t)) nil) + +(defmethod dom:named-node-map-p ((object named-node-map)) t) +(defmethod dom:named-node-map-p ((object t)) nil) diff --git a/xml/dompack.lisp b/xml/dompack.lisp new file mode 100644 index 0000000..247d638 --- /dev/null +++ b/xml/dompack.lisp @@ -0,0 +1,102 @@ +(defpackage :dom + (:use) + (:export + + ;; methods + #:has-feature + #:doctype + #:implementation + #:document-element + #:create-element + #:create-document-fragment + #:create-text-node + #:create-comment + #:create-cdata-section + #:create-processing-instruction + #:create-attribute + #:create-entity-reference + #:get-elements-by-tag-name + #:node-name + #:node-value + #:node-type + #:parent-node + #:child-nodes + #:first-child + #:last-child + #:previous-sibling + #:next-sibling + #:attributes + #:owner-document + #:insert-before + #:replace-child + #:remove-child + #:append-child + #:has-child-nodes + #:clone-node + #:item + #:length + #:get-named-item + #:set-named-item + #:remove-named-item + #:data + #:substring-data + #:append-data + #:insert-data + #:delete-data + #:replace-data + #:name + #:specified + #:value + #:tag-name + #:get-attribute + #:set-attribute + #:remove-atttribute + #:get-attribute-node + #:set-attribute-node + #:remove-attribute-node + #:normalize + #:split-text + #:entities + #:notations + #:public-id + #:system-id + #:notation-name + #:target + + ;; protocol classes + #:dom-implementation + #:document-fragment + #:document + #:node + #:node-list + #:named-node-map + #:character-data + #:attr + #:element + #:text + #:comment + #:cdata-section + #:document-type + #:notation + #:entity + #:entity-reference + #:processing-instruction + ;; + #:items + ;; + #:node-p + #:document-p + #:document-fragment-p + #:character-data-p + #:attribute-p + #:element-p + #:text-node-p + #:comment-p + #:cdata-section-p + #:document-type-p + #:notation-p + #:entity-p + #:entity-reference-p + #:processing-instruction-p + #:named-node-map-p + )) \ No newline at end of file diff --git a/xml/encodings-data.lisp b/xml/encodings-data.lisp new file mode 100644 index 0000000..e29a683 --- /dev/null +++ b/xml/encodings-data.lisp @@ -0,0 +1,568 @@ +(in-package :encoding) + +(progn + (add-name :us-ascii "ANSI_X3.4-1968") + (add-name :us-ascii "iso-ir-6") + (add-name :us-ascii "ANSI_X3.4-1986") + (add-name :us-ascii "ISO_646.irv:1991") + (add-name :us-ascii "ASCII") + (add-name :us-ascii "ISO646-US") + (add-name :us-ascii "US-ASCII") + (add-name :us-ascii "us") + (add-name :us-ascii "IBM367") + (add-name :us-ascii "cp367") + (add-name :us-ascii "csASCII") + + (add-name :iso-8859-1 "ISO_8859-1:1987") + (add-name :iso-8859-1 "iso-ir-100") + (add-name :iso-8859-1 "ISO_8859-1") + (add-name :iso-8859-1 "ISO-8859-1") + (add-name :iso-8859-1 "latin1") + (add-name :iso-8859-1 "l1") + (add-name :iso-8859-1 "IBM819") + (add-name :iso-8859-1 "CP819") + (add-name :iso-8859-1 "csISOLatin1") + + (add-name :iso-8859-2 "ISO_8859-2:1987") + (add-name :iso-8859-2 "iso-ir-101") + (add-name :iso-8859-2 "ISO_8859-2") + (add-name :iso-8859-2 "ISO-8859-2") + (add-name :iso-8859-2 "latin2") + (add-name :iso-8859-2 "l2") + (add-name :iso-8859-2 "csISOLatin2") + + (add-name :iso-8859-3 "ISO_8859-3:1988") + (add-name :iso-8859-3 "iso-ir-109") + (add-name :iso-8859-3 "ISO_8859-3") + (add-name :iso-8859-3 "ISO-8859-3") + (add-name :iso-8859-3 "latin3") + (add-name :iso-8859-3 "l3") + (add-name :iso-8859-3 "csISOLatin3") + + (add-name :iso-8859-4 "ISO_8859-4:1988") + (add-name :iso-8859-4 "iso-ir-110") + (add-name :iso-8859-4 "ISO_8859-4") + (add-name :iso-8859-4 "ISO-8859-4") + (add-name :iso-8859-4 "latin4") + (add-name :iso-8859-4 "l4") + (add-name :iso-8859-4 "csISOLatin4") + + (add-name :iso-8859-6 "ISO_8859-6:1987") + (add-name :iso-8859-6 "iso-ir-127") + (add-name :iso-8859-6 "ISO_8859-6") + (add-name :iso-8859-6 "ISO-8859-6") + (add-name :iso-8859-6 "ECMA-114") + (add-name :iso-8859-6 "ASMO-708") + (add-name :iso-8859-6 "arabic") + (add-name :iso-8859-6 "csISOLatinArabic") + + (add-name :iso-8859-7 "ISO_8859-7:1987") + (add-name :iso-8859-7 "iso-ir-126") + (add-name :iso-8859-7 "ISO_8859-7") + (add-name :iso-8859-7 "ISO-8859-7") + (add-name :iso-8859-7 "ELOT_928") + (add-name :iso-8859-7 "ECMA-118") + (add-name :iso-8859-7 "greek") + (add-name :iso-8859-7 "greek8") + (add-name :iso-8859-7 "csISOLatinGreek") + + (add-name :iso-8859-8 "ISO_8859-8:1988") + (add-name :iso-8859-8 "iso-ir-138") + (add-name :iso-8859-8 "ISO_8859-8") + (add-name :iso-8859-8 "ISO-8859-8") + (add-name :iso-8859-8 "hebrew") + (add-name :iso-8859-8 "csISOLatinHebrew") + + (add-name :iso-8859-5 "ISO_8859-5:1988") + (add-name :iso-8859-5 "iso-ir-144") + (add-name :iso-8859-5 "ISO_8859-5") + (add-name :iso-8859-5 "ISO-8859-5") + (add-name :iso-8859-5 "cyrillic") + (add-name :iso-8859-5 "csISOLatinCyrillic") + + (add-name :iso-8859-9 "ISO_8859-9:1989") + (add-name :iso-8859-9 "iso-ir-148") + (add-name :iso-8859-9 "ISO_8859-9") + (add-name :iso-8859-9 "ISO-8859-9") + (add-name :iso-8859-9 "latin5") + (add-name :iso-8859-9 "l5") + (add-name :iso-8859-9 "csISOLatin5") + + (add-name :iso-8859-15 "ISO_8859-15") + (add-name :iso-8859-15 "ISO-8859-15") + + (add-name :iso-8859-14 "ISO_8859-14") + (add-name :iso-8859-14 "ISO-8859-14") + + (add-name :koi8-r "KOI8-R") + (add-name :koi8-r "csKOI8R") + + (add-name :utf-8 "UTF-8") + + (add-name :utf-16 "UTF-16") + + (add-name :ucs-4 "ISO-10646-UCS-4") + (add-name :ucs-4 "UCS-4") + + (add-name :ucs-2 "ISO-10646-UCS-2") + (add-name :ucs-2 "UCS-2") ) + + +(progn + (define-encoding :iso-8859-1 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-1))) + + (define-encoding :iso-8859-2 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-2))) + + (define-encoding :iso-8859-3 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-3))) + + (define-encoding :iso-8859-4 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-4))) + + (define-encoding :iso-8859-5 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-5))) + + (define-encoding :iso-8859-6 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-6))) + + (define-encoding :iso-8859-7 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-7))) + + (define-encoding :iso-8859-8 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-8))) + + (define-encoding :iso-8859-14 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-14))) + + (define-encoding :iso-8859-15 + (make-simple-8-bit-encoding + :charset (find-charset :iso-8859-15))) + + (define-encoding :koi8-r + (make-simple-8-bit-encoding + :charset (find-charset :koi8-r))) + + (define-encoding :utf-8 :utf-8) + ) + +(progn + (define-8-bit-charset :iso-8859-1 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) + + (define-8-bit-charset :iso-8859-2 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0104 #x02D8 #x0141 #x00A4 #x013D #x015A #x00A7 + #| #o25x |# #x00A8 #x0160 #x015E #x0164 #x0179 #x00AD #x017D #x017B + #| #o26x |# #x00B0 #x0105 #x02DB #x0142 #x00B4 #x013E #x015B #x02C7 + #| #o27x |# #x00B8 #x0161 #x015F #x0165 #x017A #x02DD #x017E #x017C + #| #o30x |# #x0154 #x00C1 #x00C2 #x0102 #x00C4 #x0139 #x0106 #x00C7 + #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x011A #x00CD #x00CE #x010E + #| #o32x |# #x0110 #x0143 #x0147 #x00D3 #x00D4 #x0150 #x00D6 #x00D7 + #| #o33x |# #x0158 #x016E #x00DA #x0170 #x00DC #x00DD #x0162 #x00DF + #| #o34x |# #x0155 #x00E1 #x00E2 #x0103 #x00E4 #x013A #x0107 #x00E7 + #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x011B #x00ED #x00EE #x010F + #| #o36x |# #x0111 #x0144 #x0148 #x00F3 #x00F4 #x0151 #x00F6 #x00F7 + #| #o37x |# #x0159 #x016F #x00FA #x0171 #x00FC #x00FD #x0163 #x02D9) + + (define-8-bit-charset :iso-8859-3 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0126 #x02D8 #x00A3 #x00A4 #xFFFF #x0124 #x00A7 + #| #o25x |# #x00A8 #x0130 #x015E #x011E #x0134 #x00AD #xFFFF #x017B + #| #o26x |# #x00B0 #x0127 #x00B2 #x00B3 #x00B4 #x00B5 #x0125 #x00B7 + #| #o27x |# #x00B8 #x0131 #x015F #x011F #x0135 #x00BD #xFFFF #x017C + #| #o30x |# #x00C0 #x00C1 #x00C2 #xFFFF #x00C4 #x010A #x0108 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #xFFFF #x00D1 #x00D2 #x00D3 #x00D4 #x0120 #x00D6 #x00D7 + #| #o33x |# #x011C #x00D9 #x00DA #x00DB #x00DC #x016C #x015C #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #xFFFF #x00E4 #x010B #x0109 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #xFFFF #x00F1 #x00F2 #x00F3 #x00F4 #x0121 #x00F6 #x00F7 + #| #o37x |# #x011D #x00F9 #x00FA #x00FB #x00FC #x016D #x015D #x02D9) + + (define-8-bit-charset :iso-8859-4 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0104 #x0138 #x0156 #x00A4 #x0128 #x013B #x00A7 + #| #o25x |# #x00A8 #x0160 #x0112 #x0122 #x0166 #x00AD #x017D #x00AF + #| #o26x |# #x00B0 #x0105 #x02DB #x0157 #x00B4 #x0129 #x013C #x02C7 + #| #o27x |# #x00B8 #x0161 #x0113 #x0123 #x0167 #x014A #x017E #x014B + #| #o30x |# #x0100 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x012E + #| #o31x |# #x010C #x00C9 #x0118 #x00CB #x0116 #x00CD #x00CE #x012A + #| #o32x |# #x0110 #x0145 #x014C #x0136 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x0172 #x00DA #x00DB #x00DC #x0168 #x016A #x00DF + #| #o34x |# #x0101 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x012F + #| #o35x |# #x010D #x00E9 #x0119 #x00EB #x0117 #x00ED #x00EE #x012B + #| #o36x |# #x0111 #x0146 #x014D #x0137 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x0173 #x00FA #x00FB #x00FC #x0169 #x016B #x02D9) + + (define-8-bit-charset :iso-8859-5 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x0401 #x0402 #x0403 #x0404 #x0405 #x0406 #x0407 + #| #o25x |# #x0408 #x0409 #x040A #x040B #x040C #x00AD #x040E #x040F + #| #o26x |# #x0410 #x0411 #x0412 #x0413 #x0414 #x0415 #x0416 #x0417 + #| #o27x |# #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E #x041F + #| #o30x |# #x0420 #x0421 #x0422 #x0423 #x0424 #x0425 #x0426 #x0427 + #| #o31x |# #x0428 #x0429 #x042A #x042B #x042C #x042D #x042E #x042F + #| #o32x |# #x0430 #x0431 #x0432 #x0433 #x0434 #x0435 #x0436 #x0437 + #| #o33x |# #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E #x043F + #| #o34x |# #x0440 #x0441 #x0442 #x0443 #x0444 #x0445 #x0446 #x0447 + #| #o35x |# #x0448 #x0449 #x044A #x044B #x044C #x044D #x044E #x044F + #| #o36x |# #x2116 #x0451 #x0452 #x0453 #x0454 #x0455 #x0456 #x0457 + #| #o37x |# #x0458 #x0459 #x045A #x045B #x045C #x00A7 #x045E #x045F) + + (define-8-bit-charset :iso-8859-6 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0660 #x0661 #x0662 #x0663 #x0664 #x0665 #x0666 #x0667 + #| #o07x |# #x0668 #x0669 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #xFFFF #xFFFF #xFFFF #x00A4 #xFFFF #xFFFF #xFFFF + #| #o25x |# #xFFFF #xFFFF #xFFFF #xFFFF #x060C #x00AD #xFFFF #xFFFF + #| #o26x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o27x |# #xFFFF #xFFFF #xFFFF #x061B #xFFFF #xFFFF #xFFFF #x061F + #| #o30x |# #xFFFF #x0621 #x0622 #x0623 #x0624 #x0625 #x0626 #x0627 + #| #o31x |# #x0628 #x0629 #x062A #x062B #x062C #x062D #x062E #x062F + #| #o32x |# #x0630 #x0631 #x0632 #x0633 #x0634 #x0635 #x0636 #x0637 + #| #o33x |# #x0638 #x0639 #x063A #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o34x |# #x0640 #x0641 #x0642 #x0643 #x0644 #x0645 #x0646 #x0647 + #| #o35x |# #x0648 #x0649 #x064A #x064B #x064C #x064D #x064E #x064F + #| #o36x |# #x0650 #x0651 #x0652 #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o37x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) + + (define-8-bit-charset :iso-8859-7 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x02BD #x02BC #x00A3 #xFFFF #xFFFF #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #xFFFF #x00AB #x00AC #x00AD #xFFFF #x2015 + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x0384 #x0385 #x0386 #x00B7 + #| #o27x |# #x0388 #x0389 #x038A #x00BB #x038C #x00BD #x038E #x038F + #| #o30x |# #x0390 #x0391 #x0392 #x0393 #x0394 #x0395 #x0396 #x0397 + #| #o31x |# #x0398 #x0399 #x039A #x039B #x039C #x039D #x039E #x039F + #| #o32x |# #x03A0 #x03A1 #xFFFF #x03A3 #x03A4 #x03A5 #x03A6 #x03A7 + #| #o33x |# #x03A8 #x03A9 #x03AA #x03AB #x03AC #x03AD #x03AE #x03AF + #| #o34x |# #x03B0 #x03B1 #x03B2 #x03B3 #x03B4 #x03B5 #x03B6 #x03B7 + #| #o35x |# #x03B8 #x03B9 #x03BA #x03BB #x03BC #x03BD #x03BE #x03BF + #| #o36x |# #x03C0 #x03C1 #x03C2 #x03C3 #x03C4 #x03C5 #x03C6 #x03C7 + #| #o37x |# #x03C8 #x03C9 #x03CA #x03CB #x03CC #x03CD #x03CE #xFFFF) + + (define-8-bit-charset :iso-8859-8 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #xFFFF #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00D7 #x00AB #x00AC #x00AD #x00AE #x203E + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00F7 #x00BB #x00BC #x00BD #x00BE #xFFFF + #| #o30x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o31x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o32x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o33x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #x2017 + #| #o34x |# #x05D0 #x05D1 #x05D2 #x05D3 #x05D4 #x05D5 #x05D6 #x05D7 + #| #o35x |# #x05D8 #x05D9 #x05DA #x05DB #x05DC #x05DD #x05DE #x05DF + #| #o36x |# #x05E0 #x05E1 #x05E2 #x05E3 #x05E4 #x05E5 #x05E6 #x05E7 + #| #o37x |# #x05E8 #x05E9 #x05EA #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF) + + (define-8-bit-charset :iso-8859-9 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x00A4 #x00A5 #x00A6 #x00A7 + #| #o25x |# #x00A8 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x00B4 #x00B5 #x00B6 #x00B7 + #| #o27x |# #x00B8 #x00B9 #x00BA #x00BB #x00BC #x00BD #x00BE #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x011E #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x0130 #x015E #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x011F #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x0131 #x015F #x00FF) + + (define-8-bit-charset :iso-8859-14 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x1E02 #x1E03 #x00A3 #x010A #x010B #x1E0A #x00A7 + #| #o25x |# #x1E80 #x00A9 #x1E82 #x1E0B #x1EF2 #x00AD #x00AE #x0178 + #| #o26x |# #x1E1E #x1E1F #x0120 #x0121 #x1E40 #x1E41 #x00B6 #x1E56 + #| #o27x |# #x1E81 #x1E57 #x1E83 #x1E60 #x1EF3 #x1E84 #x1E85 #x1E61 + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x0174 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x1E6A + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x0176 #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x0175 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x1E6B + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x0177 #x00FF) + + (define-8-bit-charset :iso-8859-15 + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o21x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o22x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o23x |# #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF #xFFFF + #| #o24x |# #x00A0 #x00A1 #x00A2 #x00A3 #x20AC #x00A5 #x0160 #x00A7 + #| #o25x |# #x0161 #x00A9 #x00AA #x00AB #x00AC #x00AD #x00AE #x00AF + #| #o26x |# #x00B0 #x00B1 #x00B2 #x00B3 #x017D #x00B5 #x00B6 #x00B7 + #| #o27x |# #x017E #x00B9 #x00BA #x00BB #x0152 #x0153 #x0178 #x00BF + #| #o30x |# #x00C0 #x00C1 #x00C2 #x00C3 #x00C4 #x00C5 #x00C6 #x00C7 + #| #o31x |# #x00C8 #x00C9 #x00CA #x00CB #x00CC #x00CD #x00CE #x00CF + #| #o32x |# #x00D0 #x00D1 #x00D2 #x00D3 #x00D4 #x00D5 #x00D6 #x00D7 + #| #o33x |# #x00D8 #x00D9 #x00DA #x00DB #x00DC #x00DD #x00DE #x00DF + #| #o34x |# #x00E0 #x00E1 #x00E2 #x00E3 #x00E4 #x00E5 #x00E6 #x00E7 + #| #o35x |# #x00E8 #x00E9 #x00EA #x00EB #x00EC #x00ED #x00EE #x00EF + #| #o36x |# #x00F0 #x00F1 #x00F2 #x00F3 #x00F4 #x00F5 #x00F6 #x00F7 + #| #o37x |# #x00F8 #x00F9 #x00FA #x00FB #x00FC #x00FD #x00FE #x00FF) + + (define-8-bit-charset :koi8-r + #| #o00x |# #x0000 #x0001 #x0002 #x0003 #x0004 #x0005 #x0006 #x0007 + #| #o01x |# #x0008 #x0009 #x000A #x000B #x000C #x000A #x000E #x000F + #| #o02x |# #x0010 #x0011 #x0012 #x0013 #x0014 #x0015 #x0016 #x0017 + #| #o03x |# #x0018 #x0019 #x001A #x001B #x001C #x001D #x001E #x001F + #| #o04x |# #x0020 #x0021 #x0022 #x0023 #x0024 #x0025 #x0026 #x0027 + #| #o05x |# #x0028 #x0029 #x002A #x002B #x002C #x002D #x002E #x002F + #| #o06x |# #x0030 #x0031 #x0032 #x0033 #x0034 #x0035 #x0036 #x0037 + #| #o07x |# #x0038 #x0039 #x003A #x003B #x003C #x003D #x003E #x003F + #| #o10x |# #x0040 #x0041 #x0042 #x0043 #x0044 #x0045 #x0046 #x0047 + #| #o11x |# #x0048 #x0049 #x004A #x004B #x004C #x004D #x004E #x004F + #| #o12x |# #x0050 #x0051 #x0052 #x0053 #x0054 #x0055 #x0056 #x0057 + #| #o13x |# #x0058 #x0059 #x005A #x005B #x005C #x005D #x005E #x005F + #| #o14x |# #x0060 #x0061 #x0062 #x0063 #x0064 #x0065 #x0066 #x0067 + #| #o15x |# #x0068 #x0069 #x006A #x006B #x006C #x006D #x006E #x006F + #| #o16x |# #x0070 #x0071 #x0072 #x0073 #x0074 #x0075 #x0076 #x0077 + #| #o17x |# #x0078 #x0079 #x007A #x007B #x007C #x007D #x007E #x007F + #| #o20x |# #x2500 #x2502 #x250C #x2510 #x2514 #x2518 #x251C #x2524 + #| #o21x |# #x252C #x2534 #x253C #x2580 #x2584 #x2588 #x258C #x2590 + #| #o22x |# #x2591 #x2592 #x2593 #x2320 #x25A0 #x2219 #x221A #x2248 + #| #o23x |# #x2264 #x2265 #x00A0 #x2321 #x00B0 #x00B2 #x00B7 #x00F7 + #| #o24x |# #x2550 #x2551 #x2552 #x0451 #x2553 #x2554 #x2555 #x2556 + #| #o25x |# #x2557 #x2558 #x2559 #x255A #x255B #x255C #x255D #x255E + #| #o26x |# #x255F #x2560 #x2561 #x0401 #x2562 #x2563 #x2564 #x2565 + #| #o27x |# #x2566 #x2567 #x2568 #x2569 #x256A #x256B #x256C #x00A9 + #| #o30x |# #x044E #x0430 #x0431 #x0446 #x0434 #x0435 #x0444 #x0433 + #| #o31x |# #x0445 #x0438 #x0439 #x043A #x043B #x043C #x043D #x043E + #| #o32x |# #x043F #x044F #x0440 #x0441 #x0442 #x0443 #x0436 #x0432 + #| #o33x |# #x044C #x044B #x0437 #x0448 #x044D #x0449 #x0447 #x044A + #| #o34x |# #x042E #x0410 #x0411 #x0426 #x0414 #x0415 #x0424 #x0413 + #| #o35x |# #x0425 #x0418 #x0419 #x041A #x041B #x041C #x041D #x041E + #| #o36x |# #x041F #x042F #x0420 #x0421 #x0422 #x0423 #x0416 #x0412 + #| #o37x |# #x042C #x042B #x0417 #x0428 #x042D #x0429 #x0427 #x042A) + ) + diff --git a/xml/encodings.lisp b/xml/encodings.lisp new file mode 100644 index 0000000..96e7f98 --- /dev/null +++ b/xml/encodings.lisp @@ -0,0 +1,347 @@ +(in-package :encoding) + +;;;; --------------------------------------------------------------------------- +;;;; Encoding names +;;;; + +(defvar *names* (make-hash-table :test #'eq)) + +(defun canon-name (string) + (with-output-to-string (bag) + (map nil (lambda (ch) + (cond ((char= ch #\_) (write-char #\- bag)) + (t (write-char (char-upcase ch) bag)))) + string))) + +(defun canon-name-2 (string) + (with-output-to-string (bag) + (map nil (lambda (ch) + (cond ((char= ch #\_)) + ((char= ch #\-)) + (t (write-char (char-upcase ch) bag)))) + string))) + +(defmethod encoding-names ((encoding symbol)) + (gethash encoding *names*)) + +(defmethod (setf encoding-names) (new-value (encoding symbol)) + (setf (gethash encoding *names*) new-value)) + +(defun add-name (encoding name) + (pushnew (canon-name name) (encoding-names encoding) :test #'string=)) + +(defun resolve-name (string) + (cond ((symbolp string) + string) + (t + (setq string (canon-name string)) + (or + (block nil + (maphash (lambda (x y) + (when (member string y :test #'string=) + (return x))) + *names*) + nil) + (block nil + (maphash (lambda (x y) + (when (member string y + :test #'(lambda (x y) + (string= (canon-name-2 x) + (canon-name-2 y)))) + (return x))) + *names*) + nil))))) + +;;;; --------------------------------------------------------------------------- +;;;; Encodings +;;;; + +(defvar *encodings* (make-hash-table :test #'eq)) + +(defmacro define-encoding (name init-form) + `(progn + (setf (gethash ',name *encodings*) + (list nil (lambda () ,init-form))) + ',name)) + +(defun find-encoding (name) + (let ((x (gethash (resolve-name name) *encodings*))) + (and x + (or (first x) + (setf (first x) (funcall (second x))))))) + +(defclass encoding () ()) + +(defclass simple-8-bit-encoding (encoding) + ((table :initarg :table))) + +(defun make-simple-8-bit-encoding (&key charset) + (make-instance 'simple-8-bit-encoding + :table (coerce (to-unicode-table charset) '(simple-array (unsigned-byte 16) (256))))) + +;;;;;;; + +(defmacro fx-op (op &rest xs) + `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) +(defmacro fx-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) + +(defmacro %+ (&rest xs) `(fx-op + ,@xs)) +(defmacro %- (&rest xs) `(fx-op - ,@xs)) +(defmacro %* (&rest xs) `(fx-op * ,@xs)) +(defmacro %/ (&rest xs) `(fx-op floor ,@xs)) +(defmacro %and (&rest xs) `(fx-op logand ,@xs)) +(defmacro %ior (&rest xs) `(fx-op logior ,@xs)) +(defmacro %xor (&rest xs) `(fx-op logxor ,@xs)) +(defmacro %ash (&rest xs) `(fx-op ash ,@xs)) +(defmacro %mod (&rest xs) `(fx-op mod ,@xs)) + +(defmacro %= (&rest xs) `(fx-pred = ,@xs)) +(defmacro %<= (&rest xs) `(fx-pred <= ,@xs)) +(defmacro %>= (&rest xs) `(fx-pred >= ,@xs)) +(defmacro %< (&rest xs) `(fx-pred < ,@xs)) +(defmacro %> (&rest xs) `(fx-pred > ,@xs)) + +(defmethod decode-sequence ((encoding (eql :utf-16-big-endian)) + in in-start in-end out out-start out-end eof?) + ;; -> new wptr, new rptr + (let ((wptr out-start) + (rptr in-start)) + (loop + (when (%= wptr out-end) + (return)) + (when (>= (%+ rptr 1) in-end) + (return)) + (let ((hi (aref in rptr)) + (lo (aref in (%+ 1 rptr)))) + (setf rptr (%+ 2 rptr)) + (setf (aref out wptr) (logior (ash hi 8) lo)) + (setf wptr (%+ 1 wptr)))) + (values wptr rptr))) + +(defmethod decode-sequence ((encoding (eql :utf-16-little-endian)) + in in-start in-end out out-start out-end eof?) + ;; -> new wptr, new rptr + (let ((wptr out-start) + (rptr in-start)) + (loop + (when (%= wptr out-end) + (return)) + (when (>= (%+ rptr 1) in-end) + (return)) + (let ((lo (aref in (%+ 0 rptr))) + (hi (aref in (%+ 1 rptr)))) + (setf rptr (%+ 2 rptr)) + (setf (aref out wptr) (logior (ash hi 8) lo)) + (setf wptr (%+ 1 wptr)))) + (values wptr rptr))) + +(defmethod decode-sequence ((encoding (eql :utf-8)) + in in-start in-end out out-start out-end eof?) + (declare (optimize (speed 3) (safety 0)) + (type (simple-array (unsigned-byte 8) (*)) in) + (type (simple-array rune (*)) out) + (type fixnum in-start in-end out-start out-end)) + (let ((wptr out-start) + (rptr in-start) + byte0) + (macrolet ((put (x) + `((lambda (x) + (cond ((or (<= #xD800 x #xDBFF) + (<= #xDC00 x #xDFFF)) + (error "Encoding UTF-16 in UTF-8? : #x~x." x))) + '(unless (data-char-p x) + (error "#x~x is not a data character." x)) + ;;(fresh-line) + ;;(prin1 x) (princ "-> ") + (cond ((%> x #xFFFF) + (setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10)) + (aref out (%+ 1 wptr)) (%ior #xDC00 (%and x #x3FF))) + (setf wptr (%+ wptr 2))) + (t + (setf (aref out wptr) x) + (setf wptr (%+ wptr 1))))) + ,x)) + (put1 (x) + `(progn + (setf (aref out wptr) ,x) + (setf wptr (%+ wptr 1))))) + (loop + (when (%= (+ wptr 1) out-end) (return)) + (when (%>= rptr in-end) (return)) + (setq byte0 (aref in rptr)) + (cond ((= byte0 #x0D) + ;; CR handling + ;; we need to know the following character + (cond ((>= (%+ rptr 1) in-end) + ;; no characters in buffer + (cond (eof? + ;; at EOF, pass it as NL + (put #x0A) + (setf rptr (%+ rptr 1))) + (t + ;; demand more characters + (return)))) + ((= (aref in (%+ rptr 1)) #x0A) + ;; we see CR NL, so forget this CR and the next NL will be + ;; inserted literally + (setf rptr (%+ rptr 1))) + (t + ;; singleton CR, pass it as NL + (put #x0A) + (setf rptr (%+ rptr 1))))) + + ((%<= #|#b00000000|# byte0 #b01111111) + (put1 byte0) + (setf rptr (%+ rptr 1))) + + ((%<= #|#b10000000|# byte0 #b10111111) + (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0) + (setf rptr (%+ rptr 1))) + + ((%<= #|#b11000000|# byte0 #b11011111) + (cond ((< (%+ rptr 2) in-end) + (put + (dpb (ldb (byte 5 0) byte0) (byte 5 6) + (dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0) + 0))) + (setf rptr (%+ rptr 2))) + (t + (return)))) + + ((%<= #|#b11100000|# byte0 #b11101111) + (cond ((< (%+ rptr 3) in-end) + (put + (dpb (ldb (byte 4 0) byte0) (byte 4 12) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 0) + 0)))) + (setf rptr (%+ rptr 3))) + (t + (return)))) + + ((%<= #|#b11110000|# byte0 #b11110111) + (cond ((< (%+ rptr 4) in-end) + (put + (dpb (ldb (byte 3 0) byte0) (byte 3 18) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 0) + 0))))) + (setf rptr (%+ rptr 4))) + (t + (return)))) + + ((%<= #|#b11111000|# byte0 #b11111011) + (cond ((< (%+ rptr 5) in-end) + (put + (dpb (ldb (byte 2 0) byte0) (byte 2 24) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 0) + 0)))))) + (setf rptr (%+ rptr 5))) + (t + (return)))) + + ((%<= #|#b11111100|# byte0 #b11111101) + (cond ((< (%+ rptr 6) in-end) + (put + (dpb (ldb (byte 1 0) byte0) (byte 1 30) + (dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24) + (dpb (ldb (byte 6 0) (aref in (%+ 2 rptr))) (byte 6 18) + (dpb (ldb (byte 6 0) (aref in (%+ 3 rptr))) (byte 6 12) + (dpb (ldb (byte 6 0) (aref in (%+ 4 rptr))) (byte 6 6) + (dpb (ldb (byte 6 0) (aref in (%+ 5 rptr))) (byte 6 0) + 0))))))) + (setf rptr (%+ rptr 6))) + (t + (return)))) + + (t + (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) + (values wptr rptr)) ) + +(defmethod encoding-p ((object (eql :utf-16-little-endian))) t) +(defmethod encoding-p ((object (eql :utf-16-big-endian))) t) +(defmethod encoding-p ((object (eql :utf-8))) t) + +(defmethod encoding-p ((object encoding)) t) + +(defmethod decode-sequence ((encoding simple-8-bit-encoding) + in in-start in-end + out out-start out-end + eof?) + (declare (optimize (speed 3) (safety 0)) + (type (simple-array (unsigned-byte 8) (*)) in) + (type (simple-array rune (*)) out) + (type fixnum in-start in-end out-start out-end)) + (let ((wptr out-start) + (rptr in-start) + (byte 0) + (table (slot-value encoding 'table))) + (declare (type fixnum wptr rptr) + (type (unsigned-byte 8) byte) + (type (simple-array (unsigned-byte 16) (*)) table)) + (loop + (when (%= wptr out-end) (return)) + (when (%>= rptr in-end) (return)) + (setq byte (aref in rptr)) + (cond ((= byte #x0D) + ;; CR handling + ;; we need to know the following character + (cond ((>= (%+ rptr 1) in-end) + ;; no characters in buffer + (cond (eof? + ;; at EOF, pass it as NL + (setf (aref out wptr) #x0A) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))) + (t + ;; demand more characters + (return)))) + ((= (aref in (%+ rptr 1)) #x0A) + ;; we see CR NL, so forget this CR and the next NL will be + ;; inserted literally + (setf rptr (%+ rptr 1))) + (t + ;; singleton CR, pass it as NL + (setf (aref out wptr) #x0A) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))))) + + (t + (setf (aref out wptr) (aref table byte)) + (setf wptr (%+ wptr 1)) + (setf rptr (%+ rptr 1))) )) + (values wptr rptr))) + +;;;; --------------------------------------------------------------------------- +;;;; Character sets +;;;; + +(defvar *charsets* (make-hash-table :test #'eq)) + +(defclass 8-bit-charset () + ((name :initarg :name) + (to-unicode-table + :initarg :to-unicode-table + :reader to-unicode-table))) + +(defmacro define-8-bit-charset (name &rest codes) + (assert (= 256 (length codes))) + `(progn + (setf (gethash ',name *charsets*) + (make-instance '8-bit-charset + :name ',name + :to-unicode-table + ',(make-array 256 + :element-type '(unsigned-byte 16) + :initial-contents codes))) + ',name)) + +(defun find-charset (name) + (or (gethash name *charsets*) + (error "There is no character set named ~S." name))) + diff --git a/xml/package.lisp b/xml/package.lisp new file mode 100644 index 0000000..6722810 --- /dev/null +++ b/xml/package.lisp @@ -0,0 +1,41 @@ +(in-package :cl-user) + +(defpackage :Encoding + (:use :glisp) + (:export + #:find-encoding + #:decode-sequence)) + +(defpackage :XML + (:use + :glisp + :encoding) + + (:Export + ;; xstreams + #:make-xstream + #:make-rod-xstream + #:close-xstream + #:read-rune + #:peek-rune + #:unread-rune + #:fread-rune + #:fpeek-rune + #:xstream-position + #:xstream-line-number + #:xstream-column-number + #:xstream-plist + #:xstream-encoding + + ;; xstream controller protocol + #:read-octects + #:xstream/close + + #:attribute-namespace-uri + #:attribute-local-name + #:attribute-qname + #:attribute-value + + #:parse-file + #:parse-stream + #:parse-string) ) diff --git a/xml/recoder.lisp b/xml/recoder.lisp new file mode 100644 index 0000000..e8c5093 --- /dev/null +++ b/xml/recoder.lisp @@ -0,0 +1,110 @@ +(in-package :cxml) + +(defclass recoder () + ((recoder :initarg :recoder :accessor recoder) + (chained-handler :initarg :chained-handler :accessor chained-handler))) + +(defun make-recoder (chained-handler &optional (recoder-fn #'rod-string)) + (make-instance 'recoder + :recoder recoder-fn + :chained-handler chained-handler)) + +(macrolet ((%string (rod) + `(let ((rod ,rod)) + (if (typep rod '(or rod string)) + (funcall (recoder handler) rod) + rod))) + (defwrapper (name (&rest args) &rest forms) + `(defmethod ,name ((handler recoder) ,@args) + (,name (chained-handler handler) ,@forms)))) + (defwrapper sax:start-document ()) + + (defwrapper sax:start-element + (namespace-uri local-name qname attributes) + (%string namespace-uri) + (%string local-name) + (%string qname) + (mapcar (lambda (attr) + (sax:make-attribute + :namespace-uri (%string (sax:attribute-namespace-uri attr)) + :local-name (%string (sax:attribute-local-name attr)) + :qname (%string (sax:attribute-qname attr)) + :value (%string (sax:attribute-value attr)) + :specified-p (sax:attribute-specified-p attr))) + attributes)) + + (defwrapper sax:start-prefix-mapping (prefix uri) + (%string prefix) + (%string uri)) + + (defwrapper sax:characters (data) + (%string data)) + + (defwrapper sax:processing-instruction (target data) + (%string target) + (%string data)) + + (defwrapper sax:end-prefix-mapping (prefix) + (%string prefix)) + + (defwrapper sax:end-element (namespace-uri local-name qname) + (%string namespace-uri) + (%string local-name) + (%string qname)) + + (defwrapper sax:end-document ()) + + (defwrapper sax:comment (data) + (%string data)) + + (defwrapper sax:start-cdata ()) + + (defwrapper sax:end-cdata ()) + + (defwrapper sax:start-dtd (name public-id system-id) + (%string name) + (%string public-id) + (%string system-id)) + + (defwrapper sax:end-dtd ()) + + (defwrapper sax:unparsed-entity-declaration + (name public-id system-id notation-name) + (%string name) + (%string public-id) + (%string system-id) + (%string notation-name)) + + (defwrapper sax:external-entity-declaration + (kind name public-id system-id) + (%string kind) + (%string name) + (%string public-id) + (%string system-id)) + + (defwrapper sax:internal-entity-declaration + (kind name value) + kind + (%string name) + (%string value)) + + (defwrapper sax:notation-declaration + (name public-id system-id) + (%string name) + (%string public-id) + (%string system-id)) + + (defwrapper sax:element-declaration (name model) + (%string name) + model) + + (defwrapper sax:attribute-declaration + (element-name attribute-name type default) + (%string element-name) + (%string attribute-name) + (%string type) + (%string default)) + + (defwrapper sax:entity-resolver + (resolver) + resolver)) diff --git a/xml/sax-handler.lisp b/xml/sax-handler.lisp new file mode 100644 index 0000000..4fa91e4 --- /dev/null +++ b/xml/sax-handler.lisp @@ -0,0 +1,227 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: SAX; readtable: glisp; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: A SAX2-like API for the xml parser +;;; Created: 2003-06-30 +;;; Author: Henrik Motakef +;;; License: BSD +;;; --------------------------------------------------------------------------- +;;; © copyright 2003 by Henrik Motakef + +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions are +;;; met: +;;; +;;; 1. Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. +;;; +;;; 2. Redistributions in binary form must reproduce the above copyright +;;; notice, this list of conditions and the following disclaimer in the +;;; documentation and/or other materials provided with the distribution +;;; +;;; THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +;;; WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +;;; MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +;;; IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +;;; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +;;; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +;;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +;;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +;;; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +;;; IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +;;; POSSIBILITY OF SUCH DAMAGE. + +;;; TODO/ Open Questions: + +;; o Should there be a predefined "handler" class, or even several +;; (like Java SAX' ContentHandler, DTDHandler, LexicalHandler etc? I +;; don't really see why. +;; o Missing stuff from Java SAX2: +;; * ignorable-whitespace +;; * document-locator/(setf document-locator) +;; (probably implies a handler class with an appropriate slot) +;; * skipped-entity +;; * notation-declaration +;; * unparsed-entity-declaration +;; * The whole ErrorHandler class, this is better handled using +;; conditions (but isn't yet) +;; * The LexicalHandler (start-cdata etc) would be nice +;; * The DeclHandler interface (element-decl, attribute-decl...) +;; is useful, but the Java interface sucks. +;; o Despite all the namespace-uri etc arguments, namespaces are not +;; really supported yet, the xml parser always passes nil. This will +;; hopefully change Real Soon Now, and I didn't want to have to +;; rewrite the interface then + +(defpackage :sax + (:use :common-lisp) + (:export #:*namespace-processing* + #:*include-xmlns-attributes* + #:*use-xmlns-namespace* + + #:start-document + #:start-prefix-mapping + #:start-element + #:characters + #:processing-instruction + #:end-element + #:end-prefix-mapping + #:end-document + #:comment + #:start-cdata + #:end-cdata)) + +(in-package :sax) + +;; The http://xml.org/sax/features/namespaces property +(defvar *namespace-processing* t + "If non-nil (the default), namespace processing is enabled. + +See also `start-element' and `end-element' for a detailed description +of the consequences of modifying this variable, and +`*include-xmlns-attributes*' and `*use-xmlns-namespace*' for further +related options.") + +;; The http://xml.org/sax/features/namespace-prefixes property. +(defvar *include-xmlns-attributes* nil + "If non-nil, namespace declarations are reported as normal +attributes. + +This variable has no effect unless `*namespace-processing*' is +non-nil. + +See also `*use-xmlns-namespace*', and `start-element' for a detailed +description of the consequences of setting this variable.") + +(defvar *use-xmlns-namespace* nil + "If this variable is nil (the default), attributes with a name like +'xmlns:x' are not considered to be in a namespace, following the +'Namespaces in XML' specification. + +If it is non-nil, such attributes are considered to be in a namespace +with the URI 'http://www.w3.org/2000/xmlns/', following an +incompatible change silently introduced in the errata to that spec, +and adopted by some W3C standards. + +For example, an attribute like xmlns:ex='http://example.com' would be +reported like this: + +*use-xmlns-namespace*: nil +namespace-uri: nil +local-name: nil +qname: #\"xmlns:ex\" + +*use-xmlns-namespace*: t +namespace-uri: #\"http://www.w3.org/2000/xmlns/\" +local-name: #\"ex\" +qname: #\"xmlns:ex\" + +Setting this variable has no effect unless both +`*namespace-processing*' and `*include-xmlns-attributes*' are non-nil.") + +(defgeneric start-document (handler) + (:documentation "Called at the beginning of the parsing process, +before any element, processing instruction or comment is reported. + +Handlers that need to maintain internal state may use this to perform +any neccessary initializations.") + (:method ((handler t)) nil)) + +;; How should attributes be represented? +;; Currently its just a (name . value) alist, but this isn't too +;; useful wrt namespaced attributes. Probably a struct. +(defgeneric start-element (handler namespace-uri local-name qname attributes) + (:documentation "Called to report the beginning of an element. + +There will always be a corresponding call to end-element, even in the +case of an empty element (i.e. ). + +If the value of *namespaces* is non-nil, namespace-uri, local-name and +qname are rods. If it is nil, namespace-uri and local-name are always +nil, and it is not an error if the qname is not a well-formed +qualified element name (for example, if it contains more than one +colon). + +The attributes parameter is a list (in arbitrary order) of instances +of the `attribute' structure class. The for their namespace-uri and +local-name properties, the same rules as for the element name +apply. Additionally, namespace-declaring attributes (those whose name +is \"xmlns\" or starts with \"xmlns:\") are only included if +*namespace-prefixes* is non-nil.") + (:method ((handler t) namespace-uri local-name qname attributes) nil)) + +(defgeneric start-prefix-mapping (handler prefix uri) + (:documentation "Called when the scope of a new prefix -> namespace-uri mapping begins. + +This will always be called immediatly before the `start-element' event +for the element on which the namespaces are declared. + +Clients don't usually have to implement this except under special +circumstances, for example when they have to deal with qualified names +in textual content. The parser will handle namespaces of elements and +attributes on its own.") + (:method ((handler t) prefix uri) nil)) + +(defgeneric characters (handler data) + (:documentation "Called for textual element content. + +The data is passed as a rod, with all entity references resolved. +It is possible that the character content of an element is reported +via multiple subsequent calls to this generic function.") + (:method ((handler t) data) nil)) + +(defgeneric processing-instruction (handler target data) + (:documentation "Called when a processing instruction is read. + +Both target and data are rods.") + (:method ((handler t) target data) nil)) + +(defgeneric end-prefix-mapping (handler prefix) + (:documentation "Called when a prefix -> namespace-uri mapping goes out of scope. + +This will always be called immediatly after the `end-element' event +for the element on which the namespace is declared. The order of the +end-prefix-mapping events is otherwise not guaranteed. + +Clients don't usually have to implement this except under special +circumstances, for example when they have to deal with qualified names +in textual content. The parser will handle namespaces of elements and +attributes on its own.") + (:method ((handler t) prefix) nil)) + +(defgeneric end-element (handler namespace-uri local-name qname) + (:documentation "Called to report the end of an element. + +See the documentation for `start-element' for a description of the +parameters.") + (:method ((handler t) namespace-uri local-name qname) nil)) + +(defgeneric end-document (handler) + (:documentation "Called at the end of parsing a document. +This is always the last function called in the parsing process. + +In contrast to all of the other methods, the return value of this gf +is significant, it will be returned by the parse-file/stream/string function.") + (:method ((handler t)) nil)) + +;; LexicalHandler + +(defgeneric comment (handler data) + (:method ((handler t) data) nil)) + +(defgeneric start-cdata (handler) + (:documentation "Called at the beginning of parsing a CDATA section. + +Handlers only have to implement this if they are interested in the +lexical structure of the parsed document. The content of the CDATA +section is reported via the `characters' generic function like all +other textual content.") + (:method ((handler t)) nil)) + +(defgeneric end-cdata (handler) + (:documentation "Called at the end of parsing a CDATA section. + +Handlers only have to implement this if they are interested in the +lexical structure of the parsed document. The content of the CDATA +section is reported via the `characters' generic function like all +other textual content.") + (:method ((handler t)) nil)) \ No newline at end of file diff --git a/xml/sax-proxy.lisp b/xml/sax-proxy.lisp new file mode 100644 index 0000000..a6a9915 --- /dev/null +++ b/xml/sax-proxy.lisp @@ -0,0 +1,38 @@ +;;;; sax-proxy.lisp +;;;; +;;;; This file is part of the CXML parser, released under (L)LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Copyright (c) 2004 David Lichteblau +;;;; Author: David Lichteblau + +(in-package :cxml) + +(defclass sax-proxy () + ((chained-handler :initform nil + :initarg :chained-handler + :accessor proxy-chained-handler))) + +(macrolet ((define-proxy-method (name (&rest args)) + `(defmethod ,name ((handler sax-proxy) ,@args) + (,name (proxy-chained-handler handler) ,@args)))) + (define-proxy-method sax:start-document ()) + (define-proxy-method sax:start-element (uri lname qname attributes)) + (define-proxy-method sax:start-prefix-mapping (prefix uri)) + (define-proxy-method sax:characters (data)) + (define-proxy-method sax:processing-instruction (target data)) + (define-proxy-method sax:end-prefix-mapping (prefix)) + (define-proxy-method sax:end-element (namespace-uri local-name qname)) + (define-proxy-method sax:end-document ()) + (define-proxy-method sax:comment (data)) + (define-proxy-method sax:start-cdata ()) + (define-proxy-method sax:end-cdata ()) + (define-proxy-method sax:start-dtd (name public-id system-id)) + (define-proxy-method sax:end-dtd ()) + (define-proxy-method sax:unparsed-entity-declaration (name pub sys not)) + (define-proxy-method sax:external-entity-declaration (kind name pub sys)) + (define-proxy-method sax:internal-entity-declaration (kind name value)) + (define-proxy-method sax:notation-declaration (name public-id system-id)) + (define-proxy-method sax:element-declaration (name model)) + (define-proxy-method sax:attribute-declaration (elt attr type default)) + (define-proxy-method sax:entity-resolver (resolver))) diff --git a/xml/sax-tests/event-collecting-handler.lisp b/xml/sax-tests/event-collecting-handler.lisp new file mode 100644 index 0000000..e03c6eb --- /dev/null +++ b/xml/sax-tests/event-collecting-handler.lisp @@ -0,0 +1,37 @@ +(in-package :sax-tests) + +(defclass event-collecting-handler () + ((event-list :initform '() :accessor event-list))) + +(defmethod start-document ((handler event-collecting-handler)) + (push (list :start-document) (event-list handler))) + +(defmethod start-element ((handler event-collecting-handler) ns-uri local-name qname attrs) + (push (list :start-element ns-uri local-name qname attrs) + (event-list handler))) + +(defmethod start-prefix-mapping ((handler event-collecting-handler) prefix uri) + (push (list :start-prefix-mapping prefix uri) + (event-list handler))) + +(defmethod characters ((handler event-collecting-handler) data) + (push (list :characters data) + (event-list handler))) + +(defmethod processing-instruction ((handler event-collecting-handler) target data) + (push (list :processing-instruction target data) + (event-list handler))) + +(defmethod end-prefix-mapping ((handler event-collecting-handler) prefix) + (push (list :end-prefix-mapping prefix) + (event-list handler))) + +(defmethod end-element ((handler event-collecting-handler) namespace-uri local-name qname) + (push (list :end-element namespace-uri local-name qname) + (event-list handler))) + +(defmethod end-document ((handler event-collecting-handler)) + (push (list :end-document) + (event-list handler)) + + (nreverse (event-list handler))) \ No newline at end of file diff --git a/xml/sax-tests/package.lisp b/xml/sax-tests/package.lisp new file mode 100644 index 0000000..b81099b --- /dev/null +++ b/xml/sax-tests/package.lisp @@ -0,0 +1,4 @@ +(defpackage :sax-tests + (:use :cl :xml :sax :glisp :rt) + (:export #:event-collecting-handler)) + diff --git a/xml/sax-tests/tests.lisp b/xml/sax-tests/tests.lisp new file mode 100644 index 0000000..7d39069 --- /dev/null +++ b/xml/sax-tests/tests.lisp @@ -0,0 +1,332 @@ +(in-package :sax-tests) + +(defun first-start-element-event (string) + (let ((events (xml:parse-string string (make-instance 'event-collecting-handler)))) + (find :start-element events :key #'car))) + + +;;; Attribute handling + +(deftest no-default-namespace-for-attributes + (let* ((evt (first-start-element-event "")) + (attr (car (fifth evt)))) + (values + (attribute-namespace-uri attr) + (attribute-local-name attr))) + nil nil) + +(deftest attribute-uniqueness-1 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest attribute-uniqueness-2 + (handler-case + (xml:parse-string "") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t)) + t) + +(deftest attribute-uniqueness-3 + (let ((sax:*namespace-processing* nil)) + (handler-case + (xml:parse-string "") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +;;; Namespace undeclaring + +(deftest undeclare-default-namespace-1 + (let* ((evts (xml:parse-string "" + (make-instance 'event-collecting-handler))) + (start-elt-events (remove :start-element evts :test (complement #'eql) :key #'car)) + (evt1 (first start-elt-events)) + (evt2 (second start-elt-events ))) + (values + (rod= #"http://example.com" (second evt1)) + (second evt2) + (third evt2))) + t nil nil) + +(deftest undeclare-other-namespace + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + + +;;; Require names otherwise totally out of scope of the xmlns rec to be NcNames for no reason + +(deftest pi-names-are-ncnames-when-namespace-processing-1 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest pi-names-are-ncnames-when-namespace-processing-2 + (let ((sax:*namespace-processing* nil)) + (handler-case + (xml:parse-string "") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-1 + (handler-case + (xml:parse-string " ]>&y:z;") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-2 + (handler-case + (xml:parse-string " ]>") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-3 + (let ((sax:*namespace-processing* nil)) + (handler-case + (xml:parse-string " ]>&y:z;") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +(deftest entity-names-are-ncnames-when-namespace-processing-4 + (let ((sax:*namespace-processing* nil)) + (handler-case + (xml:parse-string " ]>") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t))) + t) + +;;; Inclusion of xmlns attributes + +(deftest xmlns-attr-include-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (length attrs)) + 1) + +(deftest xmlns-attr-discard-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* nil) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (length attrs)) + 0) + +;;; Namespace of xmlns attributes + +(deftest xmlns-attr-ns-uri-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (attribute-namespace-uri (car attrs))) + nil) + +(deftest xmlns-attr-ns-uri-2 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (attribute-namespace-uri (car attrs))) + nil) + +(deftest xmlns-attr-ns-uri-3 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (attribute-namespace-uri (car attrs))) + nil) + +(deftest xmlns-attr-ns-uri-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (rod= #"http://www.w3.org/2000/xmlns/" (attribute-namespace-uri (car attrs)))) + t) + +(deftest xmlns-attr-ns-local-name-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (attribute-local-name (car attrs))) + nil) + +(deftest xmlns-attr-ns-local-name-2 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (attribute-local-name (car attrs))) + nil) + +(deftest xmlns-attr-ns-local-name-3 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (attribute-local-name (car attrs))) + nil) + +(deftest xmlns-attr-ns-local-name-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (rod= #"foo" (attribute-local-name (car attrs)))) + t) + +(deftest xmlns-attr-qname-1 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (rod= #"xmlns" (attribute-qname (car attrs)))) + t) + +(deftest xmlns-attr-qname-2 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* nil) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (rod= #"xmlns:foo" (attribute-qname (car attrs)))) + t) + +(deftest xmlns-attr-qname-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (rod= #"xmlns" (attribute-qname (car attrs)))) + t) + +(deftest xmlns-attr-qname-4 + (let* ((sax:*namespace-processing* t) + (sax:*include-xmlns-attributes* t) + (sax:*use-xmlns-namespace* t) + (evt (first-start-element-event "")) + (attrs (fifth evt))) + (rod= #"xmlns:foo" (attribute-qname (car attrs)))) + t) + + +;;; Predefined Namespaces + +(deftest redefine-xml-namespace-1 + (handler-case + (xml:parse-string "") + (error () nil) + (:no-error (&rest junk) + (declare (ignore junk)) + t)) + t) + +(deftest redefine-xml-namespace-2 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xml-namespace-3 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xml-namespace-4 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-1 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-2 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-3 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + +(deftest redefine-xmlns-namespace-4 + (handler-case + (xml:parse-string "") + (error () t) + (:no-error (&rest junk) + (declare (ignore junk)) + nil)) + t) + + diff --git a/xml/split-sequence.lisp b/xml/split-sequence.lisp new file mode 100644 index 0000000..e1a7d8d --- /dev/null +++ b/xml/split-sequence.lisp @@ -0,0 +1,44 @@ +;;; This code was based on Arthur Lemmens' in +;;; ; + +(in-package :cxml) + +(defun split-sequence-if (predicate seq &key (count nil) (remove-empty-subseqs nil) (from-end nil) (start 0) (end nil) (key nil key-supplied)) + (let ((len (length seq)) + (other-keys (when key-supplied + (list :key key)))) + (unless end (setq end len)) + (if from-end + (loop for right = end then left + for left = (max (or (apply #'position-if predicate seq + :end right + :from-end t + other-keys) + -1) + (1- start)) + unless (and (= right (1+ left)) + remove-empty-subseqs) ; empty subseq we don't want + if (and count (>= nr-elts count)) + ;; We can't take any more. Return now. + return (values (nreverse subseqs) right) + else + collect (subseq seq (1+ left) right) into subseqs + and sum 1 into nr-elts + until (< left start) + finally (return (values (nreverse subseqs) (1+ left)))) + (loop for left = start then (+ right 1) + for right = (min (or (apply #'position-if predicate seq + :start left + other-keys) + len) + end) + unless (and (= right left) + remove-empty-subseqs) ; empty subseq we don't want + if (and count (>= nr-elts count)) + ;; We can't take any more. Return now. + return (values subseqs left) + else + collect (subseq seq left right) into subseqs + and sum 1 into nr-elts + until (>= right end) + finally (return (values subseqs right)))))) diff --git a/xml/string-dom.lisp b/xml/string-dom.lisp new file mode 100644 index 0000000..91e6458 --- /dev/null +++ b/xml/string-dom.lisp @@ -0,0 +1,35 @@ +(defpackage :string-dom + (:use)) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (do-external-symbols (var :cdom) + (let* ((home-package + (if (member var '(cdom:data cdom:name cdom:value cdom:tag-name + cdom:node-name cdom:node-value + cdom:substring-data cdom:get-attribute)) + :string-dom + :cdom)) + (symbol (intern (symbol-name var) home-package))) + (import symbol :string-dom) + (export (list symbol) :string-dom)))) + +(defpackage :string-dom-impl (:use :cl)) +(in-package :string-dom-impl) + +(defun rod-to-string (frob) + (if (null frob) + nil + (map 'string #'code-char frob))) + +(defun string-dom:data (node) (rod-to-string (cdom:data node))) +(defun string-dom:name (node) (rod-to-string (cdom:name node))) +(defun string-dom:value (node) (rod-to-string (cdom:value node))) +(defun string-dom:tag-name (node) (rod-to-string (cdom:tag-name node))) +(defun string-dom:node-name (node) (rod-to-string (cdom:node-name node))) +(defun string-dom:node-value (node) (rod-to-string (cdom:node-value node))) + +(defun string-dom:substring-data (node offset count) + (rod-to-string (cdom:substring-data node offset count))) + +(defun string-dom:get-attribute (elt name) + (rod-to-string (cdom:get-attribute elt name))) diff --git a/xml/unparse.lisp b/xml/unparse.lisp new file mode 100644 index 0000000..4eee275 --- /dev/null +++ b/xml/unparse.lisp @@ -0,0 +1,438 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Unparse XML +;;; Title: (including support for canonic XML according to J.Clark) +;;; Created: 1999-09-09 +;;; Author: Gilbert Baumann +;;; Author: David Lichteblau +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :cxml) + +;; +;; | Canonical XML +;; | ============= +;; | +;; | This document defines a subset of XML called canonical XML. The +;; | intended use of canonical XML is in testing XML processors, as a +;; | representation of the result of parsing an XML document. +;; | +;; | Every well-formed XML document has a unique structurally equivalent +;; | canonical XML document. Two structurally equivalent XML documents have +;; | a byte-for-byte identical canonical XML document. Canonicalizing an +;; | XML document requires only information that an XML processor is +;; | required to make available to an application. +;; | +;; | A canonical XML document conforms to the following grammar: +;; | +;; | CanonXML ::= Pi* element Pi* +;; | element ::= Stag (Datachar | Pi | element)* Etag +;; | Stag ::= '<' Name Atts '>' +;; | Etag ::= '' +;; | Pi ::= '' Char*)) '?>' +;; | Atts ::= (' ' Name '=' '"' Datachar* '"')* +;; | Datachar ::= '&' | '<' | '>' | '"' +;; | | ' '| ' '| ' ' +;; | | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD)) +;; | Name ::= (see XML spec) +;; | Char ::= (see XML spec) +;; | S ::= (see XML spec) +;; | +;; | Attributes are in lexicographical order (in Unicode bit order). +;; | +;; | A canonical XML document is encoded in UTF-8. +;; | +;; | Ignorable white space is considered significant and is treated +;; | equivalently to data. +;; +;; -- James Clark (jjc@jclark.com) + + +;;;; SINK: a rune output "stream" + +(defclass sink () + ((high-surrogate :initform nil) + (column :initform 0 :accessor column) + (width :initform 79 :initarg :width :accessor width) + (canonical :initform t :initarg :canonical :accessor canonical) + (indentation :initform nil :initarg :indentation :accessor indentation) + (current-indentation :initform 0 :accessor current-indentation) + (notations :initform (make-buffer :element-type t) :accessor notations) + (name-for-dtd :accessor name-for-dtd) + (previous-notation :initform nil :accessor previous-notation) + (stack :initform nil :accessor stack))) + +(defmethod initialize-instance :after ((instance sink) &key) + (when (eq (canonical instance) t) + (setf (canonical instance) 1)) + (unless (member (canonical instance) '(nil 1 2)) + (error "Invalid canonical form: ~A" (canonical instance))) + (when (and (canonical instance) (indentation instance)) + (error "Cannot indent XML in canonical mode"))) + +;; WRITE-OCTET als generisch zu machen ist vielleicht nicht die schnellste +;; Loesung, aber die einfachste. +(defgeneric write-octet (octet sink)) + +(defun make-buffer (&key (element-type '(unsigned-byte 8))) + (make-array 1 + :element-type element-type + :adjustable t + :fill-pointer 0)) + +(defmethod write-octet :after (octet sink) + (with-slots (column) sink + (setf column (if (eql octet 10) 0 (1+ column))))) + + +;; vector (octet) sinks + +(defclass vector-sink (sink) + ((target-vector :initform (make-buffer)))) + +(defun make-octet-vector-sink (&rest initargs) + (apply #'make-instance 'vector-sink initargs)) + +(defmethod write-octet (octet (sink vector-sink)) + (let ((target-vector (slot-value sink 'target-vector))) + (vector-push-extend octet target-vector (length target-vector)))) + +(defmethod sax:end-document ((sink vector-sink)) + (slot-value sink 'target-vector)) + + +;; character stream sinks + +(defclass character-stream-sink (sink) + ((target-stream :initarg :target-stream))) + +(defun make-character-stream-sink (character-stream &rest initargs) + (apply #'make-instance 'character-stream-sink + :target-stream character-stream + initargs)) + +(defmethod write-octet (octet (sink character-stream-sink)) + (write-char (code-char octet) (slot-value sink 'target-stream))) + +(defmethod sax:end-document ((sink character-stream-sink)) + (slot-value sink 'target-stream)) + + +;; octet stream sinks + +(defclass octet-stream-sink (sink) + ((target-stream :initarg :target-stream))) + +(defun make-octet-stream-sink (octet-stream &rest initargs) + (apply #'make-instance 'octet-stream-sink + :target-stream octet-stream + initargs)) + +(defmethod write-octet (octet (sink octet-stream-sink)) + (write-byte octet (slot-value sink 'target-stream))) + +(defmethod sax:end-document ((sink octet-stream-sink)) + (slot-value sink 'target-stream)) + + +;;;; doctype and notations + +(defmethod sax:start-dtd ((sink sink) name public-id system-id) + (declare (ignore public-id system-id)) + (setf (name-for-dtd sink) name)) + +(defmethod sax:notation-declaration ((sink sink) name public-id system-id) + (when (and (canonical sink) (>= (canonical sink) 2)) + (let ((prev (previous-notation sink))) + (cond + (prev + (unless (rod< prev name) + (error "misordered notations; cannot unparse canonically"))) + (t + ;; need a doctype declaration + (write-rod #" sink) + (write-rune #/U+000A sink))) + +(defmethod sax:end-dtd ((sink sink)) + (when (previous-notation sink) + (write-rod #"]>" sink) + (write-rune #/U+000A sink))) + + +;;;; elements + +(defun sink-fresh-line (sink) + (unless (zerop (column sink)) + (write-rune-0 10 sink) + (indent sink))) + +(defmethod sax:start-element + ((sink sink) namespace-uri local-name qname attributes) + (declare (ignore namespace-uri local-name)) + (when (stack sink) + (incf (cdr (first (stack sink))))) + (push (cons qname 0) (stack sink)) + (when (indentation sink) + (sink-fresh-line sink) + (start-indentation-block sink)) + (write-rune #/< sink) + (write-rod qname sink) + (let ((atts (sort (copy-list attributes) #'rod< :key #'sax:attribute-qname))) + (dolist (a atts) + (write-rune #/space sink) + (write-rod (sax:attribute-qname a) sink) + (write-rune #/= sink) + (write-rune #/\" sink) + (map nil (lambda (c) (unparse-datachar c sink)) (sax:attribute-value a)) + (write-rune #/\" sink))) + (write-rod '#.(string-rod ">") sink)) + +(defmethod sax:end-element + ((sink sink) namespace-uri local-name qname) + (declare (ignore namespace-uri local-name)) + (let ((cons (pop (stack sink)))) + (unless (consp cons) + (error "output does not nest: not in an element")) + (unless (rod= (car cons) qname) + (error "output does not nest: expected ~A but got ~A" + (rod qname) (rod (car cons)))) + (when (indentation sink) + (end-indentation-block sink) + (unless (zerop (cdr cons)) + (sink-fresh-line sink)))) + (write-rod '#.(string-rod "") sink)) + +(defmethod sax:processing-instruction ((sink sink) target data) + (unless (rod-equal target '#.(string-rod "xml")) + (write-rod '#.(string-rod "") sink))) + +(defmethod sax:start-cdata ((sink sink)) + (push :cdata (stack sink))) + +(defmethod sax:characters ((sink sink) data) + (cond + ((and (eq (car (stack sink)) :cdata) + (not (canonical sink)) + (not (search #"]]" data))) + (when (indentation sink) + (sink-fresh-line sink)) + (write-rod #"" sink)) + (t + (if (indentation sink) + (unparse-indented-text data sink) + (map nil (if (canonical sink) + (lambda (c) (unparse-datachar c sink)) + (lambda (c) (unparse-datachar-readable c sink))) + data))))) + +(defmethod sax:end-cdata ((sink sink)) + (unless (eq (pop (stack sink)) :cdata) + (error "output does not nest: not in a cdata section"))) + +(defun indent (sink) + (dotimes (x (current-indentation sink)) + (write-rune-0 32 sink))) + +(defun start-indentation-block (sink) + (incf (current-indentation sink) (indentation sink))) + +(defun end-indentation-block (sink) + (decf (current-indentation sink) (indentation sink))) + +(defun unparse-indented-text (data sink) + (flet ((whitespacep (x) + (or (rune= x #/U+000A) (rune= x #/U+0020)))) + (let* ((n (length data)) + (pos (position-if-not #'whitespacep data)) + (need-whitespace-p nil)) + (cond + ((zerop n)) + (pos + (sink-fresh-line sink) + (while (< pos n) + (let* ((w (or (position-if #'whitespacep data :start (1+ pos)) n)) + (next (or (position-if-not #'whitespacep data :start w) n))) + (when need-whitespace-p + (if (< (+ (column sink) w (- pos)) (width sink)) + (write-rune-0 32 sink) + (sink-fresh-line sink))) + (loop + for i from pos below w do + (unparse-datachar-readable (elt data i) sink)) + (setf need-whitespace-p (< w n)) + (setf pos next)))) + (t + (write-rune-0 32 sink)))))) + +(defun unparse-datachar (c sink) + (cond ((rune= c #/&) (write-rod '#.(string-rod "&") sink)) + ((rune= c #/<) (write-rod '#.(string-rod "<") sink)) + ((rune= c #/>) (write-rod '#.(string-rod ">") sink)) + ((rune= c #/\") (write-rod '#.(string-rod """) sink)) + ((rune= c #/U+0009) (write-rod '#.(string-rod " ") sink)) + ((rune= c #/U+000A) (write-rod '#.(string-rod " ") sink)) + ((rune= c #/U+000D) (write-rod '#.(string-rod " ") sink)) + (t + (write-rune c sink)))) + +(defun unparse-datachar-readable (c sink) + (cond ((rune= c #/&) (write-rod '#.(string-rod "&") sink)) + ((rune= c #/<) (write-rod '#.(string-rod "<") sink)) + ((rune= c #/>) (write-rod '#.(string-rod ">") sink)) + ((rune= c #/\") (write-rod '#.(string-rod """) sink)) + (t + (write-rune c sink)))) + + +;;;; UTF-8 output for SINKs + +(defun write-rod (rod sink) + (map nil (lambda (c) (write-rune c sink)) rod)) + +(defun write-rune (rune sink) + (let ((code (rune-code rune))) + (with-slots (high-surrogate) sink + (cond + ((<= #xD800 code #xDBFF) + (setf high-surrogate code)) + ((<= #xDC00 code #xDFFF) + (let ((q (logior (ash (- high-surrogate #xD7C0) 10) + (- code #xDC00)))) + (write-rune-0 q sink)) + (setf high-surrogate nil)) + (t + (write-rune-0 code sink)))))) + +(defun write-rune-0 (code sink) + (labels ((wr (x) + (write-octet x sink))) + (cond ((<= #x00000000 code #x0000007F) + (wr code)) + ((<= #x00000080 code #x000007FF) + (wr (logior #b11000000 (ldb (byte 5 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x00000800 code #x0000FFFF) + (wr (logior #b11100000 (ldb (byte 4 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x00010000 code #x001FFFFF) + (wr (logior #b11110000 (ldb (byte 3 18) code))) + (wr (logior #b10000000 (ldb (byte 6 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x00200000 code #x03FFFFFF) + (wr (logior #b11111000 (ldb (byte 2 24) code))) + (wr (logior #b10000000 (ldb (byte 6 18) code))) + (wr (logior #b10000000 (ldb (byte 6 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code)))) + ((<= #x04000000 code #x7FFFFFFF) + (wr (logior #b11111100 (ldb (byte 1 30) code))) + (wr (logior #b10000000 (ldb (byte 6 24) code))) + (wr (logior #b10000000 (ldb (byte 6 18) code))) + (wr (logior #b10000000 (ldb (byte 6 12) code))) + (wr (logior #b10000000 (ldb (byte 6 6) code))) + (wr (logior #b10000000 (ldb (byte 6 0) code))))))) + + +;;;; convenience functions for DOMless XML serialization + +(defvar *current-element*) +(defvar *sink*) + +(defmacro with-xml-output (sink &body body) + `(invoke-with-xml-output (lambda () ,@body) ,sink)) + +(defun invoke-with-xml-output (fn sink) + (let ((*sink* sink) + (*current-element* nil)) + (sax:start-document *sink*) + (funcall fn) + (sax:end-document *sink*))) + +(defmacro with-element (qname &body body) + ;; XXX Statt qname soll man in zukunft auch mal (lname uri) angeben koennen. + ;; Hat aber Zeit bis DOM 2. + (when (listp qname) + (destructuring-bind (n) qname + (setf qname n))) + `(invoke-with-element (lambda () ,@body) ,qname)) + +(defun maybe-emit-start-tag () + (when *current-element* + ;; starting child node, need to emit opening tag of parent first: + (destructuring-bind (qname &rest attributes) *current-element* + (sax:start-element *sink* nil nil qname (reverse attributes))) + (setf *current-element* nil))) + +(defun invoke-with-element (fn qname) + (maybe-emit-start-tag) + (let ((*current-element* (list qname))) + (multiple-value-prog1 + (funcall fn) + (maybe-emit-start-tag) + (sax:end-element *sink* nil nil qname)))) + +(defun attribute (name value) + (push (sax:make-attribute :qname name :value value) + (cdr *current-element*)) + value) + +(defun cdata (data) + (sax:start-cdata *sink*) + (sax:characters *sink* data) + (sax:end-cdata *sink*) + data) + +(defun text (data) + (maybe-emit-start-tag) + (sax:characters *sink* data) + data) diff --git a/xml/xml-canonic.lisp b/xml/xml-canonic.lisp new file mode 100644 index 0000000..f9e0d48 --- /dev/null +++ b/xml/xml-canonic.lisp @@ -0,0 +1,172 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: XML; readtable: glisp; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Dump canonic XML according to J.Clark +;;; Created: 1999-09-09 +;;; Author: Gilbert Baumann +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :xml) + +;; +;; | Canonical XML +;; | ============= +;; | +;; | This document defines a subset of XML called canonical XML. The +;; | intended use of canonical XML is in testing XML processors, as a +;; | representation of the result of parsing an XML document. +;; | +;; | Every well-formed XML document has a unique structurally equivalent +;; | canonical XML document. Two structurally equivalent XML documents have +;; | a byte-for-byte identical canonical XML document. Canonicalizing an +;; | XML document requires only information that an XML processor is +;; | required to make available to an application. +;; | +;; | A canonical XML document conforms to the following grammar: +;; | +;; | CanonXML ::= Pi* element Pi* +;; | element ::= Stag (Datachar | Pi | element)* Etag +;; | Stag ::= '<' Name Atts '>' +;; | Etag ::= '' +;; | Pi ::= '' Char*)) '?>' +;; | Atts ::= (' ' Name '=' '"' Datachar* '"')* +;; | Datachar ::= '&' | '<' | '>' | '"' +;; | | ' '| ' '| ' ' +;; | | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD)) +;; | Name ::= (see XML spec) +;; | Char ::= (see XML spec) +;; | S ::= (see XML spec) +;; | +;; | Attributes are in lexicographical order (in Unicode bit order). +;; | +;; | A canonical XML document is encoded in UTF-8. +;; | +;; | Ignorable white space is considered significant and is treated +;; | equivalently to data. +;; +;; -- James Clark (jjc@jclark.com) + +(defvar *quux*) ;!!!BIG HACK!!! + +(defun unparse-document (doc sink) + (mapc (rcurry #'unparse-node sink) (dom:child-nodes doc))) + +(defun unparse-node (node sink) + (cond ((dom:element-p node) + (write-rune #/< sink) + (write-rod (dom:tag-name node) sink) + ;; atts + (let ((atts (sort (copy-list (dom:items (dom:attributes node))) + #'rod< :key #'dom:name))) + (dolist (a atts) + (write-rune #/space sink) + (write-rod (dom:name a) sink) + (write-rune #/= sink) + (write-rune #/\" sink) + (let ((*quux* nil)) + (map nil (lambda (c) (unparse-datachar c sink)) (dom:value a))) + (write-rune #/\" sink))) + (write-rod '#.(string-rod ">") sink) + (dolist (k (dom:child-nodes node)) + (unparse-node k sink)) + (write-rod '#.(string-rod "") sink)) + ((dom:processing-instruction-p node) + (unless (rod-equal (dom:target node) '#.(string-rod "xml")) + (write-rod '#.(string-rod "") sink) )) + ((dom:text-node-p node) + (let ((*quux* nil)) + (map nil (lambda (c) (unparse-datachar c sink)) + (dom:data node)))) + (t + (error "Oops in unparse: ~S." node)))) + +(defun unparse-datachar (c sink) + (cond ((rune= c #/&) (write-rod '#.(string-rod "&") sink)) + ((rune= c #/<) (write-rod '#.(string-rod "<") sink)) + ((rune= c #/>) (write-rod '#.(string-rod ">") sink)) + ((rune= c #/\") (write-rod '#.(string-rod """) sink)) + ((rune= c #/U+0009) (write-rod '#.(string-rod " ") sink)) + ((rune= c #/U+000A) (write-rod '#.(string-rod " ") sink)) + ((rune= c #/U+000D) (write-rod '#.(string-rod " ") sink)) + (t + (write-rune c sink)))) + +(defun write-rod (rod sink) + (let ((*quux* nil)) + (map nil (lambda (c) (write-rune c sink)) rod))) + +(defun write-rune (rune sink) + (cond ((<= #xD800 rune #xDBFF) + (setf *quux* rune)) + ((<= #xDC00 rune #xDFFF) + (let ((q (logior (ash (- *quux* #xD7C0) 10) (- rune #xDC00)))) + (write-rune-0 q sink)) + (setf *quux* nil)) + (t + (write-rune-0 rune sink)))) + +(defun write-rune-0 (rune sink) + (labels ((wr (x) + (write-char (code-char x) sink))) + (cond ((<= #x00000000 rune #x0000007F) + (wr rune)) + ((<= #x00000080 rune #x000007FF) + (wr (logior #b11000000 (ldb (byte 5 6) rune))) + (wr (logior #b10000000 (ldb (byte 6 0) rune)))) + ((<= #x00000800 rune #x0000FFFF) + (wr (logior #b11100000 (ldb (byte 4 12) rune))) + (wr (logior #b10000000 (ldb (byte 6 6) rune))) + (wr (logior #b10000000 (ldb (byte 6 0) rune)))) + ((<= #x00010000 rune #x001FFFFF) + (wr (logior #b11110000 (ldb (byte 3 18) rune))) + (wr (logior #b10000000 (ldb (byte 6 12) rune))) + (wr (logior #b10000000 (ldb (byte 6 6) rune))) + (wr (logior #b10000000 (ldb (byte 6 0) rune)))) + ((<= #x00200000 rune #x03FFFFFF) + (wr (logior #b11111000 (ldb (byte 2 24) rune))) + (wr (logior #b10000000 (ldb (byte 6 18) rune))) + (wr (logior #b10000000 (ldb (byte 6 12) rune))) + (wr (logior #b10000000 (ldb (byte 6 6) rune))) + (wr (logior #b10000000 (ldb (byte 6 0) rune)))) + ((<= #x04000000 rune #x7FFFFFFF) + (wr (logior #b11111100 (ldb (byte 1 30) rune))) + (wr (logior #b10000000 (ldb (byte 6 24) rune))) + (wr (logior #b10000000 (ldb (byte 6 18) rune))) + (wr (logior #b10000000 (ldb (byte 6 12) rune))) + (wr (logior #b10000000 (ldb (byte 6 6) rune))) + (wr (logior #b10000000 (ldb (byte 6 0) rune))))))) + +(defun rod< (rod1 rod2) + (do ((i 0 (+ i 1))) + (nil) + (cond ((= i (length rod1)) + (return t)) + ((= i (length rod2)) + (return nil)) + ((< (aref rod1 i) (aref rod2 i)) + (return t)) + ((> (aref rod1 i) (aref rod2 i)) + (return nil))))) + diff --git a/xml/xml-name-rune-p.lisp b/xml/xml-name-rune-p.lisp new file mode 100644 index 0000000..9d2efd2 --- /dev/null +++ b/xml/xml-name-rune-p.lisp @@ -0,0 +1,218 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: XML; readtable: glisp; Encoding: utf-8; -*- + +(in-package :xml) + +#.(funcall + (compile + nil + '(lambda () + (let ((*max* #xD800)) + (labels + ((name-start-rune-p (rune) + (or (letter-rune-p rune) + (= #.(char-code #\_) rune) + (= #.(char-code #\:) rune))) + + (name-rune-p (rune) + (or (letter-rune-p rune) + (digit-rune-p* rune) + (= rune #.(char-code #\.)) + (= rune #.(char-code #\-)) + (= rune #.(char-code #\_)) + (= rune #.(char-code #\:)) + (combining-rune-p rune) + (extender-rune-p rune))) + + (letter-rune-p (rune) + (or (base-rune-p rune) + (ideographic-rune-p rune))) + + (digit-rune-p* (rune) + (or (<= 48 rune 57) + (<= 1632 rune 1641) + (<= 1776 rune 1785) + (<= 2406 rune 2415) + (<= 2534 rune 2543) + (<= 2662 rune 2671) + (<= 2790 rune 2799) + (<= 2918 rune 2927) + (<= 3047 rune 3055) + (<= 3174 rune 3183) + (<= 3302 rune 3311) + (<= 3430 rune 3439) + (<= 3664 rune 3673) + (<= 3792 rune 3801) + (<= 3872 rune 3881))) + + + (combining-rune-p (rune) + (or (<= 768 rune 837) + (<= 864 rune 865) + (<= 1155 rune 1158) + (<= 1425 rune 1441) + (<= 1443 rune 1465) + (<= 1467 rune 1469) + (= 1471 rune) + (<= 1473 rune 1474) + (= 1476 rune) + (<= 1611 rune 1618) + (= 1648 rune) + (<= 1750 rune 1756) + (<= 1757 rune 1759) + (<= 1760 rune 1764) + (<= 1767 rune 1768) + (<= 1770 rune 1773) + (<= 2305 rune 2307) + (= 2364 rune) + (<= 2366 rune 2380) + (= 2381 rune) + (<= 2385 rune 2388) + (<= 2402 rune 2403) + (<= 2433 rune 2435) + (= 2492 rune) + (= 2494 rune) + (= 2495 rune) + (<= 2496 rune 2500) + (<= 2503 rune 2504) + (<= 2507 rune 2509) + (= 2519 rune) + (<= 2530 rune 2531) + (= 2562 rune) + (= 2620 rune) + (= 2622 rune) + (= 2623 rune) + (<= 2624 rune 2626) + (<= 2631 rune 2632) + (<= 2635 rune 2637) + (<= 2672 rune 2673) + (<= 2689 rune 2691) + (= 2748 rune) + (<= 2750 rune 2757) + (<= 2759 rune 2761) + (<= 2763 rune 2765) + (<= 2817 rune 2819) + (= 2876 rune) + (<= 2878 rune 2883) + (<= 2887 rune 2888) + (<= 2891 rune 2893) + (<= 2902 rune 2903) + (<= 2946 rune 2947) + (<= 3006 rune 3010) + (<= 3014 rune 3016) + (<= 3018 rune 3021) + (= 3031 rune) + (<= 3073 rune 3075) + (<= 3134 rune 3140) + (<= 3142 rune 3144) + (<= 3146 rune 3149) + (<= 3157 rune 3158) + (<= 3202 rune 3203) + (<= 3262 rune 3268) + (<= 3270 rune 3272) + (<= 3274 rune 3277) + (<= 3285 rune 3286) + (<= 3330 rune 3331) + (<= 3390 rune 3395) + (<= 3398 rune 3400) + (<= 3402 rune 3405) + (= 3415 rune) + (= 3633 rune) + (<= 3636 rune 3642) + (<= 3655 rune 3662) + (= 3761 rune) + (<= 3764 rune 3769) + (<= 3771 rune 3772) + (<= 3784 rune 3789) + (<= 3864 rune 3865) + (= 3893 rune) + (= 3895 rune) + (= 3897 rune) + (= 3902 rune) + (= 3903 rune) + (<= 3953 rune 3972) + (<= 3974 rune 3979) + (<= 3984 rune 3989) + (= 3991 rune) + (<= 3993 rune 4013) + (<= 4017 rune 4023) + (= 4025 rune) + (<= 8400 rune 8412) + (= 8417 rune) + (<= 12330 rune 12335) + (= 12441 rune) + (= 12442 rune))) + + (extender-rune-p (rune) + (or + (= 183 rune) + (= 720 rune) + (= 721 rune) + (= 903 rune) + (= 1600 rune) + (= 3654 rune) + (= 3782 rune) + (= 12293 rune) + (<= 12337 rune 12341) + (<= 12445 rune 12446) + (<= 12540 rune 12542))) + + (base-rune-p (rune) + (or + (<= 65 rune 90) (<= 97 rune 122) (<= 192 rune 214) (<= 216 rune 246) (<= 248 rune 255) (<= 256 rune 305) + (<= 308 rune 318) (<= 321 rune 328) (<= 330 rune 382) (<= 384 rune 451) (<= 461 rune 496) (<= 500 rune 501) + (<= 506 rune 535) (<= 592 rune 680) (<= 699 rune 705) (= 902 rune) (<= 904 rune 906) (= 908 rune) + (<= 910 rune 929) (<= 931 rune 974) (<= 976 rune 982) (= 986 rune) (= 988 rune) (= 990 rune) (= 992 rune) + (<= 994 rune 1011) (<= 1025 rune 1036) (<= 1038 rune 1103) (<= 1105 rune 1116) (<= 1118 rune 1153) + (<= 1168 rune 1220) (<= 1223 rune 1224) (<= 1227 rune 1228) (<= 1232 rune 1259) (<= 1262 rune 1269) + (<= 1272 rune 1273) (<= 1329 rune 1366) (= 1369 rune) (<= 1377 rune 1414) (<= 1488 rune 1514) + (<= 1520 rune 1522) (<= 1569 rune 1594) (<= 1601 rune 1610) (<= 1649 rune 1719) (<= 1722 rune 1726) + (<= 1728 rune 1742) (<= 1744 rune 1747) (= 1749 rune) (<= 1765 rune 1766) (<= 2309 rune 2361) (= 2365 rune) + (<= 2392 rune 2401) (<= 2437 rune 2444) (<= 2447 rune 2448) (<= 2451 rune 2472) (<= 2474 rune 2480) + (= 2482 rune) (<= 2486 rune 2489) (<= 2524 rune 2525) (<= 2527 rune 2529) (<= 2544 rune 2545) + (<= 2565 rune 2570) (<= 2575 rune 2576) (<= 2579 rune 2600) (<= 2602 rune 2608) (<= 2610 rune 2611) + (<= 2613 rune 2614) (<= 2616 rune 2617) (<= 2649 rune 2652) (= 2654 rune) (<= 2674 rune 2676) + (<= 2693 rune 2699) (= 2701 rune) (<= 2703 rune 2705) (<= 2707 rune 2728) (<= 2730 rune 2736) + (<= 2738 rune 2739) (<= 2741 rune 2745) (= 2749 rune) (= 2784 rune) (<= 2821 rune 2828) (<= 2831 rune 2832) + (<= 2835 rune 2856) (<= 2858 rune 2864) (<= 2866 rune 2867) (<= 2870 rune 2873) (= 2877 rune) + (<= 2908 rune 2909) (<= 2911 rune 2913) (<= 2949 rune 2954) (<= 2958 rune 2960) (<= 2962 rune 2965) + (<= 2969 rune 2970) (= 2972 rune) (<= 2974 rune 2975) (<= 2979 rune 2980) (<= 2984 rune 2986) + (<= 2990 rune 2997) (<= 2999 rune 3001) (<= 3077 rune 3084) (<= 3086 rune 3088) (<= 3090 rune 3112) + (<= 3114 rune 3123) (<= 3125 rune 3129) (<= 3168 rune 3169) (<= 3205 rune 3212) (<= 3214 rune 3216) + (<= 3218 rune 3240) (<= 3242 rune 3251) (<= 3253 rune 3257) (= 3294 rune) (<= 3296 rune 3297) + (<= 3333 rune 3340) (<= 3342 rune 3344) (<= 3346 rune 3368) (<= 3370 rune 3385) (<= 3424 rune 3425) + (<= 3585 rune 3630) (= 3632 rune) (<= 3634 rune 3635) (<= 3648 rune 3653) (<= 3713 rune 3714) (= 3716 rune) + (<= 3719 rune 3720) (= 3722 rune) (= 3725 rune) (<= 3732 rune 3735) (<= 3737 rune 3743) (<= 3745 rune 3747) + (= 3749 rune) (= 3751 rune) (<= 3754 rune 3755) (<= 3757 rune 3758) (= 3760 rune) (<= 3762 rune 3763) (= 3773 rune) + (<= 3776 rune 3780) (<= 3904 rune 3911) (<= 3913 rune 3945) (<= 4256 rune 4293) (<= 4304 rune 4342) + (= 4352 rune) (<= 4354 rune 4355) (<= 4357 rune 4359) (= 4361 rune) (<= 4363 rune 4364) (<= 4366 rune 4370) + (= 4412 rune) (= 4414 rune) (= 4416 rune) (= 4428 rune) (= 4430 rune) (= 4432 rune) (<= 4436 rune 4437) (= 4441 rune) + (<= 4447 rune 4449) (= 4451 rune) (= 4453 rune) (= 4455 rune) (= 4457 rune) (<= 4461 rune 4462) (<= 4466 rune 4467) + (= 4469 rune) (= 4510 rune) (= 4520 rune) (= 4523 rune) (<= 4526 rune 4527) (<= 4535 rune 4536) (= 4538 rune) + (<= 4540 rune 4546) (= 4587 rune) (= 4592 rune) (= 4601 rune) (<= 7680 rune 7835) (<= 7840 rune 7929) + (<= 7936 rune 7957) (<= 7960 rune 7965) (<= 7968 rune 8005) (<= 8008 rune 8013) (<= 8016 rune 8023) + (= 8025 rune) (= 8027 rune) (= 8029 rune) (<= 8031 rune 8061) (<= 8064 rune 8116) (<= 8118 rune 8124) (= 8126 rune) + (<= 8130 rune 8132) (<= 8134 rune 8140) (<= 8144 rune 8147) (<= 8150 rune 8155) (<= 8160 rune 8172) + (<= 8178 rune 8180) (<= 8182 rune 8188) (= 8486 rune) (<= 8490 rune 8491) (= 8494 rune) (<= 8576 rune 8578) + (<= 12353 rune 12436) (<= 12449 rune 12538) (<= 12549 rune 12588) (<= 44032 rune 55203))) + + (ideographic-rune-p (rune) + (or (<= 19968 rune 40869) (= 12295 rune) (<= 12321 rune 12329))) + + + (predicate-to-bv (p) + (let ((r (make-array *max* :element-type 'bit :initial-element 0))) + (dotimes (i #x10000 r) + (when (funcall p i) + (setf (aref r i) 1))))) ) + + `(progn + (DEFSUBST NAME-RUNE-P (RUNE) + (AND (<= 0 RUNE ,*max*) + (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3))) + (= 1 (SBIT ',(predicate-to-bv #'name-rune-p) + (THE FIXNUM RUNE)))))) + (DEFSUBST NAME-START-RUNE-P (RUNE) + (AND (<= 0 RUNE ,*MAX*) + (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3))) + (= 1 (SBIT ',(predicate-to-bv #'name-start-rune-p) + (THE FIXNUM RUNE)))))))) )))) \ No newline at end of file diff --git a/xml/xml-parse.lisp b/xml/xml-parse.lisp new file mode 100644 index 0000000..3d4c4c2 --- /dev/null +++ b/xml/xml-parse.lisp @@ -0,0 +1,2824 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: XML; readtable: glisp; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: A prototype XML parser +;;; Created: 1999-07-17 +;;; Author: Gilbert Baumann +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +;;; Streams + +;;; xstreams + +;; For reading runes, I defined my own streams, called xstreams, +;; because we want to be fast. A function call or even a method call +;; per character is not acceptable, instead of that we define a +;; buffered stream with and advertised buffer layout, so that we +;; could use the trick stdio uses: READ-RUNE and PEEK-RUNE are macros, +;; directly accessing the buffer and only calling some underflow +;; handler in case of stream underflows. This will yield to quite a +;; performance boost vs calling READ-BYTE per character. + +;; Also we need to do encoding and character set conversion on input, +;; this better done at large chunks of data rather than on a character +;; by character basis. This way we need a dispatch on the active +;; encoding only once in a while, instead of for each character. This +;; allows us to use a CLOS interface to do the underflow handling. + +;;; zstreams + +;; Now, for reading tokens, we define another kind of streams, called +;; zstreams. These zstreams also maintain an input stack to implement +;; inclusion of external entities. This input stack contains xstreams +;; or the special marker :STOP. Such a :STOP marker indicates, that +;; input should not continue there, but well stop; that is simulate an +;; EOF. The user is then responsible to pop this marker off the input +;; stack. +;; +;; This input stack is also used to detect circular entity inclusion. + +;; The zstream tokenizer recognizes the following types of tokens and +;; is controlled by the *DATA-BEHAVIOUR* flag. (Which should become a +;; slot of zstreams instead). + +;; Common +;; :xml-pi ( . ) ;processing-instruction starting with " . ) ;processing-instruction +;; :stag ( . ) ;start tag +;; :etag ( . ) ;end tag +;; :ztag ( . ) ;empty tag +;; : + +;; *data-behaviour* = :DTD +;; +;; :name +;; :#required +;; :#implied +;; :#fixed +;; :#pcdata +;; :s +;; :\[ :\] :\( :\) :|\ :\> :\" :\' :\, :\? :\* :\+ + +;; *data-behaviour* = :DOC +;; +;; :entity-ref +;; :cdata + + + + +;;; NOTES +;; +;; Stream buffers as well as RODs are supposed to be encoded in +;; UTF-16. + +;; where does the time go? +;; DATA-RUNE-P +;; CANON-NOT-CDATA-ATTVAL +;; READ-ATTVAL (MUFFLE) +;; CLOSy DOM +;; UTF-8 decoder (13%) +;; READ-ATTVAL (10%) +;; + +;;; TODO +;; +;; o Improve error messages: +;; - line and column number counters +;; - better texts +;; - better handling of errors (no crash'n burn behaviour) +;; +;; o provide for a faster DOM +;; +;; o parse document should get passed a document instance, so that a user +;; could pass his own DOM implementation +;; +;; o morph zstream into a context object and thus also get rid of +;; special variables. Put the current DTD there too. + +;; o the *scratch-pad* hack should become something much more +;; reentrant, we could either define a system-wide resource +;; or allocate some scratch-pads per context. + +;; o only parse the DTD on an option + +;; o make the invalid tests pass. +;; +;; o CR handling in utf-16 deocders +;; +;; o UCS-4 reader +;; +;; o max depth together with circle detection +;; (or proof, that our circle detection is enough). +;; +;; o element definitions (with att definitions in the elements) +;; +;; o store entities in the DTD +;; +;; o better extensibility wrt character representation, one may want to +;; have +;; - UTF-8 in standard CL strings +;; - UCS-2 in RODs +;; - UTF-16 in RODs +;; - UCS-4 in vectoren +;; +;; o xstreams auslagern, documententieren und dann auch in SGML und +;; CSS parser verwenden. (halt alles was zeichen liest). +;; +;; o merge node representation with SGML module +;; +;; o namespaces (this will get ugly). +;; +;; o validation +;; +;; o line/column number recording +;; +;; o better error messages +;; +;; o recording of source locations for nodes. +;; +;; o make the *scratch-pad* hack safe +;; +;; o based on the DTD and xml:space attribute implement HTML white +;; space rules. +;; +;; o on a parser option, do not expand external entities. +;; +;; o on a parser option, do not parse the DTD. +;; +;; o caching DTDs? +;; +;; That is, if we parse a lot of documents all having the same DTD, +;; we do not need to re-read it every time. +;; But watch the file write date, since not doing so would be +;; a good way to confuse a hell lot of users. +;; But: What to do with declarations in the " (type-of self) (mu (xstream-name self)))) + +(deftype read-element () 'rune) + +;; (unsigned-byte 16)) ;;t) + +(defmethod figure-encoding ((stream null)) + (values :utf-8 nil)) + +(defmethod figure-encoding ((stream stream)) + (let ((c0 (read-byte stream nil :eof))) + (cond ((eq c0 :eof) + (values :utf-8 nil)) + (t + (let ((c1 (read-byte stream nil :eof))) + (cond ((eq c1 :eof) + (values :utf-8 (list c0))) + (t + (cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil)) + ((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil)) + (t + (values :utf-8 (list c0 c1))))))))))) + +(defun call-with-open-xstream (continuation &rest open-args) + (let ((input (apply #'open (car open-args) :element-type '(unsigned-byte 8) (cdr open-args)))) + (unwind-protect + (progn + (funcall continuation (make-xstream input))) + (close input)))) + +(defmacro with-open-xstream ((stream &rest open-args) &body body) + `(call-with-open-xstream (lambda (,stream) .,body) .,open-args)) + +;;; Decoders + +;; The decoders share a common signature: +;; +;; DECODE input input-start input-end +;; output output-start output-end +;; eof-p +;; -> first-not-written ; first-not-read +;; +;; These decode functions should decode as much characters off `input' +;; into the `output' as possible and return the indexes to the first +;; not read and first not written element of `input' and `output' +;; respectively. If there are not enough bytes in `input' to decode a +;; full character, decoding shold be abandomed; the caller has to +;; ensure that the remaining bytes of `input' are passed to the +;; decoder again with more bytes appended. +;; +;; `eof-p' now in turn indicates, if the given input sequence, is all +;; the producer does have and might be used to produce error messages +;; in case of incomplete codes or decided what to do. +;; +;; Decoders are expected to handle the various CR/NL conventions and +;; canonicalize each end of line into a single NL rune (#xA) in good +;; old Lisp tradition. +;; + +;; TODO: change this to an encoding class, which then might carry +;; additional state. Stateless encodings could been represented by +;; keywords. e.g. +;; +;; defmethod DECODE-SEQUENCE ((encoding (eql :utf-8)) ...) +;; + +;;;; --------------------------------------------------------------------------- +;;;; rod hashtable +;;;; + +;;; make-rod-hashtable +;;; rod-hash-get hashtable rod &optional start end -> value ; successp +;;; (setf (rod-hash-get hashtable rod &optional start end) new-value +;;; + +(defstruct (rod-hashtable (:constructor make-rod-hashtable/low)) + size ;size of table + table ; + ) + +(defun make-rod-hashtable (&key (size 200)) + (setf size (glisp::nearest-greater-prime size)) + (make-rod-hashtable/low + :size size + :table (make-array size :initial-element nil))) + +(eval-when (compile eval load) + (defconstant +fixnum-bits+ + (1- (integer-length most-positive-fixnum)) + "Pessimistic approximation of the number of bits of fixnums.") + + (defconstant +fixnum-mask+ + (1- (expt 2 +fixnum-bits+)) + "Pessimistic approximation of the largest bit-mask, still being a fixnum.")) + +(defsubst stir (a b) + (%and +fixnum-mask+ + (%xor (%ior (%ash (%and a #.(ash +fixnum-mask+ -5)) 5) + (%ash a #.(- 5 +fixnum-bits+))) + b))) + +(defsubst rod-hash (rod start end) + "Compute a hash code out of a rod." + (let ((res (%- end start))) + (do ((i start (%+ i 1))) + ((%= i end)) + (declare (type fixnum i)) + (setf res (stir res (%rune rod i)))) + res)) + +(defsubst rod=* (x y &key (start1 0) (end1 (length x)) + (start2 0) (end2 (length y))) + (and (%= (%- end1 start1) (%- end2 start2)) + (do ((i start1 (%+ i 1)) + (j start2 (%+ j 1))) + ((%= i end1) + t) + (unless (= (%rune x i) (%rune y j)) + (return nil))))) + +(defsubst rod=** (x y start1 end1 start2 end2) + (and (%= (%- end1 start1) (%- end2 start2)) + (do ((i start1 (%+ i 1)) + (j start2 (%+ j 1))) + ((%= i end1) + t) + (unless (= (%rune x i) (%rune y j)) + (return nil))))) + +(defun rod-hash-get (hashtable rod &optional (start 0) (end (length rod))) + (declare (type (simple-array rune (*)) rod)) + (let ((j (%mod (rod-hash rod start end) + (rod-hashtable-size hashtable)))) + (dolist (q (svref (rod-hashtable-table hashtable) j) + (values nil nil nil)) + (declare (type cons q)) + (when (rod=** (car q) rod 0 (length (the (simple-array rune (*)) (car q))) start end) + (return (values (cdr q) t (car q))))))) + +(defun rod-hash-set (new-value hashtable rod &optional (start 0) (end (length rod))) + (let ((j (%mod (rod-hash rod start end) + (rod-hashtable-size hashtable))) + (key nil)) + (dolist (q (svref (rod-hashtable-table hashtable) j) + (progn + (setf key (rod-subseq* rod start end)) + (push (cons key new-value) + (aref (rod-hashtable-table hashtable) j)))) + (when (rod=* (car q) rod :start2 start :end2 end) + (setf key (car q)) + (setf (cdr q) new-value) + (return))) + (values new-value key))) + +(defun rod-subseq* (source start &optional (end (length source))) + (unless (and (typep start 'fixnum) (>= start 0)) + (error "~S is not a non-negative fixnum." start)) + (unless (and (typep end 'fixnum) (>= end start)) + (error "END argument, ~S, is not a fixnum no less than START, ~S." end start)) + (when (> start (length source)) + (error "START argument, ~S, should be no greater than length of rod." start)) + (when (> end (length source)) + (error "END argument, ~S, should be no greater than length of rod." end)) + (locally + (declare (type fixnum start end)) + (let ((res (make-rod (- end start)))) + (declare (type rod res)) + (do ((i (- (- end start) 1) (the fixnum (- i 1)))) + ((< i 0) res) + (declare (type fixnum i)) + (setf (%rune res i) (aref source (the fixnum (+ i start)))))))) + +(deftype ufixnum () `(unsigned-byte ,(integer-length most-positive-fixnum))) + +(defun rod-subseq** (source start &optional (end (length source))) + (declare (type (simple-array rune (*)) source) + (type ufixnum start) + (type ufixnum end) + (optimize (speed 3) (safety 0))) + (let ((res (make-array (%- end start) :element-type 'rune))) + (declare (type (simple-array rune (*)) res)) + (let ((i (%- end start))) + (declare (type ufixnum i)) + (loop + (setf i (- i 1)) + (when (= i 0) + (return)) + (setf (%rune res i) (%rune source (the ufixnum (+ i start)))))) + res)) + +(defun (setf rod-hash-get) (new-value hashtable rod &optional (start 0) (end (length rod))) + (rod-hash-set new-value hashtable rod start end)) + +(defparameter *name-hashtable* (make-rod-hashtable :size 2000)) + +(defun intern-name (rod &optional (start 0) (end (length rod))) + (multiple-value-bind (value successp key) (rod-hash-get *name-hashtable* rod start end) + (declare (ignore value)) + (if successp + key + (nth-value 1 (rod-hash-set t *name-hashtable* rod start end))))) + +;;;; --------------------------------------------------------------------------- +;;;; +;;;; rod collector +;;;; + +(defparameter *scratch-pad* + (make-array 1024 :element-type 'rune)) + +(defparameter *scratch-pad-2* + (make-array 1024 :element-type 'rune)) + +(defparameter *scratch-pad-3* + (make-array 1024 :element-type 'rune)) + +(defparameter *scratch-pad-4* + (make-array 1024 :element-type 'rune)) + +(declaim (type (simple-array rune (*)) + *scratch-pad* *scratch-pad-2* *scratch-pad-3* *scratch-pad-4*)) + +(defmacro %put-rune (rune-var put) + `(progn + (cond ((%> ,rune-var #xFFFF) + (,put (the (unsigned-byte 16) (%+ #xD7C0 (ash ,rune-var -10)))) + (,put (the (unsigned-byte 16) (%ior #xDC00 (%and ,rune-var #x3FF))))) + (t + (,put ,rune-var))))) + +(defun adjust-array-by-copying (old-array new-size) + "Adjust an array by copying and thus ensures, that result is a SIMPLE-ARRAY." + (let ((res (make-array new-size :element-type (array-element-type old-array)))) + (replace res old-array + :start1 0 :end1 (length old-array) + :start2 0 :end2 (length old-array)) + res)) + +(defmacro with-rune-collector-aux (scratch collect body mode) + (let ((rod (gensym)) + (n (gensym)) + (i (gensym)) + (b (gensym))) + `(let ((,n (length ,scratch)) + (,i 0) + (,b ,scratch)) + (declare (type fixnum ,n ,i)) + (macrolet + ((,collect (x) + `((lambda (x) + (locally + (declare #.*fast*) + (when (%>= ,',i ,',n) + (setf ,',n (* 2 ,',n)) + (setf ,',b + (setf ,',scratch + (adjust-array-by-copying ,',scratch ,',n)))) + (setf (aref (the (simple-array rune (*)) ,',b) ,',i) x) + (incf ,',i))) + ,x))) + ,@body + ,(ecase mode + (:intern + `(intern-name ,b 0 ,i)) + (:copy + `(let ((,rod (make-rod ,i))) + (while (not (%= ,i 0)) + (setf ,i (%- ,i 1)) + (setf (%rune ,rod ,i) + (aref (the (simple-array rune (*)) ,b) ,i))) + ,rod)) + (:raw + `(values ,b 0 ,i)) + ))))) + +'(defmacro with-rune-collector-aux (scratch collect body mode) + (let ((rod (gensym)) + (n (gensym)) + (i (gensym)) + (b (gensym))) + `(let ((,n (length ,scratch)) + (,i 0)) + (declare (type fixnum ,n ,i)) + (macrolet + ((,collect (x) + `((lambda (x) + (locally + (declare #.*fast*) + (when (%>= ,',i ,',n) + (setf ,',n (* 2 ,',n)) + (setf ,',scratch + (setf ,',scratch + (adjust-array-by-copying ,',scratch ,',n)))) + (setf (aref (the (simple-array rune (*)) ,',scratch) ,',i) x) + (incf ,',i))) + ,x))) + ,@body + ,(ecase mode + (:intern + `(intern-name ,scratch 0 ,i)) + (:copy + `(let ((,rod (make-rod ,i))) + (while (%> ,i 0) + (setf ,i (%- ,i 1)) + (setf (%rune ,rod ,i) + (aref (the (simple-array rune (*)) ,scratch) ,i))) + ,rod)) + (:raw + `(values ,scratch 0 ,i)) + ))))) + +(defmacro with-rune-collector ((collect) &body body) + `(with-rune-collector-aux *scratch-pad* ,collect ,body :copy)) + +(defmacro with-rune-collector-2 ((collect) &body body) + `(with-rune-collector-aux *scratch-pad-2* ,collect ,body :copy)) + +(defmacro with-rune-collector-3 ((collect) &body body) + `(with-rune-collector-aux *scratch-pad-3* ,collect ,body :copy)) + +(defmacro with-rune-collector-4 ((collect) &body body) + `(with-rune-collector-aux *scratch-pad-4* ,collect ,body :copy)) + +(defmacro with-rune-collector/intern ((collect) &body body) + `(with-rune-collector-aux *scratch-pad* ,collect ,body :intern)) + +(defmacro with-rune-collector/raw ((collect) &body body) + `(with-rune-collector-aux *scratch-pad* ,collect ,body :raw)) + +#| +(defmacro while-reading-runes ((reader stream-in) &rest body) + ;; Thou shalt not leave body via a non local exit + (let ((stream (make-symbol "STREAM")) + (rptr (make-symbol "RPTR")) + (fptr (make-symbol "FPTR")) + (buf (make-symbol "BUF")) ) + `(let* ((,stream ,stream-in) + (,rptr (xstream-read-ptr ,stream)) + (,fptr (xstream-fill-ptr ,stream)) + (,buf (xstream-buffer ,stream))) + (declare (type fixnum ,rptr ,fptr) + (type xstream ,stream)) + (macrolet ((,reader (res-var) + `(cond ((%= ,',rptr ,',fptr) + (setf (xstream-read-ptr ,',stream) ,',rptr) + (setf ,res-var (xstream-underflow ,',stream)) + (setf ,',rptr (xstream-read-ptr ,',stream)) + (setf ,',fptr (xstream-fill-ptr ,',stream)) + (setf ,',buf (xstream-buffer ,',stream))) + (t + (setf ,res-var + (aref (the (simple-array read-element (*)) ,',buf) + (the fixnum ,',rptr))) + (setf ,',rptr (%+ ,',rptr 1)))))) + (prog1 + (let () .,body) + (setf (xstream-read-ptr ,stream) ,rptr) ))))) +|# + +;;;; --------------------------------------------------------------------------- +;;;; DTD +;;;; + +(defparameter *entities* nil) +(defvar *dtd*) + +(defun absolute-uri (sysid source-stream) + (setq sysid (rod-string sysid)) + (let ((base-sysid + (dolist (k (zstream-input-stack source-stream)) + (let ((base-sysid (stream-name-file-name (xstream-name k)))) + (when base-sysid (return base-sysid)))))) + (assert (not (null base-sysid))) + (merge-sysid sysid base-sysid))) + +(defun absolute-extid (source-stream extid) + (case (car extid) + (:system + (list (car extid) + (absolute-uri (cadr extid) source-stream))) + (:public + (list (car extid) + (cadr extid) + (absolute-uri (caddr extid) source-stream))))) + +(defun define-entity (source-stream name kind def) + (when (eq (car def) :external) + (setf def + (list (car def) (absolute-extid source-stream (cadr def))))) + (setf name (intern-name name)) + (setf *entities* + (append *entities* + (list (cons (list kind name) + def))))) + +#|| +(defun define-element (zinput dtd element-name content-model) + ;; zinput is for source code location recoding + (let ((elmdef (make-elmdef :name element-name + :content content-model + ))) + ())) +||# + +(defun entity->xstream (entity-name kind &optional zstream) + ;; `zstream' is for error messages + (let ((looked (assoc (list kind entity-name) *entities* :test #'equal))) + (unless looked + (if zstream + (perror zstream "Entity '~A' is not defined." (rod-string entity-name)) + (error "Entity '~A' is not defined." (rod-string entity-name)))) + (let (r) + (ecase (cadr looked) + (:internal + (setf r (make-rod-xstream (caddr looked))) + (setf (xstream-name r) + (make-stream-name :entity-name entity-name + :entity-kind kind + :file-name nil))) + (:external + (setf r (open-extid (caddr looked))) + (setf (stream-name-entity-name (xstream-name r)) entity-name + (stream-name-entity-kind (xstream-name r)) kind))) + r))) + +(defun entity-source-kind (name type) + (let ((looked (assoc (list type name) *entities* :test #'equal))) + (unless looked + (error "Entity '~A' is not defined." (rod-string name))) + (cadr looked))) + +(defun open-extid (extid) + (let ((nam (ecase (car extid) + (:SYSTEM (cadr extid)) + (:PUBLIC (caddr extid))))) + (make-xstream (open-sysid nam) + :name (make-stream-name :file-name nam) + :initial-speed 1))) + +(defun call-with-entity-expansion-as-stream (zstream cont name kind) + ;; `zstream' is for error messages -- we need something better! + (let ((in (entity->xstream name kind zstream))) + (unwind-protect + (funcall cont in) + (close-xstream in)))) + +(defun define-default-entities () + (define-entity nil '#.(string-rod "lt") :general `(:internal #.(string-rod "<"))) + (define-entity nil '#.(string-rod "gt") :general `(:internal #.(string-rod ">"))) + (define-entity nil '#.(string-rod "amp") :general `(:internal #.(string-rod "&"))) + (define-entity nil '#.(string-rod "apos") :general `(:internal #.(string-rod "'"))) + (define-entity nil '#.(string-rod "quot") :general `(:internal #.(string-rod "\""))) + ;; + #|| + (define-entity nil '#.(string-rod "ouml") :general `(:internal #.(string-rod "ö"))) + (define-entity nil '#.(string-rod "uuml") :general `(:internal #.(string-rod "ü"))) + (define-entity nil '#.(string-rod "auml") :general `(:internal #.(string-rod "ä"))) + (define-entity nil '#.(string-rod "Ouml") :general `(:internal #.(string-rod "Ö"))) + (define-entity nil '#.(string-rod "Auml") :general `(:internal #.(string-rod "Ä"))) + (define-entity nil '#.(string-rod "Uuml") :general `(:internal #.(string-rod "Ü"))) + (define-entity nil '#.(string-rod "szlig") :general `(:internal #.(string-rod "ß"))) + ||# + ;; + #|| + (define-entity nil '#.(string-rod "nbsp") + :general `(:internal ,(let ((r (make-rod 1))) + (setf (aref r 0) #o240) + r))) + ||# + ) + +(defstruct attdef + ;; an attribute definition + element ;name of element this attribute belongs to + name ;name of attribute + type ;type of attribute; either one of :CDATA, :ID, :IDREF, :IDREFS, + ; :ENTITY, :ENTITIES, :NMTOKEN, :NMTOKENS, or + ; (:NOTATION *) + ; (:ENUMERATION *) + default) ;default value of attribute: + ; :REQUIRED, :IMPLIED, (:FIXED content) or (:DEFAULT content) + +(defstruct elmdef + ;; an element definition + name ;name of the element + content ;content model + attributes ;list of defined attribtes + defined-p) ;is this element defined? [*] + +;; [*] in XML it is possible to define attributes, before the element +;; itself is defined and since we hang attribute definitions into the +;; relevant element definitions, this flag indicates, whether an +;; element was actually defined. + +(defstruct dtd + elements ;hashtable or whatnot of all elements + attdefs ; + gentities ;general entities + pentities ;parameter entities + ) + +;;;; + +(defun define-attribute (dtd element name type default) + (let ((adef (make-attdef :element element + :name name + :type type + :default default))) + (cond ((find-attribute dtd element name) + (warn "Attribute \"~A\" of \"~A\" not redefined." + (rod-string name) + (rod-string element))) + (t + (push adef (dtd-attdefs dtd)))))) + +(defun find-attribute (dtd element name) + (dolist (k (dtd-attdefs dtd)) + (cond ((and (eq element (attdef-element k)) + (eq name (attdef-name k))) + (return k))))) + +(defun map-all-attdefs-for-element (dtd element continuation) + (declare (dynamic-extent continuation));this does not help under ACL + (dolist (k (dtd-attdefs dtd)) + (cond ((eq element (attdef-element k)) + (funcall continuation k))))) + +;;;; --------------------------------------------------------------------------- +;;;; z streams and lexer +;;;; + +(defstruct zstream + token-category + token-semantic + input-stack) + +(defun read-token (input) + (cond ((zstream-token-category input) + (multiple-value-prog1 + (values (zstream-token-category input) + (zstream-token-semantic input)) + (setf (zstream-token-category input) nil + (zstream-token-semantic input) nil))) + (t + (read-token-2 input)))) + +(defun peek-token (input) + (cond ((zstream-token-category input) + (values + (zstream-token-category input) + (zstream-token-semantic input))) + (t + (multiple-value-bind (c s) (read-token input) + (setf (zstream-token-category input) c + (zstream-token-semantic input) s)) + (values (zstream-token-category input) + (zstream-token-semantic input))))) + +(defun read-token-2 (input) + (cond ((null (zstream-input-stack input)) + (values :eof nil)) + (t + (let ((c (peek-rune (car (zstream-input-stack input))))) + (cond ((eq c :eof) + (cond ((eq (cadr (zstream-input-stack input)) :stop) + (values :eof nil)) + (t + (close-xstream (pop (zstream-input-stack input))) + (if (null (zstream-input-stack input)) + (values :eof nil) + (values :S nil) ;fake #x20 after PE expansion + )))) + (t + (read-token-3 input))))))) + +(defvar *data-behaviour* + ) ;either :DTD or :DOC + +(defun read-token-3 (zinput) + (let ((input (car (zstream-input-stack zinput)))) + ;; PI Comment + (let ((c (read-rune input))) + (cond + ;; first the common tokens + ((rune= #/< c) + (read-token-after-|<| zinput input)) + ;; now dispatch + (t + (ecase *data-behaviour* + (:DTD + (cond ((rune= #/\[ c) :\[) + ((rune= #/\] c) :\]) + ((rune= #/\( c) :\() + ((rune= #/\) c) :\)) + ((rune= #/\| c) :\|) + ((rune= #/\> c) :\>) + ((rune= #/\" c) :\") + ((rune= #/\' c) :\') + ((rune= #/\, c) :\,) + ((rune= #/\? c) :\?) + ((rune= #/\* c) :\*) + ((rune= #/\+ c) :\+) + ((name-rune-p c) + (unread-rune c input) + (values :name (read-name-token input))) + ((rune= #/# c) + (let ((q (read-name-token input))) + (cond ((equalp q '#.(string-rod "REQUIRED")) :|#REQUIRED|) + ((equalp q '#.(string-rod "IMPLIED")) :|#IMPLIED|) + ((equalp q '#.(string-rod "FIXED")) :|#FIXED|) + ((equalp q '#.(string-rod "PCDATA")) :|#PCDATA|) + (t + (error "Unknown token: ~S." q))))) + ((or (rune= c #/U+0020) + (rune= c #/U+0009) + (rune= c #/U+000D) + (rune= c #/U+000A)) + (values :s nil)) + ((rune= #/% c) + (cond ((name-start-rune-p (peek-rune input)) + ;; an entity reference + (read-pe-reference zinput)) + (t + (values :%)))) + (t + (error "Unexpected character ~S." c)))) + (:DOC + (cond + ((rune= c #/&) + (multiple-value-bind (kind data) (read-entity-ref input) + (cond ((eq kind :named) + (values :entity-ref data) ) + ((eq kind :numeric) + (values :cdata + (with-rune-collector (collect) + (%put-rune data collect))))))) + (t + (unread-rune c input) + (values :cdata (read-cdata input))) )))))))) + +(defun read-pe-reference (zinput) + (let* ((input (car (zstream-input-stack zinput))) + (nam (read-name-token input))) + (assert (rune= #/\; (read-rune input))) + (cond (*expand-pe-p* + ;; no external entities here! + (let ((i2 (entity->xstream nam :parameter))) + (zstream-push i2 zinput)) + (values :S nil) ;space before inserted PE expansion. + ) + (t + (values :pe-reference nam)) ))) + +(defun read-token-after-|<| (zinput input) + (let ((d (read-rune input))) + (cond ((eq d :eof) + (error "EOF after '<'")) + ((rune= #/! d) + (read-token-after-| in case of a named entity + or :NUMERIC in case of numeric entities. + The initial #\\& is considered to be consumed already." + (let ((c (peek-rune input))) + (cond ((eq c :eof) + (error "EOF after '&'")) + ((rune= c #/#) + (values :numeric (read-numeric-entity input))) + (t + (unless (name-start-rune-p (peek-rune input)) + (error "Expecting name after &.")) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (unless (rune= c #/\;) + (perror input "Expected \";\".")) + (values :named name)))))) + +(defsubst read-S? (input) + (while (member (peek-rune input) '(#/U+0020 #/U+0009 #/U+000A #/U+000D) + :test #'eq) + (consume-rune input))) + +(defun read-tag-2 (zinput input kind) + (let ((name (read-name-token input)) + (atts nil)) + (setf atts (read-attribute-list zinput input nil)) + ;;(setf atts (nreverse atts)) + ;; care for atts + ;; + ;;zzz + (let ((fn (lambda (adef &aux x) + (setf x (assoc (attdef-name adef) atts)) + + (when (and (consp (attdef-default adef)) + (eq (car (attdef-default adef)) :default) + (not x)) + (setf atts (cons (setf x (cons (attdef-name adef) (cadr (attdef-default adef)))) + atts))) + (when (and (consp (attdef-default adef)) + (eq (car (attdef-default adef)) :fixed) + (not x)) + (setf atts (cons (setf x (cons (attdef-name adef) (cadr (attdef-default adef)))) + atts))) + (unless (eq (attdef-type adef) :cdata) + (when x + (setf (cdr x) (canon-not-cdata-attval (cdr x))))) + + ;; xxx more tests + ))) + (declare (dynamic-extent fn)) + (map-all-attdefs-for-element + *dtd* name fn)) + + ;; check for double attributes + (do ((q atts (cdr q))) + ((null q)) + (cond ((find (caar q) (cdr q) :key #'car) + (error "Attribute ~S has two definitions in element ~S." + (rod-string (caar q)) + (rod-string name))))) + + (cond ((eq (peek-rune input) #/>) + (consume-rune input) + (values kind (cons name atts))) + ((eq (peek-rune input) #//) + (consume-rune input) + (assert (rune= #/> (read-rune input))) + (values :ztag (cons name atts))) + (t + (error "syntax error in read-tag-2.")) ))) + +(defun read-attribute (zinput input) + (unless (name-start-rune-p (peek-rune input)) + (error "Expected name.")) + ;; arg thanks to the post mortem nature of name space declarations, + ;; we could only process the attribute values post mortem. + (let ((name (read-name-token input))) + (while (let ((c (peek-rune input))) + (and (not (eq c :eof)) + (or (= c #/U+0020) + (= c #/U+0009) + (= c #/U+000A) + (= c #/U+000D)))) + (consume-rune input)) + (unless (eq (read-rune input) #/=) + (perror zinput "Expected \"=\".")) + (while (let ((c (peek-rune input))) + (and (not (eq c :eof)) + (or (= c #/U+0020) + (= c #/U+0009) + (= c #/U+000A) + (= c #/U+000D)))) + (consume-rune input)) + (cons name (read-att-value-2 input)) + ;;(cons name (read-att-value zinput input :att t)) + )) + +(defun canon-not-cdata-attval (value) + ;; | If the declared value is not CDATA, then the XML processor must + ;; | further process the normalized attribute value by discarding any + ;; | leading and trailing space (#x20) characters, and by replacing + ;; | sequences of space (#x20) characters by a single space (#x20) + ;; | character. + (with-rune-collector (collect) + (let ((gimme-20 nil) + (anything-seen-p nil)) + (map nil (lambda (c) + (cond ((= c #x20) + (setf gimme-20 t)) + (t + (when (and anything-seen-p gimme-20) + (collect #x20)) + (setf gimme-20 nil) + (setf anything-seen-p t) + (collect c)))) + value)))) + +#|| +(defun canon-not-cdata-attval (value) + ;; | If the declared value is not CDATA, then the XML processor must + ;; | further process the normalized attribute value by discarding any + ;; | leading and trailing space (#x20) characters, and by replacing + ;; | sequences of space (#x20) characters by a single space (#x20) + ;; | character. + value) +||# + +(defsubst data-rune-p (c) + ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + (or (= c #x9) (= c #xA) (= c #xD) + (<= #x20 c #xD7FF) + (<= #xE000 c #xFFFD) + ;; + (<= #xD800 c #xDBFF) + (<= #xDC00 c #xDFFF) + ;; + )) + +#|| +(defsubst data-rune-p (c) + t) +||# + +(defun read-att-value (zinput input mode &optional canon-space-p (delim nil)) + (with-rune-collector-2 (collect) + (labels ((muffle (input delim) + (let (c) + (loop + (setf c (read-rune input)) + (cond ((eql delim c) + (return)) + ((eq c :eof) + (error "EOF")) + ((rune= c #/&) + (setf c (peek-rune input)) + (cond ((rune= c #/#) + (let ((c (read-numeric-entity input))) + (%put-rune c collect))) + (t + (unless (name-start-rune-p (peek-rune input)) + (error "Expecting name after &.")) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (assert (rune= c #/\;)) + (ecase mode + (:att + (recurse-on-entity + zinput name :general + (lambda (zinput) + (muffle (car (zstream-input-stack zinput)) + :eof)))) + (:ent + ;; bypass, but never the less we + ;; need to check for legal + ;; syntax. + ;; Must it be defined? + ;; allerdings: unparsed sind verboten + (collect #/&) + (map nil (lambda (x) (collect x)) name) + (collect #/\; ))))))) + ((and (eq mode :ent) (rune= c #/%)) + (unless (name-start-rune-p (peek-rune input)) + (error "Expecting name after %.")) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (assert (rune= c #/\;)) + (cond (*expand-pe-p* + (recurse-on-entity + zinput name :parameter + (lambda (zinput) + (muffle (car (zstream-input-stack zinput)) + :eof)))) + (t + (error "No PE here."))))) + ((and (eq mode :att) (rune= c #/<)) + ;; xxx fix error message + (cerror "Eat them in spite of this." + "For no apparent reason #\/< is forbidden in attribute values. ~ + You lost -- next time choose SEXPR syntax.") + (collect c)) + ((and canon-space-p (space-rune-p c)) + (collect #/space)) + ((not (data-rune-p c)) + (error "illegal char: ~S." c)) + (t + (collect c))))))) + (declare (dynamic-extent #'muffle)) + (muffle input (or delim + (let ((delim (read-rune input))) + (assert (member delim '(#/\" #/\'))) + delim)))))) + +(defun read-numeric-entity (input) + ;; xxx eof handling + ;; The #/& is already read + (let ((res + (let ((c (read-rune input))) + (assert (rune= c #/#)) + (setq c (read-rune input)) + (cond ((rune= c #/x) + ;; hexadecimal + (setq c (read-rune input)) + (assert (digit-rune-p c 16)) + (prog1 + (parse-integer + (with-output-to-string (sink) + (write-char (code-char c) sink) + (while (digit-rune-p (setq c (read-rune input)) 16) + (write-char (code-char c) sink))) + :radix 16) + (assert (rune= c #/\;))) + ) + ((<= #/0 c #/9) + ;; decimal + (prog1 + (parse-integer + (with-output-to-string (sink) + (write-char (code-char c) sink) + (while (<= #/0 (setq c (read-rune input)) #/9) + (write-char (code-char c) sink))) + :radix 10) + (assert (rune= c #/\;))) ) + (t + (error "Bad char in numeric character entity.") ))))) + (unless (data-char-p res) + (error "expansion of numeric character reference (#x~X) is no data char." + res)) + res)) + +(defun read-pi (input) + ;; ") (return)) + (when (rune= d #/?) + (collect #/?) + (go state-2)) + (collect #/?) + (collect d) + (go state-1))))) + +(defun read-comment-content (input &aux d) + (let ((warnedp nil)) + (with-rune-collector (collect) + (block nil + (tagbody + state-1 + (setf d (read-rune input)) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/-) (go state-2)) + (collect d) + (go state-1) + state-2 ;; #/- seen + (setf d (read-rune input)) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/-) (go state-3)) + (collect #/-) + (collect d) + (go state-1) + state-3 ;; #/- #/- seen + (setf d (read-rune input)) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/>) (return)) + (unless warnedp + (warn "WFC: no '--' in comments please.") + (setf warnedp t)) + (when (rune= d #/-) + (collect #/-) + (go state-3)) + (collect #/-) + (collect #/-) + (collect d) + (go state-1)))))) + +(defun read-cdata-sect (input &aux d) + ;; + (with-rune-collector (collect) + (block nil + (tagbody + state-1 + (setf d (read-rune input)) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/\]) (go state-2)) + (collect d) + (go state-1) + state-2 ;; #/] seen + (setf d (read-rune input)) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/\]) (go state-3)) + (collect #/\]) + (collect d) + (go state-1) + state-3 ;; #/\] #/\] seen + (setf d (read-rune input)) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/>) + (return)) + (when (rune= d #/\]) + (collect #/\]) + (go state-3)) + (collect #/\]) + (collect #/\]) + (collect d) + (go state-1))))) + +#+(or) ;; FIXME: There is another definition below that looks more reasonable. +(defun read-cdata (input initial-char &aux d) + (cond ((not (data-rune-p initial-char)) + (error "Illegal char: ~S." initial-char))) + (with-rune-collector (collect) + (block nil + (tagbody + (cond ((rune= initial-char #/\]) + (go state-2)) + (t + (collect initial-char))) + state-1 + (setf d (peek-rune input)) + (when (or (eq d :eof) (rune= d #/<) (rune= d #/&)) + (return)) + (read-rune input) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/\]) (go state-2)) + (collect d) + (go state-1) + + state-2 ;; #/\] seen + (setf d (peek-rune input)) + (when (or (eq d :eof) (rune= d #/<) (rune= d #/&)) + (collect #/\]) + (return)) + (read-rune input) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/\]) (go state-3)) + (collect #/\]) + (collect d) + (go state-1) + + state-3 ;; #/\] #/\] seen + (setf d (peek-rune input)) + (when (or (eq d :eof) (rune= d #/<) (rune= d #/&)) + (collect #/\]) + (collect #/\]) + (return)) + (read-rune input) + (unless (data-rune-p d) + (error "Illegal char: ~S." d)) + (when (rune= d #/>) + (error "For no apparent reason ']]>' in not allowed in a CharData token -- you lost.")) + (when (rune= d #/\]) + (collect #/\]) + (go state-3)) + (collect #/\]) + (collect #/\]) + (collect d) + (go state-1))))) + + +;; some character categories + +#|| +(defun name-start-rune-p (rune) + (or (<= #x0041 rune #x005A) + (<= #x0061 rune #x007A) + ;; lots more + (>= rune #x0080) + (rune= rune #/_) + (rune= rune #/:))) + +(defun name-rune-p (rune) + (or (name-start-rune-p rune) + (rune= rune #/.) + (rune= rune #/-) + (rune<= #/0 rune #/9))) +||# + +(defun space-rune-p (rune) + (declare (type rune rune)) + (or (rune= rune #/U+0020) + (rune= rune #/U+0009) + (rune= rune #/U+000A) + (rune= rune #/U+000D))) + +(defun data-char-p (c) + ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + (or (= c #x9) (= c #xA) (= c #xD) + (<= #x20 c #xD7FF) + (<= #xE000 c #xFFFD) + (<= #x10000 c #x10FFFF))) + +(defun pubid-char-p (c) + (or (= c #x20) (= c #xD) (= c #xA) + (<= #/a c #/z) + (<= #/A c #/Z) + (<= #/0 c #/9) + (member c '(#/- #/' #/\( #/\) #/+ #/, #/. #// + #/: #/= #/? #/\; #/! #/* #/# + #/@ #/$ #/_ #/%)))) + + +(defun expect (input category) + (multiple-value-bind (cat sem) (read-token input) + (unless (eq cat category) + (error "Expected ~S saw ~S [~S]" category cat sem)) + (values cat sem))) + +(defun consume-token (input) + (read-token input)) + +;;;; --------------------------------------------------------------------------- +;;;; Parser +;;;; + +(defun p/S (input) + ;; S ::= (#x20 | #x9 | #xD | #xA)+ + (expect input :S) + (while (eq (peek-token input) :S) + (consume-token input))) + +(defun p/S? (input) + ;; S ::= (#x20 | #x9 | #xD | #xA)+ + (while (eq (peek-token input) :S) + (consume-token input))) + +(defun p/name (input) + (nth-value 1 (expect input :name))) + +(defun p/attlist-decl (input) + ;; [52] AttlistDecl ::= '' + (let (elm-name) + (expect input :|) + (consume-token input) + (return)) + (t + (multiple-value-bind (name type default) (p/attdef input) + (define-attribute *dtd* elm-name name type default)) ))) + (:> + (return)) + (otherwise + (error "Expected either another AttDef or end of \" (S? )* S? + ;; + (declare (type function item-parser)) + (let (res) + (p/S? input) + (setf res (list (funcall item-parser input))) + (loop + (p/S? input) + (cond ((eq (peek-token input) delimiter) + (consume-token input) + (p/S? input) + (push (funcall item-parser input) res)) + (t + (return)))) + (p/S? input) + (reverse res))) + +(defun p/att-type (input) + ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType + ;; [55] StringType ::= 'CDATA' + ;; [56] TokenizedType ::= 'ID' /*VC: ID */ + ;; /*VC: One ID per Element Type */ + ;; /*VC: ID Attribute Default */ + ;; | 'IDREF' /*VC: IDREF */ + ;; | 'IDREFS' /*VC: IDREF */ + ;; | 'ENTITY' /*VC: Entity Name */ + ;; | 'ENTITIES' /*VC: Entity Name */ + ;; | 'NMTOKEN' /*VC: Name Token */ + ;; | 'NMTOKENS' /*VC: Name Token */ + ;; [57] EnumeratedType ::= NotationType | Enumeration + ;; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' + ;; /* VC: Notation Attributes */ + ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */ + (multiple-value-bind (cat sem) (read-token input) + (cond ((eq cat :name) + (cond ((equalp sem '#.(string-rod "CDATA")) :cdata) + ((equalp sem '#.(string-rod "ID")) :id) + ((equalp sem '#.(string-rod "IDREF")) :idrefs) + ((equalp sem '#.(string-rod "IDREFS")) :idrefs) + ((equalp sem '#.(string-rod "ENTITY")) :entity) + ((equalp sem '#.(string-rod "ENTITIES")) :entities) + ((equalp sem '#.(string-rod "NMTOKEN")) :nmtoken) + ((equalp sem '#.(string-rod "NMTOKENS")) :nmtokens) + ((equalp sem '#.(string-rod "NOTATION")) + ;; xxx nmtoken vs name + (let (names) + (p/S input) + (expect input :\() + (setf names (p/list input #'p/name :\| )) + (expect input :\)) + (cons :notation names))) + (t + (error "In p/att-type: ~S ~S." cat sem)))) + ((eq cat :\() + ;; xxx nmtoken vs name + (let (names) + ;;(expect input :\() + (setf names (p/list input #'p/name :\| )) + (expect input :\)) + (cons :enumeration names))) + (t + (error "In p/att-type: ~S ~S." cat sem)) ))) + +(defun p/default-decl (input) + ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' + ;; | (('#FIXED' S)? AttValue) /* VC: Required Attribute */ + ;; + ;; /* VC: Attribute Default Legal */ + ;; /* WFC: No < in Attribute Values */ + ;; /* VC: Fixed Attribute Default */ + (multiple-value-bind (cat sem) (peek-token input) + (cond ((eq cat :|#REQUIRED|) + (consume-token input) :required) + ((eq cat :|#IMPLIED|) + (consume-token input) :implied) + ((eq cat :|#FIXED|) + (consume-token input) + (p/S input) + (list :fixed (p/att-value input))) + ((or (eq cat :\') (eq cat :\")) + (list :default (p/att-value input))) + (t + (error "p/default-decl: ~S ~S." cat sem)) ))) +;;;; + +;; [70] EntityDecl ::= GEDecl | PEDecl +;; [71] GEDecl ::= '' +;; [72] PEDecl ::= '' +;; [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) +;; [74] PEDef ::= EntityValue | ExternalID +;; [75] ExternalID ::= 'SYSTEM' S SystemLiteral +;; | 'PUBLIC' S PubidLiteral S SystemLiteral +;; [76] NDataDecl ::= S 'NDATA' S Name /* VC: Notation Declared */ + +(defun p/entity-decl (input) + (let (name def kind) + (expect input :|))) + +(defun p/entity-def (input kind) + (multiple-value-bind (cat sem) (peek-token input) + (cond ((member cat '(:\" :\')) + (list :internal (p/entity-value input))) + ((and (eq cat :name) + (or (equalp sem '#.(string-rod "SYSTEM")) + (equalp sem '#.(string-rod "PUBLIC")))) + (let (extid ndata) + (setf extid (p/external-id input nil)) + (when (eq kind :general) ;NDATA allowed at all? + (cond ((eq (peek-token input) :S) + (p/S? input) + (when (and (eq (peek-token input) :name) + (equalp (nth-value 1 (peek-token input)) + '#.(string-rod "NDATA"))) + (consume-token input) + (p/S input) + (setf ndata (p/name input)))))) + (list :external extid ndata))) + (t + (error "p/entity-def: ~S / ~S." cat sem)) ))) + +(defun p/entity-value (input) + (let ((delim (if (eq (read-token input) :\") #/\" #/\'))) + (read-att-value input + (car (zstream-input-stack input)) + :ent + nil + delim))) + +(defun p/att-value (input) + (let ((delim (if (eq (read-token input) :\") #/\" #/\'))) + (read-att-value input + (car (zstream-input-stack input)) + :att + t + delim))) + +(defun p/external-id (input &optional (public-only-ok-p nil)) + ;; xxx public-only-ok-p + (multiple-value-bind (cat sem) (read-token input) + (cond ((and (eq cat :name) (equalp sem '#.(string-rod "SYSTEM"))) + (p/S input) + (list :system (p/system-literal input)) + ) + ((and (eq cat :name) (equalp sem '#.(string-rod "PUBLIC"))) + (let (pub sys) + (p/S input) + (setf pub (p/pubid-literal input)) + (when (eq (peek-token input) :S) + (p/S input) + (when (member (peek-token input) '(:\" :\')) + (setf sys (p/system-literal input)))) + (unless (every #'pubid-char-p pub) + (error "Illegal pubid: ~S." (rod-string pub))) + (when (and (not public-only-ok-p) + (null sys)) + (error "System identifier needed for this PUBLIC external identifier.")) + (list :public pub sys))) + (t + (error "Expected external-id: ~S / ~S." cat sem))))) + + +;; [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") +;; [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" +;; [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] +;; | [-'()+,./:=?;!*#@$_%] + +(defun p/system-literal (input) + (multiple-value-bind (cat) (read-token input) + (cond ((member cat '(:\" :\')) + (let ((delim (if (eq cat :\") #/\" #/\'))) + (with-rune-collector (collect) + (loop + (let ((c (read-rune (car (zstream-input-stack input))))) + (cond ((eq c :eof) + (error "EOF in system literal.")) + ((rune= c delim) + (return)) + (t + (collect c)))))))) + (t + (error "Expect either \" or \'."))))) + +(defun p/pubid-literal (input) + ;; xxx check for valid chars + (p/system-literal input)) + + +;;;; + +(defun p/element-decl (input) + (let (name content) + (expect input :|) + (list :element name content))) + +(defun legal-content-model-p (cspec) + (or (eq cspec :PCDATA) + (eq cspec :ANY) + (eq cspec :EMPTY) + (and (consp cspec) + (eq (car cspec) '*) + (consp (cadr cspec)) + (eq (car (cadr cspec)) 'or) + (eq (cadr (cadr cspec)) :pcdata) + (every #'vectorp (cddr (cadr cspec)))) + (labels ((walk (x) + (cond ((member x '(:PCDATA :ANY :EMPTY)) + nil) + ((atom x) t) + ((and (walk (car x)) + (walk (cdr x))))))) + (walk cspec)))) + +;; wir fahren besser, wenn wir machen: + +;; cspec ::= 'EMPTY' | 'ANY' | '#PCDATA' +;; | Name +;; | cs +;; cs ::= '(' S? cspec ( S? '|' S? cs)* S? ')' ('?' | '*' | '+')? +;; und eine post mortem analyse + +(defun p/cspec (input &optional (level 0) (only-names-p nil)) + (let ((term + (let ((names nil) op-cat op res) + (multiple-value-bind (cat sem) (peek-token input) + (cond ((eq cat :name) + (consume-token input) + (cond ((rod= sem '#.(string-rod "EMPTY")) + :empty) + ((rod= sem '#.(string-rod "ANY")) + :any) + (t + sem))) + ((and (eq cat :\#PCDATA) (not only-names-p)) + (unless (= level 1) + (error "#PCDATA only on top level in content modell.")) + (consume-token input) + :pcdata) + ((and (eq cat :\() (not only-names-p)) + (consume-token input) + (p/S? input) + (setq names (list (p/cspec input (+ level 1)))) + (p/S? input) + (let ((on? (eq (car names) :pcdata))) + (cond ((member (peek-token input) '(:\| :\,)) + (setf op-cat (peek-token input)) + (setf op (if (eq op-cat :\,) 'and 'or)) + (while (eq (peek-token input) op-cat) + (consume-token input) + (p/S? input) + (push (p/cspec input (+ level 1) on?) names) + (p/S? input)) + (setf res (cons op (reverse names)))) + (t + (setf res (car names))))) + (p/S? input) + (expect input :\)) + res) + (t + (error "p/cspec - ~s / ~s" cat sem))))))) + (cond ((eq (peek-token input) :?) (consume-token input) (list '? term)) + ((eq (peek-token input) :+) (consume-token input) (list '+ term)) + ((eq (peek-token input) :*) (consume-token input) (list '* term)) + (t + term)))) + +;; [52] AttlistDecl ::= '' + + +;; [52] AttlistDecl ::= '' +;; [52] AttlistDecl ::= '' +;; [53] AttDefs ::= S Name S AttType S DefaultDecl AttDefs +;; [53] AttDefs ::= + +(defun p/notation-decl (input) + (let (name id) + (expect input :|) + (list :notation-decl name id))) + +;;; + +(defun p/conditional-sect (input) + (expect input :) ) + +(defun p/ignore-sect (input) + ;; "))) + (cond ((and (rune= c3 #/<) (rune= c2 #/!) (rune= c1 #/\[)) + (incf level))) + (cond ((and (rune= c3 #/\]) (rune= c2 #/\]) (rune= c1 #/>)) + (decf level))) )))) + +(defun p/ext-subset-decl (input) + ;; ( markupdecl | conditionalSect | S )* + (loop + (case (let ((*expand-pe-p* nil)) (peek-token input)) + (:| )) + (setf extid (p/external-id input t)))) + (p/S? input) + (when (eq (peek-token input) :\[ ) + (consume-token input) + (while (progn (p/S? input) + (not (eq (peek-token input) :\] ))) + (if (eq (peek-token input) :pe-reference) + (let ((name (nth-value 1 (read-token input)))) + (recurse-on-entity input name :parameter + (lambda (input) + (ecase (entity-source-kind name :parameter) + (:external + (p/ext-subset input)) + (:internal + (p/ext-subset-decl input))) + (unless (eq :eof (peek-token input)) + (error "Trailing garbage."))))) + (p/markup-decl input))) + (consume-token input) + (p/S? input)) + (expect input :>) + (when extid + (let* ((xi2 (open-extid (absolute-extid input extid))) + (zi2 (make-zstream :input-stack (list xi2)))) + (let () + (p/ext-subset zi2)))) + (list :doctype name extid)))) + +(defun p/misc*-2 (input) + ;; Misc* + (while (member (peek-token input) '(:comment :pi :s)) + (when (eq (peek-token input) :pi) + (sax:processing-instruction + *handler* + (car (nth-value 1 (peek-token input))) + (cdr (nth-value 1 (peek-token input))))) + (consume-token input))) + + +(defvar *handler*) + +(defun p/document (input handler) + (let ((*handler* handler) + (*namespace-bindings* *default-namespace-bindings*)) + (setf *entities* nil) + (setf *dtd* (make-dtd)) + (define-default-entities) + (sax:start-document *handler*) + ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc* + ;; Misc ::= Comment | PI | S + ;; xmldecl::='' + ;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) + ;; + ;; we will use the attribute-value parser for the xml decl. + (let ((*data-behaviour* :DTD)) + ;; optional XMLDecl? + (cond ((eq (peek-token input) :xml-pi) + (let ((hd (parse-xml-pi (cdr (nth-value 1 (peek-token input))) t))) + (setup-encoding input hd)) + ;; FIXME: Ceci n'est pas un pi. Should probably go away. + ;; (hmot 30/06/03) + (sax:processing-instruction + *handler* + (car (nth-value 1 (peek-token input))) + (cdr (nth-value 1 (peek-token input)))) + (read-token input))) + (set-full-speed input) + ;; Misc* + (p/misc*-2 input) + ;; (doctypedecl Misc*)? + (when (eq (peek-token input) :' content + (when (eq (peek-token input) :xml-pi) + (let ((hd (parse-xml-pi (cdr (nth-value 1 (peek-token input))) nil))) + (setup-encoding input hd)) + (consume-token input) ) + (set-full-speed input) + (p/content input)) + +(defun parse-xml-pi (content sd-ok-p) + ;; --> xml-header + ;;(make-xml-header)) + (let* ((res (make-xml-header)) + (i (make-rod-xstream content)) + (atts (read-attribute-list 'foo i t))) ;xxx on 'foo + (unless (eq (peek-rune i) :eof) + (error "Garbage at end of XML PI.")) + ;; versioninfo muss da sein + ;; dann ? encodingdecl + ;; dann ? sddecl + ;; dann ende + (when (and (not (eq (caar atts) (intern-name '#.(string-rod "version")))) + sd-ok-p) + (error "XML PI needs version.")) + (when (eq (caar atts) (intern-name '#.(string-rod "version"))) + (unless (and (>= (length (cdar atts)) 1) + (every (lambda (x) + (or (<= #/a x #/z) + (<= #/A x #/Z) + (<= #/0 x #/9) + (rune= x #/_) + (rune= x #/.) + (rune= x #/:) + (rune= x #/-))) + (cdar atts))) + (error "Bad XML version number: ~S." (rod-string (cdar atts)))) + (setf (xml-header-version res) (rod-string (cdar atts))) + (pop atts)) + (when (eq (caar atts) (intern-name '#.(string-rod "encoding"))) + (unless (and (>= (length (cdar atts)) 1) + (every (lambda (x) + (or (<= #/a x #/z) + (<= #/A x #/Z) + (<= #/0 x #/9) + (rune= x #/_) + (rune= x #/.) + (rune= x #/-))) + (cdar atts)) + ((lambda (x) + (or (<= #/a x #/z) + (<= #/A x #/Z) + (<= #/0 x #/9))) + (aref (cdar atts) 0))) + (error "Bad XML encoding name: ~S." (rod-string (cdar atts)))) + (setf (xml-header-encoding res) (rod-string (cdar atts))) + (pop atts)) + (when (and sd-ok-p (eq (caar atts) (intern-name '#.(string-rod "standalone")))) + (unless (or (rod= (cdar atts) '#.(string-rod "yes")) + (rod= (cdar atts) '#.(string-rod "no"))) + (error "Hypersensitivity pitfall: ~ + XML PI's 'standalone' attribute must be exactly \"yes\" or \"no\" and not ~S." + (rod-string (cdar atts)))) + (setf (xml-header-standalone-p res) + (if (rod-equal '#.(string-rod "yes") (cdar atts)) + :yes + :no)) + (pop atts)) + (when atts + (error "XML designers decided to disallow future extensions to the set ~ + of allowed XML PI's attributes -- you might have lost big on ~S (~S)" + (rod-string content) sd-ok-p + )) + res)) + +;;;; --------------------------------------------------------------------------- +;;;; mu +;;;; + +(defun mu (x) + (cond ((stringp x) x) + ((vectorp x) (rod-string x)) + ((consp x) + (cons (mu (car x)) (mu (cdr x)))) + (x))) + +;;;; --------------------------------------------------------------------------- +;;;; +;;;; canonical XML according to James Clark +;;;; + +;;;; User inteface ;;;; + +(defun parse-file (filename &optional (handler (make-instance 'dom-impl::dom-builder))) + (with-open-xstream (input filename) + (setf (xstream-name input) + (make-stream-name + :entity-name "main document" + :entity-kind :main + :file-name filename)) + (let ((zstream (make-zstream :input-stack (list input)))) + (peek-rune input) + (progn 'time + (p/document zstream handler))))) + +(defun parse-stream (stream &optional (handler (make-instance 'dom-impl::dom-builder))) + (let* ((xstream + (make-xstream + stream + :name (make-stream-name + :entity-name "main document" + :entity-kind :main + :file-name (or (ignore-errors (pathname *standard-output*)) + *default-pathname-defaults*)) + :initial-speed 1)) + (zstream (make-zstream :input-stack (list xstream)))) + (p/document zstream handler))) + +(defun parse-string (string &optional (handler (make-instance 'dom-impl::dom-builder))) + (let* ((x (string->xstream string)) + (z (make-zstream :input-stack (list x)))) + (p/document z handler))) + +(defun string->xstream (string) + (make-rod-xstream (string-rod string))) + +;;;; + +#+ALLEGRO +(defmacro sp (&body body) + `(progn + (prof:with-profiling (:type :space) .,body) + (prof:show-flat-profile))) + +#+ALLEGRO +(defmacro tm (&body body) + `(progn + (prof:with-profiling (:type :time) .,body) + (prof:show-flat-profile))) + +;;;; + +(defun zstream-push (new-xstream zstream) + (cond ((find-if (lambda (x) + (and (xstream-p x) + (eql (stream-name-entity-name (xstream-name x)) + (stream-name-entity-name (xstream-name new-xstream))) + (eql (stream-name-entity-kind (xstream-name x)) + (stream-name-entity-kind (xstream-name new-xstream))))) + (zstream-input-stack zstream)) + (error "Infinite recursion."))) + (push new-xstream (zstream-input-stack zstream)) + zstream) + +(defun recurse-on-entity (zstream name kind continuation) + (assert (not (zstream-token-category zstream))) + ;;(sleep .2) + ;;(warn "~S / ~S[~S]." (zstream-input-stack zstream) (mu name) kind) + (call-with-entity-expansion-as-stream + zstream + (lambda (new-xstream) + (push :stop (zstream-input-stack zstream)) + (zstream-push new-xstream zstream) + (prog1 + (funcall continuation zstream) + (assert (eq (peek-token zstream) :eof)) + (assert (eq (pop (zstream-input-stack zstream)) new-xstream)) + (close-xstream new-xstream) + (assert (eq (pop (zstream-input-stack zstream)) :stop)) + (setf (zstream-token-category zstream) nil) + '(consume-token zstream)) ) + name kind)) + +(defun merge-sysid (sysid base) + (merge-pathnames sysid base)) + +(defun open-sysid (sysid) + (open sysid :element-type '(unsigned-byte 8) :direction :input)) + + +;;;; + +(defparameter *test-files* + '(;;"jclark:xmltest;not-wf;*;*.xml" + "jclark:xmltest;valid;*;*.xml" + ;;"jclark:xmltest;invalid;*.xml" + )) + +(defun run-all-tests (&optional (test-files *test-files*)) + (let ((failed nil)) + (dolist (k test-files) + (dolist (j (sort (directory k) #'string< :key #'pathname-name)) + (unless (test-file j) + (push j failed)))) + (fresh-line) + (cond (failed + (write-string "**** Test failed on") + (dolist (k failed) + (format t "~%**** ~S." k)) + nil) + (t + (write-string "**** Test passed!") + t)))) + +(defun test-file (filename) + (let ((out-filename (merge-pathnames "out/" filename))) + (if (probe-file out-filename) + (positive-test-file filename out-filename) + (negative-test-file filename)))) + +(defun positive-test-file (filename out-filename) + (multiple-value-bind (nodes condition) + (ignore-errors (parse-file filename)) + (cond (condition + (warn "**** Error in ~S: ~A." filename condition) + nil) + (t + (let (res equal?) + (setf res (with-output-to-string (sink) + (unparse-document nodes sink))) + (setf equal? + (with-open-file (in out-filename :direction :input :element-type 'character) + (do ((i 0 (+ i 1)) + (c (read-char in nil nil) (read-char in nil nil))) + ((or (eq c nil) (= i (length res))) + (and (eq c nil) (= i (length res)))) + (unless (eql c (char res i)) + (return nil))))) + (cond ((not equal?) + (format t "~&**** Test failed on ~S." filename) + (fresh-line) + (format t "** me: ~A" res) + (fresh-line) + (format t "** he: " res) + (finish-output) + (with-open-file (in out-filename :direction :input :element-type 'character) + (do ((c (read-char in nil nil) (read-char in nil nil))) + ((eq c nil)) + (write-char c))) + nil) + (t + t))))))) + +(defun negative-test-file (filename) + (multiple-value-bind (nodes condition) + (ignore-errors (parse-file filename)) + (declare (ignore nodes)) + (cond (condition + t) + (t + (warn "**** negative test failed on ~S." filename))))) + +;;;; + +(progn + + (defmethod dom:create-processing-instruction ((document null) target data) + (declare (ignorable document target data)) + nil) + + (defmethod dom:append-child ((node null) child) + (declare (ignorable node child)) + nil) + + (defmethod dom:create-element ((document null) name) + (declare (ignorable document name)) + nil) + + (defmethod dom:set-attribute ((document null) name value) + (declare (ignorable document name value)) + nil) + + (defmethod dom:create-text-node ((document null) data) + (declare (ignorable document data)) + nil) + + (defmethod dom:create-cdata-section ((document null) data) + (declare (ignorable document data)) + nil) + ) + + +;;; Implementation of a simple but faster DOM. + +(defclass simple-document () + ((children :initform nil :accessor simple-document-children))) + +(defstruct node + parent) + +(defstruct (processing-instruction (:include node)) + target + data) + +(defstruct (text (:include node) + (:constructor make-text-boa (parent data))) + data) + +(defstruct (element (:include node)) + gi + attributes + children) + +(defmethod dom:create-processing-instruction ((document simple-document) target data) + (make-processing-instruction :target target :data data)) + +(defmethod dom:append-child ((node element) child) + (setf (node-parent child) node) + (push child (element-children node))) + +(defmethod dom:append-child ((node simple-document) child) + (push child (simple-document-children node)) + nil) + +(defmethod dom:create-element ((document simple-document) name) + (make-element :gi name)) + +(defmethod dom:set-attribute ((node element) name value) + (push (cons name value) + (element-attributes node))) + +(defmethod dom:create-text-node ((document simple-document) data) + (make-text-boa nil data)) + +(defmethod dom:create-cdata-section ((document simple-document) data) + (make-text-boa nil data)) + +#|| +(defmacro read-data-until* ((predicate input res res-start res-end) &body body) + ;; fast variant -- for now disabled for no apparent reason + ;; -> res, res-start, res-end + `(let* ((rptr (xstream-read-ptr ,input)) + (p0 rptr) + (fptr (xstream-fill-ptr ,input)) + (buf (xstream-buffer ,input)) + ,res ,res-start ,res-end) + (declare (type fixnum rptr fptr p0) + (type (simple-array read-element (*)) buf)) + (loop + (cond ((%= rptr fptr) + ;; underflow -- hmm inject the scratch-pad with what we + ;; read and continue, while using read-rune and collecting + ;; d.h. besser wäre hier auch while-reading zu benutzen. + (setf (xstream-read-ptr ,input) rptr) + (multiple-value-setq (,res ,res-start ,res-end) + (with-rune-collector/raw (collect) + (do ((i p0 (%+ i 1))) + ((%= i rptr)) + (collect (%rune buf i))) + (let (c) + (loop + (cond ((%= rptr fptr) + (setf (xstream-read-ptr ,input) rptr) + (setf c (peek-rune input)) + (cond ((eq c :eof) + (return))) + (setf rptr (xstream-read-ptr ,input) + fptr (xstream-fill-ptr ,input) + buf (xstream-buffer ,input))) + (t + (setf c (%rune buf rptr)))) + (cond ((,predicate c) + ;; we stop + (setf (xstream-read-ptr ,input) rptr) + (return)) + (t + ;; we continue + (collect c) + (setf rptr (%+ rptr 1))) ))))) + (return)) + ((,predicate (%rune buf rptr)) + ;; we stop + (setf (xstream-read-ptr ,input) rptr) + (setf ,res buf ,res-start p0 ,res-end rptr) + (return) ) + (t + ;; we continue + (setf rptr (%+ rptr 1))) )) + ,@body )) +||# + +;(defun read-data-until (predicate input continuation) +; ) + +(defmacro read-data-until* ((predicate input res res-start res-end) &body body) + "Read data from `input' until `predicate' applied to the read char + turns true. Then execute `body' with `res', `res-start', `res-end' + bound to denote a subsequence (of RUNEs) containing the read portion. + The rune upon which `predicate' turned true is neither consumed from + the stream, nor included in `res'. + + Keep the predicate short, this it may be included more than once into + the macro's expansion." + ;; + (let ((input-var (gensym)) + (collect (gensym)) + (c (gensym))) + `(LET ((,input-var ,input)) + (MULTIPLE-VALUE-BIND (,res ,res-start ,res-end) + (WITH-RUNE-COLLECTOR/RAW (,collect) + (LOOP + (LET ((,c (PEEK-RUNE ,input-var))) + (COND ((EQ ,c :EOF) + ;; xxx error message + (RETURN)) + ((FUNCALL ,predicate ,c) + (RETURN)) + (t + (,collect ,c) + (CONSUME-RUNE ,input-var)))))) + (LOCALLY + ,@body))))) + +(defun read-name-token (input) + (read-data-until* ((lambda (rune) + (declare (type rune rune)) + (not (name-rune-p rune))) + input + r rs re) + (intern-name r rs re))) + +(defun read-cdata (input) + (read-data-until* ((lambda (rune) + (declare (type rune rune)) + (or (%= rune #/<) (%= rune #/&))) + input + source start end) + (locally + (declare (type (simple-array rune (*)) source) + (type ufixnum start) + (type ufixnum end) + (optimize (speed 3) (safety 0))) + (let ((res (make-array (%- end start) :element-type 'rune))) + (declare (type (simple-array rune (*)) res)) + (let ((i (%- end start))) + (declare (type ufixnum i)) + (loop + (setf i (- i 1)) + (setf (%rune res i) (%rune source (the ufixnum (+ i start)))) + (when (= i 0) + (return)))) + res)))) + +(defun internal-entity-expansion (name) + (let ((e (assoc (list :general name) *entities* :test #'equal))) + (unless e + (error "Entity '~A' is not defined." (rod-string name))) + (unless (eq :internal (cadr e)) + (error "Entity '~A' is not an internal entity.")) + (or (cadddr e) + (car + (setf (cdddr e) + (cons (find-internal-entity-expansion name) nil)))))) + +(defun find-internal-entity-expansion (name) + (let ((zinput (make-zstream))) + (with-rune-collector-3 (collect) + (labels ((muffle (input) + (let (c) + (loop + (setf c (read-rune input)) + (cond ((eq c :eof) + (return)) + ((rune= c #/&) + (setf c (peek-rune input)) + (cond ((rune= c #/#) + (let ((c (read-numeric-entity input))) + (%put-rune c collect))) + (t + (unless (name-start-rune-p (peek-rune input)) + (error "Expecting name after &.")) + (let ((name (read-name-token input))) + (setf c (read-rune input)) + (assert (rune= c #/\;)) + (recurse-on-entity + zinput name :general + (lambda (zinput) + (muffle (car (zstream-input-stack zinput))))))))) + ((and (rune= c #/<)) + ;; xxx fix error message + (cerror "Eat them in spite of this." + "For no apparent reason #\/< is forbidden in attribute values. ~ + You lost -- next time choose SEXPR syntax.") + (collect c)) + ((space-rune-p c) + (collect #/space)) + ((not (data-rune-p c)) + (error "illegal char: ~S." c)) + (t + (collect c))))))) + (declare (dynamic-extent #'muffle)) + (recurse-on-entity + zinput name :general + (lambda (zinput) + (muffle (car (zstream-input-stack zinput))))) )))) + +#+(or) ;; Do we need this? Not called anywhere +(defun ff (name) + (let ((input (make-zstream))) + (let ((*data-behaviour* :doc) + (*document* (make-instance 'simple-document))) + (recurse-on-entity + input name :general + (lambda (input) + (prog1 + (ecase (entity-source-kind name :general) + (:internal (p/content input)) + (:external (p/ext-parsed-ent input))) + (unless (eq (peek-token input) :eof) + (error "Trailing garbage. - ~S" (peek-token input))))))))) + +(defun read-att-value-2 (input) + (let ((delim (read-rune input))) + (unless (member delim '(#/\" #/\') :test #'eql) + (error "Bad attribute value delimiter ~S, must be either #\\\" or #\\\'." + (if (< delim char-code-limit) (code-char delim) delim))) + (with-rune-collector-4 (collect) + (loop + (let ((c (read-rune input))) + (cond ((eq c :eof) + (error "EOF")) + ((rune= c delim) + (return)) + ((rune= #/& c) + (multiple-value-bind (kind sem) (read-entity-ref input) + (ecase kind + (:numeric + (%put-rune sem collect)) + (:named + (let* ((exp (internal-entity-expansion sem)) + (n (length exp))) + (declare (type (simple-array rune (*)) exp)) + (do ((i 0 (%+ i 1))) + ((%= i n)) + (collect (%rune exp i)))))))) + ((space-rune-p c) + (collect #x20)) + (t + (collect c)))))))) + +;;;;;;;;;;;;;;;;; + +;;; Namespace stuff + +(defvar *namespace-bindings* ()) +(defvar *default-namespace-bindings* + '((#"" . nil) + (#"xmlns" . #"http://www.w3.org/2000/xmlns/") + (#"xml" . #"http://www.w3.org/XML/1998/namespace"))) + +;; We already know that name is part of a valid XML name, so all we +;; have to check is that the first rune is a name-start-rune and that +;; there is not colon in it. +(defun nc-name-p (name) + (and (name-start-rune-p (rune name 0)) + (notany #'(lambda (rune) (rune= #/: rune)) name))) + +(defun split-qname (qname) + (declare (type glisp:simple-rod qname)) + (let ((pos (position #/: qname))) + (if pos + (let ((prefix (subseq qname 0 pos)) + (local-name (subseq qname (1+ pos)))) + (if (nc-name-p local-name) + (values prefix local-name) + (error "~S is not a valid NcName." local-name))) + (values () qname)))) + +(defun decode-qname (qname) + "decode-qname name => namespace-uri, prefix, local-name" + (declare (type glisp:simple-rod qname)) + (multiple-value-bind (prefix local-name) (split-qname qname) + (let ((uri (find-namespace-binding prefix))) + (if uri + (values uri prefix local-name) + (values nil nil nil))))) + + +(defun find-namespace-binding (prefix) + (cdr (or (assoc prefix *namespace-bindings* :test #'rod=) + (error "Undeclared namespace prefix: ~A" (rod-string prefix))))) + +;; FIXME: Should probably be refactored by adding :start and :end to rod=/rod-equal +(defun rod-starts-with (prefix rod) + (and (<= (length prefix) (length rod)) + (dotimes (i (length prefix) t) + (unless (rune= (rune prefix i) (rune rod i)) + (return nil))))) + +(defun xmlns-attr-p (attr-name) + (rod-starts-with #.(string-rod "xmlns") attr-name)) + +(defun attrname->prefix (attrname) + (if (< 5 (length attrname)) + (subseq attrname 6) + nil)) + +(defun find-namespace-declarations (attr-alist) + (mapcar #'(lambda (attr) + (cons (attrname->prefix (car attr)) (cdr attr))) + (remove-if-not #'xmlns-attr-p attr-alist :key #'car))) + +(defun declare-namespaces (attr-alist) + (let ((ns-decls (find-namespace-declarations attr-alist))) + (dolist (ns-decl ns-decls ) + ;; check some namespace validity constraints + ;; FIXME: Would be nice to add "this is insane, go ahead" restarts + (let ((prefix (car ns-decl)) + (uri (if (rod= #"" (cdr ns-decl)) + nil + (cdr ns-decl)))) + (cond + ((and (rod= prefix #"xml") + (not (rod= uri #"http://www.w3.org/XML/1998/namespace"))) + (error "Attempt to rebind the prefix \"xml\" to ~S." (mu uri))) + ((and (rod= uri #"http://www.w3.org/XML/1998/namespace") + (not (rod= prefix #"xml"))) + (error "The namespace URI \"http://www.w3.org/XML/1998/namespace\" ~ + may not be bound to the prefix ~S, only \"xml\" is legal." + (mu prefix))) + ((and (rod= prefix #"xmlns") + (rod= uri #"http://www.w3.org/2000/xmlns/")) + (error "Attempt to bind the prefix \"xmlns\" to its predefined ~ + URI \"http://www.w3.org/2000/xmlns/\", which is ~ + forbidden for no good reason.")) + ((rod= prefix #"xmlns") + (error "Attempt to bind the prefix \"xmlns\" to the URI ~S, ~ + but it may not be declared." (mu uri))) + ((rod= uri #"http://www.w3.org/2000/xmlns/") + (error "The namespace URI \"http://www.w3.org/2000/xmlns/\" may ~ + not be bound to prefix ~S (or any other)." (mu prefix))) + ((and (rod= uri #"") prefix) + (error "Only the default namespace (the one without a prefix) may ~ + be bound to an empty namespace URI, thus undeclaring it.")) + (t + (push (cons prefix uri) *namespace-bindings*) + (sax:start-prefix-mapping *handler* (car ns-decl) (cdr ns-decl)))))) + ns-decls)) + +(defun undeclare-namespaces (ns-decls) + (dolist (ns-decl ns-decls) + (setq *namespace-bindings* (delete ns-decl *namespace-bindings*)) + (sax:end-prefix-mapping *handler* (car ns-decl)))) + +(defstruct attribute + namespace-uri + local-name + qname + value) + +(defun build-attribute-list-no-ns (attr-alist) + (mapcar #'(lambda (pair) (make-attribute :qname (car pair) :value (cdr pair))) + attr-alist)) + +;; FIXME: Use a non-braindead way to enforce attribute uniqueness +(defun build-attribute-list-ns (attr-alist) + (let (attributes) + (dolist (pair attr-alist) + (when (or (not (xmlns-attr-p (car pair))) + sax:*include-xmlns-attributes*) + (push (build-attribute (car pair) (cdr pair)) attributes))) + + ;; 5.3 Uniqueness of Attributes + ;; In XML documents conforming to [the xmlns] specification, no + ;; tag may contain two attributes which: + ;; 1. have identical names, or + ;; 2. have qualified names with the same local part and with + ;; prefixes which have been bound to namespace names that are + ;; identical. + ;; + ;; 1. is checked by read-tag-2, so we only deal with 2 here + (do ((sublist attributes (cdr sublist))) + ((null sublist) attributes) + (let ((attr-1 (car sublist))) + (when (and (attribute-namespace-uri attr-1) + (find-if #'(lambda (attr-2) + (and (rod= (attribute-namespace-uri attr-1) + (attribute-namespace-uri attr-2)) + (rod= (attribute-local-name attr-1) + (attribute-local-name attr-2)))) + (cdr sublist))) + (error "Multiple definitions of attribute ~S in namespace ~S." + (mu (attribute-local-name attr-1)) + (mu (attribute-namespace-uri attr-1)))))))) + +(defun build-attribute (name value) + (multiple-value-bind (prefix local-name) (split-qname name) + (declare (ignorable local-name)) + (if (or (not prefix) ;; default namespace doesn't apply to attributes + (and (rod= #"xmlns" prefix) (not sax:*use-xmlns-namespace*))) + (make-attribute :qname name :value value) + (multiple-value-bind (uri prefix local-name) + (decode-qname name) + (declare (ignore prefix)) + (make-attribute :qname name + :value value + :namespace-uri uri + :local-name local-name))))) + +;;; Faster constructors + +;; Since using the general DOM interface to construct the parsed trees +;; may turn out to be quite expensive (That depends on the underlying +;; DOM implementation). A particular DOM implementation may choose to +;; implement an XML:FAST-CONSTRUCTORS method: + +;; XML:FAST-CONSTRUCTORS document [method] +;; +;; Return an alist of constructors suitable for the document `document'. +;; +;; (:MAKE-TEXT document parent data) +;; (:MAKE-PROCESSING-INSTRUCTION document parent target content) +;; (:MAKE-NODE document parent attributes content) +;; [`attributes' now in turn is an alist] +;; (:MAKE-CDATA document parent data) +;; (:MAKE-COMMENT document parent data) +;; + +;;;;;;;;;;;;;;;;; + +;; System Identifier Protocol + +;; A system identifier is an object obeying to the system identifier +;; protocol. Often something like an URL or a pathname. + +;; OPEN-SYS-ID sys-id [generic function] +;; +;; Opens the resource associated with the system identifier `sys-id' +;; for reading and returns a stream. For now it is expected, that the +;; stream is an octet stream (one of element type (unsigned-byte 8)). +;; +;; More precisely: The returned object only has to obey to the xstream +;; controller protocol. (That is it has to provide implementations for +;; READ-OCTETS and XSTREAM-CONTROLLER-CLOSE). + +;; MERGE-SYS-ID sys-id base [generic function] +;; +;; Merges two system identifiers. That is resolve `sys-id' relative to +;; `base' yielding an absolute system identifier suitable for +;; OPEN-SYS-ID. + +;; xstream Controller Protocol +;; +;; + + +#|| +(defun xml-parse (system-id &key document standalone-p) + ) +||# diff --git a/xml/xml-stream.lisp b/xml/xml-stream.lisp new file mode 100644 index 0000000..7cd9d9f --- /dev/null +++ b/xml/xml-stream.lisp @@ -0,0 +1,370 @@ +;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: XML; readtable: glisp; Encoding: utf-8; -*- +;;; --------------------------------------------------------------------------- +;;; Title: Fast streams +;;; Created: 1999-07-17 +;;; Author: Gilbert Baumann +;;; License: LGPL (See file COPYING for details). +;;; --------------------------------------------------------------------------- +;;; © copyright 1999 by Gilbert Baumann + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Library General Public +;;; License as published by the Free Software Foundation; either +;;; version 2 of the License, or (at your option) any later version. +;;; +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Library General Public License for more details. +;;; +;;; You should have received a copy of the GNU Library General Public +;;; License along with this library; if not, write to the +;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;;; Boston, MA 02111-1307 USA. + +(in-package :xml) + +;;; API +;; +;; MAKE-XSTREAM cl-stream &key name speed initial-speed [function] +;; MAKE-ROD-XSTREAM rod &key name [function] +;; CLOSE-XSTREAM xstream [function] +;; READ-RUNE xstream [macro] +;; PEEK-RUNE xstream [macro] +;; FREAD-RUNE xstream [function] +;; FPEEK-RUNE xstream [function] +;; XSTREAM-POSITION xstream [function] +;; XSTREAM-LINE-NUMBER xstream [function] +;; XSTREAM-COLUMN-NUMBER xstream [function] +;; XSTREAM-PLIST xstream [accessor] +;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*] + +;; [*] swichting the encoding on the fly is only possible when the +;; stream's buffer is empty; therefore to be able to switch the +;; encoding, while some runes are already read, set the stream's speed +;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM) +;; and later set it to full speed. (The encoding of the runes +;; sequence, you fetch off with READ-RUNE is always UTF-16 though). + +;; An encoding is simply something, which provides the DECODE-SEQUENCE +;; method. + +;;; Controller protocol +;; +;; READ-OCTECTS sequence os-stream start end -> first-non-written +;; XSTREAM/CLOSE os-stream +;; + +(eval-when (eval compile load) + (defparameter *fast* '(optimize (speed 3) (safety 0))) + ;;(defparameter *fast* '(optimize (speed 2) (safety 3))) + ) + +;; Let us first define fast fixnum arithmetric get rid of type +;; checks. (After all we know what we do here). + +(defmacro fx-op (op &rest xs) + `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))) +(defmacro fx-pred (op &rest xs) + `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))) + +(defmacro %+ (&rest xs) `(fx-op + ,@xs)) +(defmacro %- (&rest xs) `(fx-op - ,@xs)) +(defmacro %* (&rest xs) `(fx-op * ,@xs)) +(defmacro %/ (&rest xs) `(fx-op floor ,@xs)) +(defmacro %and (&rest xs) `(fx-op logand ,@xs)) +(defmacro %ior (&rest xs) `(fx-op logior ,@xs)) +(defmacro %xor (&rest xs) `(fx-op logxor ,@xs)) +(defmacro %ash (&rest xs) `(fx-op ash ,@xs)) +(defmacro %mod (&rest xs) `(fx-op mod ,@xs)) + +(defmacro %= (&rest xs) `(fx-pred = ,@xs)) +(defmacro %<= (&rest xs) `(fx-pred <= ,@xs)) +(defmacro %>= (&rest xs) `(fx-pred >= ,@xs)) +(defmacro %< (&rest xs) `(fx-pred < ,@xs)) +(defmacro %> (&rest xs) `(fx-pred > ,@xs)) + +(deftype buffer-index () + `(unsigned-byte ,(integer-length array-total-size-limit))) + +(deftype buffer-byte () + `(unsigned-byte 16)) + +(deftype octet () + `(unsigned-byte 8)) + +;; The usage of a special marker for EOF is experimental and +;; considered unhygenic. + +(defconstant +end+ #xFFFF + "Special marker inserted into stream buffers to indicate end of buffered data.") + +(defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte)) +(defvar +null-octet-buffer+ (make-array 0 :element-type 'octet)) + +(defstruct (xstream + (:constructor make-xstream/low) + (:copier nil) + (:print-function print-xstream)) + + ;;; Read buffer + + ;; the buffer itself + (buffer +null-buffer+ + :type (simple-array buffer-byte (*))) + ;; points to the next element of `buffer' containing the next rune + ;; about to be read. + (read-ptr 0 :type buffer-index) + ;; points to the first element of `buffer' not containing a rune to + ;; be read. + (fill-ptr 0 :type buffer-index) + + ;;; OS buffer + + ;; a scratch pad for READ-SEQUENCE + (os-buffer +null-octet-buffer+ + :type (simple-array octet (*))) + + ;; `os-left-start', `os-left-end' designate a region of os-buffer, + ;; which still contains some undecoded data. This is needed because + ;; of the DECODE-SEQUENCE protocol + (os-left-start 0 :type buffer-index) + (os-left-end 0 :type buffer-index) + + ;; How much to read each time + (speed 0 :type buffer-index) + + ;; Some stream object obeying to a certain protcol + os-stream + + ;; The external format + ;; (some object offering the ENCODING protocol) + (encoding :utf-8) + + ;;A STREAM-NAME object + (name nil) + + ;; a plist a struct keeps the hack away + (plist nil) + + ;; Stream Position + (line-number 1 :type integer) ;current line number + (line-start 0 :type integer) ;stream position the current line starts at + (buffer-start 0 :type integer) ;stream position the current buffer starts at + + ;; There is no need to maintain a column counter for each character + ;; read, since we can easily compute it from `line-start' and + ;; `buffer-start'. + ) + +(defmacro read-rune (input) + "Read a single rune off the xstream `input'. In case of end of file :EOF + is returned." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (setf (xstream-read-ptr input) (%+ rp 1)) + (cond ((%= ch +end+) + (the (or (member :eof) rune) + (xstream-underflow input))) + ((%= ch #x000A) ;line break + (account-for-line-break input) + (code-rune ch)) + (t + (code-rune ch)))))) + ,input)) + +(defmacro peek-rune (input) + "Peek a single rune off the xstream `input'. In case of end of file :EOF + is returned." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (cond ((%= ch +end+) + (prog1 + (the (or (member :eof) rune) (xstream-underflow input)) + (setf (xstream-read-ptr input) 0))) + (t + (code-rune ch)))))) + ,input)) + +(defmacro consume-rune (input) + "Like READ-RUNE, but does not actually return the read rune." + `((lambda (input) + (declare (type xstream input) + #.*fast*) + (let ((rp (xstream-read-ptr input))) + (declare (type buffer-index rp)) + (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input)) + rp))) + (declare (type buffer-byte ch)) + (setf (xstream-read-ptr input) (%+ rp 1)) + (when (%= ch +end+) + (xstream-underflow input)) + (when (%= ch #x000A) ;line break + (account-for-line-break input) ))) + nil) + ,input)) + +(defsubst unread-rune (rune input) + "Unread the last recently read rune; if there wasn't such a rune, you + deserve to loose." + (declare (ignore rune)) + (decf (xstream-read-ptr input)) + (when (%= (peek-rune input) #x000A) ;was it a line break? + (unaccount-for-line-break input))) + +(defun fread-rune (input) + (read-rune input)) + +(defun fpeek-rune (input) + (peek-rune input)) + +;;; Line counting + +(defun account-for-line-break (input) + (declare (type xstream input)) + (incf (xstream-line-number input)) + (setf (xstream-line-start input) + (+ (xstream-buffer-start input) (xstream-read-ptr input)))) + +(defun unaccount-for-line-break (input) + ;; incomplete! + ;; We better use a traditional lookahead technique or forbid unread-rune. + (decf (xstream-line-number input))) + +;; User API: + +(defun xstream-position (input) + (+ (xstream-buffer-start input) (xstream-read-ptr input))) + +;; xstream-line-number is structure accessor + +(defun xstream-column-number (input) + (+ (- (xstream-position input) + (xstream-line-start input)) + 1)) + +;;; Underflow + +;;(defun read-runes (sequence input)) + +(defun xstream-underflow (input) + (declare (type xstream input)) + ;; we are about to fill new data into the buffer, so we need to + ;; adjust buffer-start. + (incf (xstream-buffer-start input) + (- (xstream-fill-ptr input) 0)) + (let (n m) + ;; when there is something left in the os-buffer, we move it to + ;; the start of the buffer. + (setf m (- (xstream-os-left-end input) (xstream-os-left-start input))) + (unless (zerop m) + (replace (xstream-os-buffer input) (xstream-os-buffer input) + :start1 0 :end1 m + :start2 (xstream-os-left-start input) + :end2 (xstream-os-left-end input)) + ;; then we take care that the buffer is large enough to carry at + ;; least 100 bytes (a random number) + (unless (>= (length (xstream-os-buffer input)) 100) + (error "You lost") + ;; todo: enlarge buffer + )) + (setf n + (read-octets (xstream-os-buffer input) (xstream-os-stream input) + m (min (1- (length (xstream-os-buffer input))) + (+ m (xstream-speed input))))) + (cond ((%= n 0) + (setf (xstream-read-ptr input) 0 + (xstream-fill-ptr input) n) + (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) + :eof) + (t + (multiple-value-bind (fnw fnr) + (decode-sequence (xstream-encoding input) + (xstream-os-buffer input) 0 n + (xstream-buffer input) 0 (1- (length (xstream-buffer input))) + (= n m)) + (setf (xstream-os-left-start input) fnr + (xstream-os-left-end input) n + (xstream-read-ptr input) 0 + (xstream-fill-ptr input) fnw) + (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+) + (read-rune input)))))) + +;;; constructor + +(defun make-xstream (os-stream &key name (speed 8192) (initial-speed speed)) + (let () + (multiple-value-bind (encoding preread) (figure-encoding os-stream) + (let ((osbuf (make-array speed :element-type '(unsigned-byte 8)))) + (replace osbuf preread) + (make-xstream/low + :buffer (let ((r (make-array speed :element-type 'buffer-byte))) + (setf (%rune r 0) #xFFFF) + r) + :read-ptr 0 + :fill-ptr 0 + :os-buffer osbuf + :speed initial-speed + :os-stream os-stream + :os-left-start 0 + :os-left-end (length preread) + :encoding encoding + :name name))))) + +(defmethod figure-encoding ((stream glisp:gstream)) + ;; For HTML iso-8859-1 is the default + (values (xml::find-encoding :iso-8859-1) nil)) + +(defun make-rod-xstream (string &key name) + (let ((n (length string))) + (let ((buffer (make-array (1+ n) :element-type 'buffer-byte))) + (declare (type (simple-array buffer-byte (*)) buffer)) + ;; copy the rod + (do ((i (1- n) (- i 1))) + ((< i 0)) + (declare (type fixnum i)) + (setf (aref buffer i) (rune-code (%rune string i)))) + (setf (aref buffer n) +end+) + ;; + (make-xstream/low :buffer buffer + :read-ptr 0 + :fill-ptr n + ;; :os-buffer nil + :speed 1 + :os-stream nil + :name name)))) + +;;; misc + +(defun close-xstream (input) + (xstream/close (xstream-os-stream input))) + +;;; controller implementations + +(defmethod read-octets (sequence (stream stream) start end) + (#+CLISP lisp:read-byte-sequence + #-CLISP read-sequence + sequence stream :start start :end end)) + +(defmethod read-octets (sequence (stream null) start end) + (declare (ignore sequence start end)) + 0) + +(defmethod xstream/close ((stream stream)) + (close stream)) + +(defmethod xstream/close ((stream null)) + nil) + diff --git a/xml/xmls-compat.lisp b/xml/xmls-compat.lisp new file mode 100644 index 0000000..e2d6537 --- /dev/null +++ b/xml/xmls-compat.lisp @@ -0,0 +1,118 @@ +;;;; xml-compat.lisp -- XMLS-compatible data structures +;;;; +;;;; This file is part of the CXML parser, released under (L)LGPL. +;;;; See file COPYING for details. +;;;; +;;;; Copyright (c) 2004 headcraft GmbH +;;;; Author: David Lichteblau + +(defpackage cxml-xmls + (:use :cl) + (:export #:make-node #:node-name #:node-attrs #:node-children + #:make-xmls-builder #:map-node)) + +(in-package :cxml-xmls) + + +;;;; Knoten + +;; XXX Wie namespaces in xmls funktionieren nsollen verstehe ich noch nicht so +;; ganz. Daher verzichte ich vorerst auf NODE-NS und verwende durchweg QNAMEs. +(defun make-node (&key name attrs children) + `(,name ,attrs ,@children)) + +(defun node-name (node) + (car node)) + +(defun (setf node-name) (newval node) + (setf (car node) newval)) + +(defun node-attrs (node) + (cadr node)) + +(defun (setf node-attrs) (newval node) + (setf (cadr node) newval)) + +(defun node-children (node) + (cddr node)) + +(defun (setf node-children) (newval node) + (setf (cddr node) newval)) + + +;;;; SAX-Handler (Parser) + +(defclass xmls-builder () + ((element-stack :initform nil :accessor element-stack) + (root :initform nil :accessor root))) + +(defun make-xmls-builder () + (make-instance 'xmls-builder)) + +(defmethod sax:end-document ((handler xmls-builder)) + (root handler)) + +(defmethod sax:start-element + ((handler xmls-builder) namespace-uri local-name qname attributes) + (declare (ignore namespace-uri local-name)) + (let* ((attributes + (mapcar (lambda (attr) + (list (sax:attribute-qname attr) + (sax:attribute-value attr))) + attributes)) + (node (make-node :name qname :attrs attributes)) + (parent (car (element-stack handler)))) + (if parent + (push node (node-children parent)) + (setf (root handler) node)) + (push node (element-stack handler)))) + +(defmethod sax:end-element + ((handler xmls-builder) namespace-uri local-name qname) + (declare (ignore namespace-uri local-name qname)) + (let ((node (pop (element-stack handler)))) + (setf (node-children node) (reverse (node-children node))))) + +(defmethod sax:characters ((handler xmls-builder) data) + (let* ((parent (car (element-stack handler))) + (prev (car (node-children parent)))) + (if (stringp prev) + ;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer + ;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten + ;; erweitern, sonst ist das Dokument nicht normalisiert. + ;; (XXX Oder sollte man besser den Parser entsprechend aendern?) + (setf (car (node-children parent)) + (concatenate 'runes:rod prev data)) + (push data (node-children parent))))) + + +;;;; SAX-Treiber (fuer Serialisierung) + +(defun map-node + (handler node + &key (include-xmlns-attributes sax:*include-xmlns-attributes*)) + (sax:start-document handler) + (labels ((walk (node) + (let ((attlist + (compute-attributes node include-xmlns-attributes)) + (qname (node-name node))) + ;; fixme: namespaces + (sax:start-element handler nil nil qname attlist) + (dolist (child (node-children node)) + (typecase child + (list (walk child)) + (string (sax:characters handler child)))) + (sax:end-element handler nil nil qname)))) + (walk node)) + (sax:end-document handler)) + +(defun compute-attributes (node xmlnsp) + (remove nil + (mapcar (lambda (a) + (destructuring-bind (name value) a + (if (or xmlnsp (not (cxml::xmlns-attr-p name))) + (sax:make-attribute :qname name + :value value + :specified-p t) + nil))) + (node-attrs node)))) diff --git a/xmlconf.lisp b/xmlconf.lisp new file mode 100644 index 0000000..5bf59ee --- /dev/null +++ b/xmlconf.lisp @@ -0,0 +1,23 @@ +(defpackage xmlconf + (:use :cl) + (:alias (:string-dom :dom))) +(in-package :xmlconf) + +(defun test-xml-conformance (directory) + (let ((xmlconf (xml:parse-file (merge-pathnames "xmlconf.xml" directory)))) + (dolist (test (dom:get-elements-by-tag-name xmlconf "test")) + (when (equal (dom:get-attribute test "TYPE") "valid") + (let* ((base (dom:get-attribute (dom:parent-node test) "xml:base")) + (uri (dom:get-attribute test "URI"))) + (unless base + (inspect test)) + (princ uri) + (handler-case + (progn + (xml:parse-file + (merge-pathnames uri (merge-pathnames base directory))) + (format t " ok~%")) + (serious-condition (c) + (format t " FAILED:~% ~A~%[~A]~%" + c + (dom:data (car (dom:child-nodes test)))))))))))