Update to current dev version

git-svn-id: svn://bknr.net/svn/trunk/thirdparty/cl-ppcre@3581 4281704c-cde7-0310-8518-8e2dc76b1ff0
2008-07-23 11:44:08 +00:00
parent 2974af4010
commit 25c3dedeeb
37 changed files with 5443 additions and 6794 deletions
--- a/12
+++ b/12
@ -1,3 +1,15 @@
+Version 2.0.0
+2008-07-23
+Added named properties (\p{foo})
+Added Unicode support
+Introduced test functions for character classes
+Added optional test function optimization	
+Cleaned up test suite, removed performance cruft
+Removed the various alternative system definitions (too much maintenance work)
+Exported PARSE-STRING	
+General cleanup	
+Lots of documentation additions
+
 Version 1.4.1
 2008-07-03
 Skip non-characters in CREATE-RANGES-FROM-SET	
--- a/62
+++ b/62
@ -1,62 +0,0 @@
-Complete documentation for CL-PPCRE can be found in the 'doc'
-directory.
-
-CL-PPCRE also supports Nikodemus Siivola's HYPERDOC, see
-<http://common-lisp.net/project/hyperdoc/> and
-<http://www.cliki.net/hyperdoc>.
-
-1. Installation
-
-1.1. Probably the easiest way is
-
-       (load "/path/to/cl-ppcre/load.lisp")
-
-     This should compile and load CL-PPCRE on most Common Lisp
-     implementations.
-
-1.2. With MK:DEFSYSTEM you can make a symbolic link from
-     'cl-ppcre.system' and 'cl-ppcre-test.system' to your central registry
-     (which by default is in '/usr/local/lisp/Registry/') and then issue
-     the command
-
-       (mk:compile-system "cl-ppcre")
-
-     Note that this relies on TRUENAME returning the original file a
-     symbolic link is pointing to. This will only work with AllegroCL
-     6.2 if you've applied all patches with (SYS:UPDATE-ALLEGRO).
-
-1.3. You can also use ASDF instead of MK:DEFSYSTEM in a similar way
-     (use the .asd files instead of the .system files).
-
-1.4. For LispWorks there's a file 'lispworks-defsystem.lisp' which includes
-     a system definition for LispWork's Common Defsystem.
-
-2. Test
-
-CL-PPCRE comes with a test suite that can be used to check its
-compatibility with Perl's regex syntax. See the documentation on how
-to use this test suite for benchmarks and on how to write your own
-tests.
-
-2.1. If you've used 'load.lisp' to load CL-PPCRE you already have the
-     test suite loaded and can start the default tests with
-
-       (cl-ppcre-test:test)
-
-2.2. With MK:DEFSYSTEM you need to compile the 'cl-ppcre-test' system
-     as well before you can proceed as in 2.1.
-
-2.3. Same for ASDF.
-
-Depending on your machine and your CL implementation the default test
-will take between a few seconds and a couple of minutes. (It will
-print a dot for every tenth test case while it proceeds to give some
-visual feedback.) It should exactly report three 'errors' (662, 790,
-and 1439) which are explained in the documentation.
-
-MCL might report an error for the ninth test case which is also
-explained in the docs.
-
-Genera notes (thanks to Patrick O'Donnell): Some more tests will fail
-because characters like #\Return, #\Linefeed, or #\Tab have encodings
-which differ from Perl's (and thus CL-PPCRE's) expectations.
--- a/api.lisp
+++ b/api.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/api.lisp,v 1.79 2008/07/03 08:39:10 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/api.lisp,v 1.84 2008/07/06 18:12:04 edi Exp $

 ;;; The external API for creating and using scanners.

@ -29,7 +29,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (defgeneric create-scanner (regex &key case-insensitive-mode
                                       multi-line-mode
@ -40,9 +40,9 @@
 parse-tree or as a string - and returns a scan closure which will scan
 strings for this regular expression and a list mapping registers to
 their names \(NIL stands for unnamed ones).  The \"mode\" keyboard
-arguments are equivalent to the imsx modifiers in Perl. If DESTRUCTIVE
-is not NIL the function is allowed to destructively modify its first
-argument \(but only if it's a parse tree)."))
+arguments are equivalent to the imsx modifiers in Perl.  If
+DESTRUCTIVE is not NIL, the function is allowed to destructively
+modify its first argument \(but only if it's a parse tree)."))

 #-:use-acl-regexp2-engine
 (defmethod create-scanner ((regex-string string) &key case-insensitive-mode
@ -76,8 +76,7 @@ argument \(but only if it's a parse tree)."))
  (declare #.*standard-optimize-settings*)
  (declare (ignore destructive))
  (when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
-    (signal-ppcre-invocation-error
-     "You can't use the keyword arguments to modify an existing scanner."))
+    (signal-invocation-error "You can't use the keyword arguments to modify an existing scanner."))
  scanner)

 #-:use-acl-regexp2-engine
@ -88,8 +87,7 @@ argument \(but only if it's a parse tree)."))
                                               destructive)
  (declare #.*standard-optimize-settings*)
  (when extended-mode
-    (signal-ppcre-invocation-error
-     "Extended mode doesn't make sense in parse trees."))
+    (signal-invocation-error "Extended mode doesn't make sense in parse trees."))
  ;; convert parse-tree into internal representation REGEX and at the
  ;; same time compute the number of registers and the constant string
  ;; (or anchor) the regex starts with (if any)
@ -180,7 +178,6 @@ argument \(but only if it's a parse tree)."))

 #+:use-acl-regexp2-engine
 (declaim (inline create-scanner))
-
 #+:use-acl-regexp2-engine
 (defmethod create-scanner ((scanner regexp::regular-expression) &key case-insensitive-mode
                                                                     multi-line-mode
@ -190,8 +187,7 @@ argument \(but only if it's a parse tree)."))
  (declare #.*standard-optimize-settings*)
  (declare (ignore destructive))
  (when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
-    (signal-ppcre-invocation-error
-     "You can't use the keyword arguments to modify an existing scanner."))
+    (signal-invocation-error "You can't use the keyword arguments to modify an existing scanner."))
  scanner)

 #+:use-acl-regexp2-engine
@ -254,7 +250,6 @@ internal purposes."))

 #+:use-acl-regexp2-engine
 (declaim (inline scan))
-
 #+:use-acl-regexp2-engine
 (defmethod scan ((parse-tree t) target-string
                                &key (start 0)
@ -292,12 +287,12 @@ internal purposes."))
 (defun scan-to-strings (regex target-string &key (start 0)
                                                 (end (length target-string))
                                                 sharedp)
-  (declare #.*standard-optimize-settings*)
  "Like SCAN but returns substrings of TARGET-STRING instead of
 positions, i.e. this function returns two values on success: the whole
 match as a string plus an array of substrings (or NILs) corresponding
-to the matched registers. If SHAREDP is true, the substrings may share
-structure with TARGET-STRING."
+to the matched registers.  If SHAREDP is true, the substrings may
+share structure with TARGET-STRING."
+  (declare #.*standard-optimize-settings*)
  (multiple-value-bind (match-start match-end reg-starts reg-ends)
      (scan regex target-string :start start :end end)
    (unless match-start
@ -329,11 +324,11 @@ structure with TARGET-STRING."
  "Executes BODY with the variables in VAR-LIST bound to the
 corresponding register groups after TARGET-STRING has been matched
 against REGEX, i.e. each variable is either bound to a string or to
-NIL. If there is no match, BODY is _not_ executed. For each element of
-VAR-LIST which is NIL there's no binding to the corresponding register
-group. The number of variables in VAR-LIST must not be greater than
-the number of register groups. If SHAREDP is true, the substrings may
-share structure with TARGET-STRING."
+NIL.  If there is no match, BODY is _not_ executed. For each element
+of VAR-LIST which is NIL there's no binding to the corresponding
+register group.  The number of variables in VAR-LIST must not be
+greater than the number of register groups.  If SHAREDP is true, the
+substrings may share structure with TARGET-STRING."
  (with-rebinding (target-string)
    (with-unique-names (match-start match-end reg-starts reg-ends
                                    start-index substr-fn)
@ -482,8 +477,9 @@ the number of register groups. After the last match, returns
 RESULT-FORM if provided or NIL otherwise.  An implicit block named NIL
 surrounds DO-REGISTER-GROUPS; RETURN may be used to terminate the loop
 immediately. If REGEX matches an empty string the scan is continued
-one position behind this match. If SHAREDP is true, the substrings may
-share structure with TARGET-STRING. BODY may start with declarations."
+one position behind this match.  If SHAREDP is true, the substrings
+may share structure with TARGET-STRING.  BODY may start with
+declarations."
  (with-rebinding (target-string)
    (with-unique-names (substr-fn match-start match-end
                                  reg-starts reg-ends start-index)
@ -510,11 +506,11 @@ share structure with TARGET-STRING. BODY may start with declarations."
 (defun all-matches (regex target-string
                          &key (start 0)
                               (end (length target-string)))
-  (declare #.*standard-optimize-settings*)
  "Returns a list containing the start and end positions of all
 matches of REGEX against TARGET-STRING, i.e. if there are N matches
 the list contains (* 2 N) elements.  If REGEX matches an empty string
 the scan is continued one position behind this match."
+  (declare #.*standard-optimize-settings*)
  (let (result-list)
    (do-matches (match-start match-end
                 regex target-string
@ -536,11 +532,11 @@ compile time."
                                     &key (start 0)
                                          (end (length target-string))
                                          sharedp)
-  (declare #.*standard-optimize-settings*)
  "Returns a list containing all substrings of TARGET-STRING which
 match REGEX. If REGEX matches an empty string the scan is continued
 one position behind this match. If SHAREDP is true, the substrings may
 share structure with TARGET-STRING."
+  (declare #.*standard-optimize-settings*)
  (let (result-list)
    (do-matches-as-strings (match regex target-string (nreverse result-list)
                                  :start start :end end :sharedp sharedp)
@ -563,18 +559,18 @@ compile time."
                         with-registers-p
                         omit-unmatched-p
                         sharedp)
-  (declare #.*standard-optimize-settings*)
  "Matches REGEX against TARGET-STRING as often as possible and
 returns a list of the substrings between the matches.  If
 WITH-REGISTERS-P is true, substrings corresponding to matched
 registers are inserted into the list as well.  If OMIT-UNMATCHED-P is
 true, unmatched registers will simply be left out, otherwise they will
 show up as NIL.  LIMIT limits the number of elements returned -
-registers aren't counted. If LIMIT is NIL (or 0 which is equivalent),
-trailing empty strings are removed from the result list.  If REGEX
-matches an empty string the scan is continued one position behind this
-match. If SHAREDP is true, the substrings may share structure with
-TARGET-STRING."
+registers aren't counted.  If LIMIT is NIL \(or 0 which is
+equivalent), trailing empty strings are removed from the result list.
+If REGEX matches an empty string the scan is continued one position
+behind this match.  If SHAREDP is true, the substrings may share
+structure with TARGET-STRING."
+  (declare #.*standard-optimize-settings*)
  ;; initialize list of positions POS-LIST to extract substrings with
  ;; START so that the start of the next match will mark the end of
  ;; the first substring
@ -637,13 +633,13 @@ TARGET-STRING."

 (defun string-case-modifier (str from to start end)
  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum from to start end))
+  (declare (fixnum from to start end))
  "Checks whether all words in STR between FROM and TO are upcased,
 downcased or capitalized and returns a function which applies a
 corresponding case modification to strings.  Returns #'IDENTITY
 otherwise, especially if words in the target area extend beyond FROM
 or TO.  STR is supposed to be bounded by START and END.  It is assumed
-that (<= START FROM TO END)."
+that \(<= START FROM TO END)."
  (case
      (if (or (<= to from)
              (and (< start from)
@ -740,8 +736,7 @@ S-expression."))
                          ((#\\) :backslash)))))
          (when (and (numberp token) (< token 0))
            ;; make sure we don't accept something like "\\0"
-            (signal-ppcre-invocation-error
-             "Illegal substring ~S in replacement string"
+            (signal-invocation-error "Illegal substring ~S in replacement string."
                                     (subseq replacement-string match-start match-end)))
          (push token collector))
        ;; remember where the match ended
@ -801,8 +796,7 @@ S-expression."))
                              ((#\\) :backslash)))))
              (when (and (numberp token) (< token 0))
                ;; make sure we don't accept something like "\\0"
-                (signal-ppcre-invocation-error
-                 "Illegal substring ~S in replacement string"
+                (signal-invocation-error "Illegal substring ~S in replacement string."
                                         (subseq replacement match-start match-end)))
              (push token collector))
            ;; remember where the match ended
@ -843,8 +837,7 @@ corresponding string."
                   (when (>= token reg-bound)
                     ;; but only if the register was referenced in the
                     ;; regular expression
-                     (signal-ppcre-invocation-error
-                      "Reference to non-existent register ~A in replacement string"
+                     (signal-invocation-error "Reference to non-existent register ~A in replacement string."
                                              (1+ token)))
                   (when (svref reg-starts token)
                     ;; and only if it matched, i.e. no match results
@ -909,11 +902,11 @@ corresponding string."

 (defun replace-aux (target-string replacement pos-list reg-list start end
                                  preserve-case simple-calls element-type)
+  "Auxiliary function used by REGEX-REPLACE and REGEX-REPLACE-ALL.
+POS-LIST contains a list with the start and end positions of all
+matches while REG-LIST contains a list of arrays representing the
+corresponding register start and end positions."
  (declare #.*standard-optimize-settings*)
-  "Auxiliary function used by REGEX-REPLACE and
-REGEX-REPLACE-ALL. POS-LIST contains a list with the start and end
-positions of all matches while REG-LIST contains a list of arrays
-representing the corresponding register start and end positions."
  ;; build the template once before we start the loop
  (let ((replacement-template (build-replacement-template replacement)))
    (with-output-to-string (s nil :element-type element-type)
@ -955,7 +948,6 @@ representing the corresponding register start and end positions."
                            preserve-case
                            simple-calls
                            (element-type #+:lispworks 'lw:simple-char #-:lispworks 'character))
-  (declare #.*standard-optimize-settings*)
  "Try to match TARGET-STRING between START and END against REGEX and
 replace the first match with REPLACEMENT.  Two values are returned;
 the modified string, and T if REGEX matched or NIL otherwise.
@ -985,6 +977,7 @@ match. The result will always be a fresh string, even if REGEX doesn't
 match.

  ELEMENT-TYPE is the element type of the resulting string."
+  (declare #.*standard-optimize-settings*)
  (multiple-value-bind (match-start match-end reg-starts reg-ends)
      (scan regex target-string :start start :end end)
    (if match-start
@ -1012,7 +1005,6 @@ match.
                                preserve-case
                                simple-calls
                                (element-type #+:lispworks 'lw:simple-char #-:lispworks 'character))
-  (declare #.*standard-optimize-settings*)
  "Try to match TARGET-STRING between START and END against REGEX and
 replace all matches with REPLACEMENT.  Two values are returned; the
 modified string, and T if REGEX matched or NIL otherwise.
@ -1042,6 +1034,7 @@ match. The result will always be a fresh string, even if REGEX doesn't
 match.

  ELEMENT-TYPE is the element type of the resulting string."
+  (declare #.*standard-optimize-settings*)
  (let ((pos-list '())
        (reg-list '()))
    (do-scans (match-start match-end reg-starts reg-ends regex target-string
@ -1102,6 +1095,9 @@ scanner, a case-insensitive scanner is used."

 #+:cormanlisp
 (defmacro do-with-all-symbols ((variable package-or-packagelist) &body body)
+  "Executes BODY with VARIABLE bound to each symbol in
+PACKAGE-OR-PACKAGELIST \(a designator for a list of packages) in
+turn."
  (with-unique-names (pack-var)
    `(if (listp ,package-or-packagelist)
      (dolist (,pack-var ,package-or-packagelist)
@ -1113,11 +1109,11 @@ scanner, a case-insensitive scanner is used."
 #+:cormanlisp
 (defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
                             &body body)
-  "Auxiliary macro used by REGEX-APROPOS and REGEX-APROPOS-LIST. Loops
-through PACKAGES and executes BODY with SYMBOL bound to each symbol
-which matches REGEX. Optionally evaluates and returns RETURN-FORM at
-the end. If CASE-INSENSITIVE is true and REGEX isn't already a
-scanner, a case-insensitive scanner is used."
+  "Auxiliary macro used by REGEX-APROPOS and REGEX-APROPOS-LIST.
+Loops through PACKAGES and executes BODY with SYMBOL bound to each
+symbol which matches REGEX.  Optionally evaluates and returns
+RETURN-FORM at the end.  If CASE-INSENSITIVE is true and REGEX isn't
+already a scanner, a case-insensitive scanner is used."
  (with-rebinding (regex)
    (with-unique-names (scanner %packages hash)
      `(let* ((,scanner (create-scanner ,regex
--- a/charmap.lisp
+++ b/charmap.lisp
@ -0,0 +1,152 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/charmap.lisp,v 1.18 2008/07/22 23:54:59 edi Exp $
+
+;;; An optimized representation of sets of characters.
+
+;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-ppcre)
+
+(defstruct (charmap  (:constructor make-charmap%))
+  ;; a bit vector mapping char codes to "booleans" (1 for set members,
+  ;; 0 for others)
+  (vector #*0 :type simple-bit-vector)
+  ;; the smallest character code of all characters in the set
+  (start 0 :type fixnum)
+  ;; the upper (exclusive) bound of all character codes in the set
+  (end 0 :type fixnum)
+  ;; the number of characters in the set, or NIL if this is unknown
+  (count nil :type (or fixnum null))
+  ;; whether the charmap actually represents the complement of the set  
+  (complementp nil :type boolean))
+
+;; seems to be necessary for some Lisps like ClozureCL
+(defmethod make-load-form ((map charmap) &optional environment)
+  (make-load-form-saving-slots map :environment environment))
+
+(declaim (inline in-charmap-p))
+(defun in-charmap-p (char charmap)
+  "Tests whether the character CHAR belongs to the set represented by CHARMAP."
+  (declare #.*standard-optimize-settings*)
+  (declare (character char) (charmap charmap))
+  (let* ((char-code (char-code char))
+         (char-in-vector-p
+          (let ((charmap-start (charmap-start charmap)))
+            (declare (fixnum charmap-start))
+            (and (<= charmap-start char-code)
+                 (< char-code (the fixnum (charmap-end charmap)))
+                 (= 1 (sbit (the simple-bit-vector (charmap-vector charmap))
+                            (- char-code charmap-start)))))))
+    (cond ((charmap-complementp charmap) (not char-in-vector-p))
+          (t char-in-vector-p))))
+
+(defun charmap-contents (charmap)
+  "Returns a list of all characters belonging to a character map.
+Only works for non-complement charmaps."
+  (declare #.*standard-optimize-settings*)
+  (declare (charmap charmap))
+  (and (not (charmap-complementp charmap))
+       (loop for code of-type fixnum from (charmap-start charmap) to (charmap-end charmap)
+             for i across (the simple-bit-vector (charmap-vector charmap))
+             when (= i 1)
+             collect (code-char code))))
+
+(defun make-charmap (start end test-function &optional complementp)
+  "Creates and returns a charmap representing all characters with
+character codes in the interval [start end) that satisfy
+TEST-FUNCTION.  The COMPLEMENTP slot of the charmap is set to the
+value of the optional argument, but this argument doesn't have an
+effect on how TEST-FUNCTION is used."
+  (declare #.*standard-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((vector (make-array (- end start) :element-type 'bit))
+        (count 0))
+    (declare (fixnum count))
+    (loop for code from start below end
+          for char = (code-char code)
+          for index from 0
+          when char do
+          (incf count)
+          (setf (sbit vector index) (if (funcall test-function char) 1 0)))
+    (make-charmap% :vector vector
+                   :start start
+                   :end end
+                   ;; we don't know for sure if COMPLEMENTP is true as
+                   ;; there isn't a necessary a character for each
+                   ;; integer below *REGEX-CHAR-CODE-LIMIT*
+                   :count (and (not complementp) count)
+                   ;; make sure it's boolean
+                   :complementp (not (not complementp)))))
+
+(defun create-charmap-from-test-function (test-function start end)
+  "Creates and returns a charmap representing all characters with
+character codes between START and END which satisfy TEST-FUNCTION.
+Tries to find the smallest interval which is necessary to represent
+the character set and uses the complement representation if that
+helps."
+  (declare #.*standard-optimize-settings*)
+  (let (start-in end-in start-out end-out)
+    ;; determine the smallest intervals containing the set and its
+    ;; complement, [start-in, end-in) and [start-out, end-out) - first
+    ;; the lower bound
+    (loop for code from start below end
+          for char = (code-char code)
+          until (and start-in start-out)
+          when (and char
+                    (not start-in)
+                    (funcall test-function char))
+          do (setq start-in code)
+          when (and char
+                    (not start-out)
+                    (not (funcall test-function char)))
+          do (setq start-out code))
+    (unless start-in
+      ;; no character satisfied the test, so return a "pseudo" charmap
+      ;; where IN-CHARMAP-P is always false
+      (return-from create-charmap-from-test-function
+        (make-charmap% :count 0)))
+    (unless start-out
+      ;; no character failed the test, so return a "pseudo" charmap
+      ;; where IN-CHARMAP-P is always true
+      (return-from create-charmap-from-test-function
+        (make-charmap% :complementp t)))
+    ;; now determine upper bound
+    (loop for code from (1- end) downto start
+          for char = (code-char code)
+          until (and end-in end-out)
+          when (and char
+                    (not end-in)
+                    (funcall test-function char))
+          do (setq end-in (1+ code))
+          when (and char
+                    (not end-out)
+                    (not (funcall test-function char)))
+          do (setq end-out (1+ code)))
+    ;; use the smaller interval
+    (cond ((<= (- end-in start-in) (- end-out start-out))
+           (make-charmap start-in end-in test-function))
+          (t (make-charmap start-out end-out (complement* test-function) t)))))
--- a/charset.lisp
+++ b/charset.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/charset.lisp,v 1.4 2008/07/03 08:39:10 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/charset.lisp,v 1.9 2008/07/23 00:47:58 edi Exp $

 ;;; A specialized set implementation for characters by Nikodemus Siivola.

@ -30,7 +30,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (defconstant +probe-depth+ 3
  "Maximum number of collisions \(for any element) we accept before we
@ -45,7 +45,7 @@ initialized to #\Null except for the first one which is initialized to
 #\?."
  (declare #.*standard-optimize-settings*)
  (declare (type (integer 2 #.(1- array-total-size-limit)) size))
-  ;; Since #\Null always hashes to 0, store something else there
+  ;; since #\Null always hashes to 0, store something else there
  ;; initially, and #\Null everywhere else
  (let ((result (make-array size
                            :element-type #-:lispworks 'character #+:lispworks 'lw:simple-char
@ -53,7 +53,7 @@ initialized to #\Null except for the first one which is initialized to
    (setf (char result 0) #\?)
    result))

-(defstruct (charset (:constructor make-charset))
+(defstruct (charset (:constructor make-charset ()))
  ;; this is set to 0 when we stop hashing and just use a CHAR-CODE
  ;; indexed vector
  (depth +probe-depth+ :type fixnum)
@ -92,7 +92,7 @@ to the hash code HASH."
        (depth (charset-depth set))
        (code (char-code char)))
    (declare (fixnum depth))
-    ;; As long as the set remains reasonably small, we use non-linear
+    ;; as long as the set remains reasonably small, we use non-linear
    ;; hashing - the first hash of any character is its CHAR-CODE, and
    ;; subsequent hashes are computed by MIX above
    (cond ((or
@ -129,14 +129,15 @@ to the hash code HASH."
  "Adds the character CHAR to the charset SET, extending SET if
 necessary.  Returns CHAR."
  (declare #.*standard-optimize-settings*)
-  (or (%add-to-charset char set)
+  (or (%add-to-charset char set t)
      (%add-to-charset/expand char set)
      (error "Oops, this should not happen..."))
  char)

-(defun %add-to-charset (char set)
+(defun %add-to-charset (char set count)
  "Tries to add the character CHAR to the charset SET without
-extending it.  Returns NIL if this fails."
+extending it.  Returns NIL if this fails.  Counts CHAR as new
+if COUNT is true and it is added to SET."
  (declare #.*standard-optimize-settings*)
  (declare (character char) (charset set))
  (let ((vector (charset-vector set))
@ -145,7 +146,11 @@ extending it.  Returns NIL if this fails."
    (declare (fixnum depth))
    ;; see comments in IN-CHARSET-P for algorithm
    (cond ((or (zerop depth) (zerop code))
-           (setf (char vector code) char))
+           (unless (eq char (char vector code))
+             (setf (char vector code) char)
+             (when count
+               (incf (charset-count set))))
+           char)
          (t
           (let ((hash code))
             (tagbody
@ -154,7 +159,8 @@ extending it.  Returns NIL if this fails."
                     (x (char vector index)))
                (cond ((eq x (code-char 0))
                       (setf (char vector index) char)
-                       (incf (charset-count set))
+                       (when count
+                         (incf (charset-count set)))
                       (return-from %add-to-charset char))
                      ((eq x char)
                       (return-from %add-to-charset char))
@ -184,7 +190,10 @@ extending it.  Returns NIL if this fails."
       (setf (charset-depth set) new-depth
             (charset-vector set) new-vector)
       (flet ((try-add (x)
-                (unless (%add-to-charset x set)
+                ;; don't count - old characters are already accounted
+                ;; for, and might count the new one multiple times as
+                ;; well
+                (unless (%add-to-charset x set nil)
                  (assert (not (zerop new-depth)))
                  (setf new-size (* 2 new-size))
                  (go :retry))))
@ -196,32 +205,38 @@ extending it.  Returns NIL if this fails."
                 (try-add x))
               (unless (zerop i)
                 (try-add x))))))))
+    ;; added and expanded, /now/ count the new character.
+    (incf (charset-count set))
    t))

-(defun all-characters (set)
-  "Returns a list of all characters in the charset SET."
+(defun map-charset (function charset)
+  "Calls FUNCTION with all characters in SET.  Returns NIL."
  (declare #.*standard-optimize-settings*)
-  (loop with count = (charset-count set)
-        with counter = 0
-        for code below char-code-limit
-        for char = (code-char code)
-        while (< counter count)
-        when (and char (in-charset-p char set))
-        do (incf counter)
-        and collect char))
+  (declare (function function))
+  (let* ((n (charset-count charset))
+         (vector (charset-vector charset))
+         (size (length vector)))
+    ;; see comments in IN-CHARSET-P for algorithm
+    (when (eq (code-char 0) (char vector 0))
+      (funcall function (code-char 0))
+      (decf n))
+    (loop for i from 1 below size
+          for char = (char vector i)
+          unless (eq (code-char 0) char) do
+          (funcall function char)
+          ;; this early termination test should be worth it when
+          ;; mapping across depth 0 charsets.
+          (when (zerop (decf n))
+            (return-from map-charset nil))))
+  nil)

-(defun merge-set (set1 set2 &optional invertedp)
-  "Returns the \"sum\" of two charsets.  This is a destructive
-operation on SET1.  If INVERTEDP is true, merges the \"inverse\" of
-SET2 into SET1 instead."
+(defun create-charset-from-test-function (test-function start end)
+  "Creates and returns a charset representing all characters with
+character codes between START and END which satisfy TEST-FUNCTION."
  (declare #.*standard-optimize-settings*)
-  ;; we only consider values with character codes below
-  ;; *REGEX-CHAR-CODE-LIMIT*
-  (loop for code of-type fixnum from 0 below *regex-char-code-limit*
+  (loop with charset = (make-charset)
+        for code from start below end
        for char = (code-char code)
-        when (and char (if invertedp
-                         (not (in-charset-p char set2))
-                         (in-charset-p char set2)))
-        do (add-to-charset char set1))
-  set1)
-
+        when (and char (funcall test-function char))
+        do (add-to-charset char charset)
+        finally (return charset)))
--- a/chartest.lisp
+++ b/chartest.lisp
@ -0,0 +1,98 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/chartest.lisp,v 1.3 2008/07/23 00:47:58 edi Exp $
+
+;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-ppcre)
+
+(defun create-hash-table-from-test-function (test-function start end)
+  "Creates and returns a hash table representing all characters with
+character codes between START and END which satisfy TEST-FUNCTION."
+  (declare #.*standard-optimize-settings*)
+  (loop with hash-table = (make-hash-table)
+        for code from start below end
+        for char = (code-char code)
+        when (and char (funcall test-function char))
+        do (setf (gethash char hash-table) t)
+        finally (return hash-table)))
+
+(defun create-optimized-test-function (test-function &key
+                                                     (start 0)
+                                                     (end *regex-char-code-limit*)
+                                                     (kind *optimize-char-classes*))
+  "Given a unary test function which is applicable to characters
+returns a function which yields the same boolean results for all
+characters with character codes from START to \(excluding) END.  If
+KIND is NIL, TEST-FUNCTION will simply be returned.  Otherwise, KIND
+should be one of:
+
+* :HASH-TABLE - builds a hash table representing all characters which
+                satisfy the test and returns a closure which checks if
+                a character is in that hash table
+
+* :CHARSET - instead of a hash table uses a \"charset\" which is a
+             data structure using non-linear hashing and optimized to
+             represent \(sparse) sets of characters in a fast and
+             space-efficient way \(contributed by Nikodemus Siivola)
+
+* :CHARMAP - instead of a hash table uses a bit vector to represent
+             the set of characters
+
+You can also use :HASH-TABLE* or :CHARSET* which are like :HASH-TABLE
+and :CHARSET but use the complement of the set if the set contains
+more than half of all characters between START and END.  This saves
+space but needs an additional pass across all characters to create the
+data structure.  There is no corresponding :CHARMAP* kind as the bit
+vectors are already created to cover the smallest possible interval
+which contains either the set or its complement."
+  (declare #.*standard-optimize-settings*)
+  (ecase kind
+    ((nil) test-function)
+    (:charmap
+     (let ((charmap (create-charmap-from-test-function test-function start end)))
+       (lambda (char)
+         (in-charmap-p char charmap))))
+    ((:charset :charset*)
+     (let ((charset (create-charset-from-test-function test-function start end)))
+       (cond ((or (eq kind :charset)
+                  (<= (charset-count charset) (ceiling (- end start) 2)))
+              (lambda (char)
+                (in-charset-p char charset)))
+             (t (setq charset (create-charset-from-test-function (complement* test-function)
+                                                                 start end))
+                (lambda (char)
+                  (not (in-charset-p char charset)))))))
+    ((:hash-table :hash-table*)
+     (let ((hash-table (create-hash-table-from-test-function test-function start end)))
+       (cond ((or (eq kind :charset)
+                  (<= (hash-table-count hash-table) (ceiling (- end start) 2)))
+              (lambda (char)
+                (gethash char hash-table)))
+             (t (setq hash-table (create-hash-table-from-test-function (complement* test-function)
+                                                                       start end))
+                (lambda (char)
+                  (not (gethash char hash-table)))))))))
--- a/cl-ppcre-unicode.asd
+++ b/cl-ppcre-unicode.asd
@ -0,0 +1,58 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-unicode.asd,v 1.14 2008/07/22 14:19:44 edi Exp $
+
+;;; This ASDF system definition was kindly provided by Marco Baringer.
+
+;;; Copyright (c) 2002-2008, Dr. Edmund Weitz.  All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-user)
+
+(defpackage :cl-ppcre-unicode-asd
+  (:use :cl :asdf))
+
+(in-package :cl-ppcre-unicode-asd)
+
+(defsystem :cl-ppcre-unicode
+  :components ((:module "cl-ppcre-unicode"
+                        :serial t
+                        :components ((:file "packages")
+                                     (:file "resolver"))))
+  :depends-on (:cl-ppcre :cl-unicode))
+
+(defsystem :cl-ppcre-unicode-test
+  :depends-on (:cl-ppcre-unicode :cl-ppcre-test)
+  :components ((:module "test"
+                        :serial t
+                        :components ((:file "unicode-tests")))))
+
+(defmethod perform ((o test-op) (c (eql (find-system :cl-ppcre-unicode))))
+  ;; we must load CL-PPCRE explicitly so that the CL-PPCRE-TEST system
+  ;; will be found
+  (operate 'load-op :cl-ppcre)
+  (operate 'load-op :cl-ppcre-unicode-test)
+  (funcall (intern (symbol-name :run-all-tests) (find-package :cl-ppcre-test))
+           :more-tests (intern (symbol-name :unicode-test) (find-package :cl-ppcre-test))))
--- a/cl-ppcre-unicode/packages.lisp
+++ b/cl-ppcre-unicode/packages.lisp
@ -1,7 +1,7 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-test.system,v 1.11 2007/01/01 23:43:10 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-unicode/packages.lisp,v 1.2 2008/07/22 13:58:13 edi Exp $

-;;; Copyright (c) 2002-2007, Dr. Edmund Weitz.  All rights reserved.
+;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.

 ;;; Redistribution and use in source and binary forms, with or without
 ;;; modification, are permitted provided that the following conditions
@ -27,14 +27,12 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-user)
+(in-package :cl-user)

-(defparameter *cl-ppcre-test-base-directory*
-  (make-pathname :name nil :type nil :version nil
-                 :defaults (parse-namestring *load-truename*)))
-
-(mk:defsystem #:cl-ppcre-test
-    :source-pathname *cl-ppcre-test-base-directory*
-    :source-extension "lisp"
-    :depends-on (#:cl-ppcre)
-    :components ((:file "ppcre-tests")))
+(defpackage :cl-ppcre-unicode
+  #+:genera
+  (:shadowing-import-from :common-lisp :lambda :string)
+  (:use #-:genera :cl #+:genera :future-common-lisp
+        :cl-ppcre :cl-unicode)
+  (:import-from :cl-ppcre :signal-syntax-error)
+  (:export :unicode-property-resolver))
--- a/cl-ppcre-unicode/resolver.lisp
+++ b/cl-ppcre-unicode/resolver.lisp
@ -0,0 +1,61 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-unicode/resolver.lisp,v 1.5 2008/07/23 02:14:08 edi Exp $
+
+;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-ppcre-unicode)
+
+(defun unicode-property-resolver (property-name)
+  "A property resolver which understands Unicode properties using
+CL-UNICODE's PROPERTY-TEST function.  This resolver is automatically
+installed in *PROPERTY-RESOLVER* when the CL-PPCRE-UNICODE system is
+loaded."
+  (or (property-test property-name :errorp nil)
+      (signal-syntax-error "There is no property named ~S." property-name)))
+
+(setq *property-resolver* 'unicode-property-resolver)
+
+(pushnew :cl-ppcre-unicode *features*)
+
+;; stuff for Nikodemus Siivola's HYPERDOC
+;; see <http://common-lisp.net/project/hyperdoc/>
+;; and <http://www.cliki.net/hyperdoc>
+;; also used by LW-ADD-ONS
+
+(defvar *hyperdoc-base-uri* "http://weitz.de/cl-ppcre/")
+
+(let ((exported-symbols-alist
+       (loop for symbol being the external-symbols of :cl-ppcre-unicode
+             collect (cons symbol
+                           (concatenate 'string
+                                        "#"
+                                        (string-downcase symbol))))))
+  (defun hyperdoc-lookup (symbol type)
+    (declare (ignore type))
+    (cdr (assoc symbol
+                exported-symbols-alist
+                :test #'eq))))
--- a/cl-ppcre.asd
+++ b/cl-ppcre.asd
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre.asd,v 1.30 2008/07/03 10:06:15 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre.asd,v 1.45 2008/07/23 02:14:06 edi Exp $

 ;;; This ASDF system definition was kindly provided by Marco Baringer.

@ -29,14 +29,23 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(asdf:defsystem :cl-ppcre
-  :version "1.4.1"
+(in-package :cl-user)
+
+(defpackage :cl-ppcre-asd
+  (:use :cl :asdf))
+
+(in-package :cl-ppcre-asd)
+
+(defsystem :cl-ppcre
+  :version "2.0.0"
  :serial t
  :components ((:file "packages")
               (:file "specials")
-               (:file "charset")
               (:file "util")
               (:file "errors")
+               (:file "charset")
+               (:file "charmap")
+               (:file "chartest")
               #-:use-acl-regexp2-engine
               (:file "lexer")
               #-:use-acl-regexp2-engine
@ -44,6 +53,8 @@
               #-:use-acl-regexp2-engine
               (:file "regex-class")
               #-:use-acl-regexp2-engine
+               (:file "regex-class-util")
+               #-:use-acl-regexp2-engine
               (:file "convert")
               #-:use-acl-regexp2-engine
               (:file "optimize")
@ -54,3 +65,15 @@
               #-:use-acl-regexp2-engine
               (:file "scanner")
               (:file "api")))
+
+(defsystem :cl-ppcre-test
+  :depends-on (:cl-ppcre :flexi-streams)
+  :components ((:module "test"
+                        :serial t
+                        :components ((:file "packages")
+                                     (:file "tests")
+                                     (:file "perl-tests")))))
+
+(defmethod perform ((o test-op) (c (eql (find-system :cl-ppcre))))
+  (operate 'load-op :cl-ppcre-test)
+  (funcall (intern (symbol-name :run-all-tests) (find-package :cl-ppcre-test))))
--- a/cl-ppcre.system
+++ b/cl-ppcre.system
@ -1,59 +0,0 @@
-;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre.system,v 1.13 2007/01/01 23:43:10 edi Exp $
-
-;;; Copyright (c) 2002-2007, Dr. Edmund Weitz.  All rights reserved.
-
-;;; Redistribution and use in source and binary forms, with or without
-;;; modification, are permitted provided that the following conditions
-;;; are met:
-
-;;;   * Redistributions of source code must retain the above copyright
-;;;     notice, this list of conditions and the following disclaimer.
-
-;;;   * Redistributions in binary form must reproduce the above
-;;;     copyright notice, this list of conditions and the following
-;;;     disclaimer in the documentation and/or other materials
-;;;     provided with the distribution.
-
-;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
-;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-(in-package #:cl-user)
-
-(defparameter *cl-ppcre-base-directory*
-  (make-pathname :name nil :type nil :version nil
-                 :defaults (parse-namestring *load-truename*)))
-
-(mk:defsystem #:cl-ppcre
-    :source-pathname *cl-ppcre-base-directory*
-    :source-extension "lisp"
-    :components ((:file "packages")
-                 (:file "specials" :depends-on ("packages"))
-                 (:file "util" :depends-on ("packages"))
-                 (:file "errors" :depends-on ("util"))
-                 #-:use-acl-regexp2-engine
-                 (:file "lexer" :depends-on ("errors" "specials"))
-                 #-:use-acl-regexp2-engine
-                 (:file "parser" :depends-on ("lexer"))
-                 #-:use-acl-regexp2-engine
-                 (:file "regex-class" :depends-on ("parser"))
-                 #-:use-acl-regexp2-engine
-                 (:file "convert" :depends-on ("regex-class"))
-                 #-:use-acl-regexp2-engine
-                 (:file "optimize" :depends-on ("convert"))
-                 #-:use-acl-regexp2-engine
-                 (:file "closures" :depends-on ("optimize" "specials"))
-                 #-:use-acl-regexp2-engine
-                 (:file "repetition-closures" :depends-on ("closures"))
-                 #-:use-acl-regexp2-engine
-                 (:file "scanner" :depends-on ("repetition-closures"))
-                 (:file "api" :depends-on ("scanner"))))
--- a/closures.lisp
+++ b/closures.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/closures.lisp,v 1.36 2008/07/03 07:44:06 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/closures.lisp,v 1.44 2008/07/22 22:38:05 edi Exp $

 ;;; Here we create the closures which together build the final
 ;;; scanner.
@ -30,16 +30,15 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (declaim (inline *string*= *string*-equal))
-
 (defun *string*= (string2 start1 end1 start2 end2)
  "Like STRING=, i.e. compares the special string *STRING* from START1
 to END1 with STRING2 from START2 to END2. Note that there's no
 boundary check - this has to be implemented by the caller."
  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum start1 end1 start2 end2))
+  (declare (fixnum start1 end1 start2 end2))
  (loop for string1-idx of-type fixnum from start1 below end1
        for string2-idx of-type fixnum from start2 below end2
        always (char= (schar *string* string1-idx)
@ -50,7 +49,7 @@ boundary check - this has to be implemented by the caller."
 START1 to END1 with STRING2 from START2 to END2. Note that there's no
 boundary check - this has to be implemented by the caller."
  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum start1 end1 start2 end2))
+  (declare (fixnum start1 end1 start2 end2))
  (loop for string1-idx of-type fixnum from start1 below end1
        for string2-idx of-type fixnum from start2 below end2
        always (char-equal (schar *string* string1-idx)
@ -81,7 +80,7 @@ such that the call to NEXT-FN after the match would succeed."))
    ;; now create a closure which checks if one of the closures
    ;; created above can succeed
    (lambda (start-pos)
-      (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (loop for matcher in all-matchers
            thereis (funcall (the function matcher) start-pos)))))

@ -90,13 +89,13 @@ such that the call to NEXT-FN after the match would succeed."))
  ;; the position of this REGISTER within the whole regex; we start to
  ;; count at 0
  (let ((num (num register)))
-    (declare (type fixnum num))
+    (declare (fixnum num))
    ;; STORE-END-OF-REG is a thin wrapper around NEXT-FN which will
    ;; update the corresponding values of *REGS-START* and *REGS-END*
    ;; after the inner matcher has succeeded
    (flet ((store-end-of-reg (start-pos)
-               (declare (type fixnum start-pos)
-                        (type function next-fn))
+               (declare (fixnum start-pos)
+                        (function next-fn))
               (setf (svref *reg-starts* num) (svref *regs-maybe-start* num)
                     (svref *reg-ends* num) start-pos)
           (funcall next-fn start-pos)))
@ -104,10 +103,10 @@ such that the call to NEXT-FN after the match would succeed."))
      ;; wrapped by this REGISTER
      (let ((inner-matcher (create-matcher-aux (regex register)
                                               #'store-end-of-reg)))
-        (declare (type function inner-matcher))
+        (declare (function inner-matcher))
        ;; here comes the actual closure for REGISTER
        (lambda (start-pos)
-          (declare (type fixnum start-pos))
+          (declare (fixnum start-pos))
          ;; remember the old values of *REGS-START* and friends in
          ;; case we cannot match
          (let ((old-*reg-starts* (svref *reg-starts* num))
@ -129,7 +128,7 @@ such that the call to NEXT-FN after the match would succeed."))
  ;; create a closure which just checks for the inner regex and
  ;; doesn't care about NEXT-FN
  (let ((test-matcher (create-matcher-aux (regex lookahead) #'identity)))
-    (declare (type function next-fn test-matcher))
+    (declare (function next-fn test-matcher))
    (if (positivep lookahead)
      ;; positive look-ahead: check success of inner regex, then call
      ;; NEXT-FN
@ -148,152 +147,52 @@ such that the call to NEXT-FN after the match would succeed."))
        ;; create a closure which just checks for the inner regex and
        ;; doesn't care about NEXT-FN
        (test-matcher (create-matcher-aux (regex lookbehind) #'identity)))
-    (declare (type function next-fn test-matcher)
-             (type fixnum len))
+    (declare (function next-fn test-matcher)
+             (fixnum len))
    (if (positivep lookbehind)
      ;; positive look-behind: check success of inner regex (if we're
      ;; far enough from the start of *STRING*), then call NEXT-FN
      (lambda (start-pos)
-        (declare (type fixnum start-pos))
+        (declare (fixnum start-pos))
        (and (>= (- start-pos (or *real-start-pos* *start-pos*)) len)
             (funcall test-matcher (- start-pos len))
             (funcall next-fn start-pos)))
      ;; negative look-behind: check failure of inner regex (if we're
      ;; far enough from the start of *STRING*), then call NEXT-FN
      (lambda (start-pos)
-        (declare (type fixnum start-pos))
+        (declare (fixnum start-pos))
        (and (or (< (- start-pos (or *real-start-pos* *start-pos*)) len)
                 (not (funcall test-matcher (- start-pos len))))
             (funcall next-fn start-pos))))))

 (defmacro insert-char-class-tester ((char-class chr-expr) &body body)
-  "Utility macro to replace each occurence of '(CHAR-CLASS-TEST)
+  "Utility macro to replace each occurence of '\(CHAR-CLASS-TEST)
 within BODY with the correct test (corresponding to CHAR-CLASS)
 against CHR-EXPR."
-  (with-unique-names (%char-class)
-    ;; the actual substitution is done here: replace
-    ;; '(CHAR-CLASS-TEST) with NEW
+  (with-rebinding (char-class)
+    (with-unique-names (test-function)
      (flet ((substitute-char-class-tester (new)
               (subst new '(char-class-test) body
                      :test #'equalp)))
-      `(let* ((,%char-class ,char-class)
-              (set (charset ,%char-class))
-              (count (if set
-                       (charset-count set)
-                       most-positive-fixnum))
-              ;; collect a list of "all" characters in the set if
-              ;; there aren't more than two
-              (all-chars (if (<= count 2)
-                           (all-characters set)
-                           nil))
-              downcasedp)
-        (declare (type fixnum count))
-        ;; check if we can partition the charset into three ranges (or
-        ;; less)
-        (multiple-value-bind (min1 max1 min2 max2 min3 max3)
-            (create-ranges-from-set set)
-          ;; if that didn't work and CHAR-CLASS is case-insensitive we
-          ;; try it again with every character downcased
-          (when (and (not min1)
-                     (case-insensitive-p ,%char-class))
-            (multiple-value-setq (min1 max1 min2 max2 min3 max3)
-              (create-ranges-from-set set :downcasep t))
-            (setq downcasedp t))
-          (cond ((= count 1)
-                  ;; charset contains exactly one character so we just
-                  ;; check for this single character; (note that this
-                  ;; actually can't happen because this case is
-                  ;; optimized away in CONVERT already...)
-                  (let ((chr1 (first all-chars)))
+        `(let ((,test-function (test-function ,char-class)))
           ,@(substitute-char-class-tester
-                       `(char= ,chr-expr chr1))))
-                ((= count 2)
-                  ;; set contains exactly two characters
-                  (let ((chr1 (first all-chars))
-                        (chr2 (second all-chars)))
-                    ,@(substitute-char-class-tester
-                       `(let ((chr ,chr-expr))
-                         (or (char= chr chr1)
-                             (char= chr chr2))))))
-                ((word-char-class-p ,%char-class)
-                  ;; special-case: set is \w, \W, [\w], [\W] or
-                  ;; something equivalent
-                  ,@(substitute-char-class-tester
-                     `(word-char-p ,chr-expr)))
-                ((= count *regex-char-code-limit*)
-                  ;; according to the ANSI standard we might have all
-                  ;; possible characters in the set even if it doesn't
-                  ;; contain CHAR-CODE-LIMIT characters but this
-                  ;; doesn't seem to be the case for current
-                  ;; implementations (also note that this optimization
-                  ;; implies that you must not have characters with
-                  ;; character codes beyond *REGEX-CHAR-CODE-LIMIT* in
-                  ;; your regexes if you've changed this limit); we
-                  ;; expect the compiler to optimize this T "test"
-                  ;; away
-                  ,@(substitute-char-class-tester t))
-                ((and downcasedp min1 min2 min3)
-                  ;; three different ranges, downcased
-                  ,@(substitute-char-class-tester
-                     `(let ((chr ,chr-expr))
-                       (or (char-not-greaterp min1 chr max1)
-                           (char-not-greaterp min2 chr max2)
-                           (char-not-greaterp min3 chr max3)))))
-                ((and downcasedp min1 min2)
-                  ;; two ranges, downcased
-                  ,@(substitute-char-class-tester
-                     `(let ((chr ,chr-expr))
-                       (or (char-not-greaterp min1 chr max1)
-                           (char-not-greaterp min2 chr max2)))))
-                ((and downcasedp min1)
-                  ;; one downcased range
-                  ,@(substitute-char-class-tester
-                     `(char-not-greaterp min1 ,chr-expr max1)))
-                ((and min1 min2 min3)
-                  ;; three ranges
-                  ,@(substitute-char-class-tester
-                     `(let ((chr ,chr-expr))
-                       (or (char<= min1 chr max1)
-                           (char<= min2 chr max2)
-                           (char<= min3 chr max3)))))
-                ((and min1 min2)
-                  ;; two ranges
-                  ,@(substitute-char-class-tester
-                     `(let ((chr ,chr-expr))
-                       (or (char<= min1 chr max1)
-                           (char<= min2 chr max2)))))
-                (min1
-                  ;; one range
-                  ,@(substitute-char-class-tester
-                     `(char<= min1 ,chr-expr max1)))
-                (t
-                  ;; the general case; note that most of the above
-                  ;; "optimizations" are based on early (2002)
-                  ;; experiences and benchmarks with CMUCL
-                  ,@(substitute-char-class-tester
-                     `(in-charset-p ,chr-expr set)))))))))
+              `(funcall ,test-function ,chr-expr)))))))

 (defmethod create-matcher-aux ((char-class char-class) next-fn)
  (declare #.*standard-optimize-settings*)
-  (declare (type function next-fn))
+  (declare (function next-fn))
  ;; insert a test against the current character within *STRING*
  (insert-char-class-tester (char-class (schar *string* start-pos))
-    (if (invertedp char-class)
    (lambda (start-pos)
-        (declare (type fixnum start-pos))
-        (and (< start-pos *end-pos*)
-             (not (char-class-test))
-             (funcall next-fn (1+ start-pos))))
-      (lambda (start-pos)
-        (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (and (< start-pos *end-pos*)
           (char-class-test)
-             (funcall next-fn (1+ start-pos)))))))
+           (funcall next-fn (1+ start-pos))))))

 (defmethod create-matcher-aux ((str str) next-fn)
  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum *end-string-pos*)
-           (type function next-fn)
+  (declare (fixnum *end-string-pos*)
+           (function next-fn)
           ;; this special value is set by CREATE-SCANNER when the
           ;; closures are built
           (special end-string))
@ -307,15 +206,15 @@ against CHR-EXPR."
         (end-string-len (if end-string
                           (length end-string)
                           nil)))
-    (declare (type fixnum len))
+    (declare (fixnum len))
    (cond ((and start-of-end-string-p case-insensitive-p)
            ;; closure for the first STR which belongs to the constant
            ;; string at the end of the regular expression;
            ;; case-insensitive version
            (lambda (start-pos)
-              (declare (type fixnum start-pos end-string-len))
+              (declare (fixnum start-pos end-string-len))
              (let ((test-end-pos (+ start-pos end-string-len)))
-                (declare (type fixnum test-end-pos))
+                (declare (fixnum test-end-pos))
                ;; either we're at *END-STRING-POS* (which means that
                ;; it has already been confirmed that end-string
                ;; starts here) or we really have to test
@ -329,9 +228,9 @@ against CHR-EXPR."
            ;; string at the end of the regular expression;
            ;; case-sensitive version
            (lambda (start-pos)
-              (declare (type fixnum start-pos end-string-len))
+              (declare (fixnum start-pos end-string-len))
              (let ((test-end-pos (+ start-pos end-string-len)))
-                (declare (type fixnum test-end-pos))
+                (declare (fixnum test-end-pos))
                ;; either we're at *END-STRING-POS* (which means that
                ;; it has already been confirmed that end-string
                ;; starts here) or we really have to test
@ -344,13 +243,13 @@ against CHR-EXPR."
            ;; a STR which can be skipped because some other function
            ;; has already confirmed that it matches
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (funcall next-fn (+ start-pos len))))
          ((and (= len 1) case-insensitive-p)
            ;; STR represent exactly one character; case-insensitive
            ;; version
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (and (< start-pos *end-pos*)
                   (char-equal (schar *string* start-pos) chr)
                   (funcall next-fn (1+ start-pos)))))
@ -358,35 +257,34 @@ against CHR-EXPR."
            ;; STR represent exactly one character; case-sensitive
            ;; version
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (and (< start-pos *end-pos*)
                   (char= (schar *string* start-pos) chr)
                   (funcall next-fn (1+ start-pos)))))
          (case-insensitive-p
            ;; general case, case-insensitive version
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (let ((next-pos (+ start-pos len)))
-                (declare (type fixnum next-pos))
+                (declare (fixnum next-pos))
                (and (<= next-pos *end-pos*)
                     (*string*-equal str start-pos next-pos 0 len)
                     (funcall next-fn next-pos)))))
          (t
            ;; general case, case-sensitive version
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (let ((next-pos (+ start-pos len)))
-                (declare (type fixnum next-pos))
+                (declare (fixnum next-pos))
                (and (<= next-pos *end-pos*)
                     (*string*= str start-pos next-pos 0 len)
                     (funcall next-fn next-pos))))))))

 (declaim (inline word-boundary-p))
-
 (defun word-boundary-p (start-pos)
  "Check whether START-POS is a word-boundary within *STRING*."
  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum start-pos))
+  (declare (fixnum start-pos))
  (let ((1-start-pos (1- start-pos))
        (*start-pos* (or *real-start-pos* *start-pos*)))
    ;; either the character before START-POS is a word-constituent and
@ -407,7 +305,7 @@ against CHR-EXPR."

 (defmethod create-matcher-aux ((word-boundary word-boundary) next-fn)
  (declare #.*standard-optimize-settings*)
-  (declare (type function next-fn))
+  (declare (function next-fn))
  (if (negatedp word-boundary)
    (lambda (start-pos)
      (and (not (word-boundary-p start-pos))
@ -418,25 +316,25 @@ against CHR-EXPR."

 (defmethod create-matcher-aux ((everything everything) next-fn)
  (declare #.*standard-optimize-settings*)
-  (declare (type function next-fn))
+  (declare (function next-fn))
  (if (single-line-p everything)
    ;; closure for single-line-mode: we really match everything, so we
    ;; just advance the index into *STRING* by one and carry on
    (lambda (start-pos)
-      (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (and (< start-pos *end-pos*)
           (funcall next-fn (1+ start-pos))))
    ;; not single-line-mode, so we have to make sure we don't match
    ;; #\Newline
    (lambda (start-pos)
-      (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (and (< start-pos *end-pos*)
           (char/= (schar *string* start-pos) #\Newline)
           (funcall next-fn (1+ start-pos))))))

 (defmethod create-matcher-aux ((anchor anchor) next-fn)
  (declare #.*standard-optimize-settings*)
-  (declare (type function next-fn))
+  (declare (function next-fn))
  (let ((startp (startp anchor))
        (multi-line-p (multi-line-p anchor)))
    (cond ((no-newline-p anchor)
@ -444,14 +342,14 @@ against CHR-EXPR."
            ;; we just have to check whether START-POS equals
            ;; *END-POS*
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (and (= start-pos *end-pos*)
                   (funcall next-fn start-pos))))
          ((and startp multi-line-p)
            ;; a start-anchor in multi-line-mode: check if we're at
            ;; *START-POS* or if the last character was #\Newline
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (let ((*start-pos* (or *real-start-pos* *start-pos*)))
                (and (or (= start-pos *start-pos*)
                         (and (<= start-pos *end-pos*)
@ -463,7 +361,7 @@ against CHR-EXPR."
            ;; a start-anchor which is not in multi-line-mode, so just
            ;; check whether we're at *START-POS*
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (and (= start-pos (or *real-start-pos* *start-pos*))
                   (funcall next-fn start-pos))))
          (multi-line-p
@ -471,7 +369,7 @@ against CHR-EXPR."
            ;; *END-POS* or if the character we're looking at is
            ;; #\Newline
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (and (or (= start-pos *end-pos*)
                       (and (< start-pos *end-pos*)
                            (char= #\Newline
@ -482,7 +380,7 @@ against CHR-EXPR."
            ;; check if we're at *END-POS* or if we're looking at
            ;; #\Newline and there's nothing behind it
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (and (or (= start-pos *end-pos*)
                       (and (= start-pos (1- *end-pos*))
                            (char= #\Newline
@ -491,14 +389,14 @@ against CHR-EXPR."

 (defmethod create-matcher-aux ((back-reference back-reference) next-fn)
  (declare #.*standard-optimize-settings*)
-  (declare (type function next-fn))
+  (declare (function next-fn))
  ;; the position of the corresponding REGISTER within the whole
  ;; regex; we start to count at 0
  (let ((num (num back-reference)))
    (if (case-insensitive-p back-reference)
      ;; the case-insensitive version
      (lambda (start-pos)
-        (declare (type fixnum start-pos))
+        (declare (fixnum start-pos))
        (let ((reg-start (svref *reg-starts* num))
              (reg-end (svref *reg-ends* num)))
          ;; only bother to check if the corresponding REGISTER as
@ -506,7 +404,7 @@ against CHR-EXPR."
          (and reg-start
               (let ((next-pos (+ start-pos (- (the fixnum reg-end)
                                               (the fixnum reg-start)))))
-                 (declare (type fixnum next-pos))
+                 (declare (fixnum next-pos))
                 (and
                   (<= next-pos *end-pos*)
                   (*string*-equal *string* start-pos next-pos
@ -514,7 +412,7 @@ against CHR-EXPR."
                   (funcall next-fn next-pos))))))
      ;; the case-sensitive version
      (lambda (start-pos)
-        (declare (type fixnum start-pos))
+        (declare (fixnum start-pos))
        (let ((reg-start (svref *reg-starts* num))
              (reg-end (svref *reg-ends* num)))
          ;; only bother to check if the corresponding REGISTER as
@ -522,7 +420,7 @@ against CHR-EXPR."
          (and reg-start
               (let ((next-pos (+ start-pos (- (the fixnum reg-end)
                                               (the fixnum reg-start)))))
-                 (declare (type fixnum next-pos))
+                 (declare (fixnum next-pos))
                 (and
                   (<= next-pos *end-pos*)
                   (*string*= *string* start-pos next-pos
@ -534,17 +432,17 @@ against CHR-EXPR."
  (let* ((test (test branch))
         (then-matcher (create-matcher-aux (then-regex branch) next-fn))
         (else-matcher (create-matcher-aux (else-regex branch) next-fn)))
-    (declare (type function then-matcher else-matcher))
+    (declare (function then-matcher else-matcher))
    (cond ((numberp test)
            (lambda (start-pos)
-              (declare (type fixnum test))
+              (declare (fixnum test))
              (if (and (< test (length *reg-starts*))
                       (svref *reg-starts* test))
                (funcall then-matcher start-pos)
                (funcall else-matcher start-pos))))
          (t
            (let ((test-matcher (create-matcher-aux test #'identity)))
-              (declare (type function test-matcher))
+              (declare (function test-matcher))
              (lambda (start-pos)
                (if (funcall test-matcher start-pos)
                  (funcall then-matcher start-pos)
@ -553,7 +451,7 @@ against CHR-EXPR."
 (defmethod create-matcher-aux ((standalone standalone) next-fn)
  (declare #.*standard-optimize-settings*)
  (let ((inner-matcher (create-matcher-aux (regex standalone) #'identity)))
-    (declare (type function next-fn inner-matcher))
+    (declare (function next-fn inner-matcher))
    (lambda (start-pos)
      (let ((next-pos (funcall inner-matcher start-pos)))
        (and next-pos
--- a/convert.lisp
+++ b/convert.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/convert.lisp,v 1.29 2008/07/03 07:44:06 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/convert.lisp,v 1.54 2008/07/23 02:14:06 edi Exp $

 ;;; Here the parse tree is converted into its internal representation
 ;;; using REGEX objects.  At the same time some optimizations are
@ -31,7 +31,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 ;;; The flags that represent the "ism" modifiers are always kept
 ;;; together in a three-element list. We use the following macros to
@ -50,10 +50,10 @@
  `(third ,flags))

 (defun set-flag (token)
-  (declare #.*standard-optimize-settings*)
-  (declare (special flags))
  "Reads a flag token and sets or unsets the corresponding entry in
 the special FLAGS list."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags))
  (case token
    ((:case-insensitive-p)
      (setf (case-insensitive-mode-p flags) t))
@ -68,70 +68,107 @@ the special FLAGS list."
    ((:not-single-line-mode-p)
      (setf (single-line-mode-p flags) nil))
    (otherwise
-      (signal-ppcre-syntax-error "Unknown flag token ~A" token))))
+      (signal-syntax-error "Unknown flag token ~A." token))))

-(defun add-range-to-set (set from to)
+(defgeneric resolve-property (property)
+  (:documentation "Resolves PROPERTY to a unary character test
+function.  PROPERTY can either be a function designator or it can be a
+string which is resolved using *PROPERTY-RESOLVER*.")
+  (:method ((property-name string))
+   (funcall *property-resolver* property-name))
+  (:method ((function-name symbol))
+   function-name)
+  (:method ((test-function function))
+   test-function))
+
+(defun convert-char-class-to-test-function (list invertedp case-insensitive-p)
+  "Combines all items in LIST into test function and returns a
+logical-OR combination of these functions.  Items can be single
+characters, character ranges like \(:RANGE #\\A #\\E), or special
+character classes like :DIGIT-CLASS.  Does the right thing with
+respect to case-\(in)sensitivity as specified by the special variable
+FLAGS."
  (declare #.*standard-optimize-settings*)
  (declare (special flags))
-  "Adds all characters from character FROM to character TO
-\(inclusive) to the charset SET.  Does the right thing with respect to
-case-\(in)sensitivity as specified by the special variable FLAGS."
-  (let ((from-code (char-code from))
-        (to-code (char-code to)))
-    (when (> from-code to-code)
-      (signal-ppcre-syntax-error "Invalid range from ~A to ~A in char-class"
-                                 from to))
-    (cond ((case-insensitive-mode-p flags)
-            (loop for code from from-code to to-code
-                  for char = (code-char code)
-                  do (add-to-charset (char-upcase char) set)
-                     (add-to-charset (char-downcase char) set)))
-          (t
-            (loop for code from from-code to to-code
-                  do (add-to-charset (code-char code) set))))
-    set))
-
-(defun convert-char-class-to-charset (list)
-  (declare #.*standard-optimize-settings*)
-  "Combines all items in LIST into one charset and returns it.  Items
-can be single characters, character ranges like \(:RANGE #\\A #\\E),
-or special character classes like :DIGIT-CLASS.  Does the right thing
-with respect to case-\(in)sensitivity as specified by the special
-variable FLAGS."
-  (loop with set = (make-charset)
-        for item in list
-        if (characterp item)
-        ;; treat a single character C like a range (:RANGE C C)
-        do (add-range-to-set set item item)
-        else if (symbolp item)
-        ;; special character classes
-        do (setq set
+  (let ((test-functions
+         (loop for item in list
+               collect (cond ((characterp item)
+                              ;; rebind so closure captures the right one
+                              (let ((this-char item))
+                                (lambda (char)
+                                  (declare (character char this-char))
+                                  (char= char this-char))))
+                             ((symbolp item)
                              (case item
-                   ((:digit-class)
-                    (merge-set set +digit-set+))
-                   ((:non-digit-class)
-                    (merge-set set +digit-set+ t))
-                   ((:whitespace-char-class)
-                    (merge-set set +whitespace-char-set+))
-                   ((:non-whitespace-char-class)
-                    (merge-set set +whitespace-char-set+ t))
-                   ((:word-char-class)
-                    (merge-set set +word-char-set+))
-                   ((:non-word-char-class)
-                    (merge-set set +word-char-set+ t))
+                                ((:digit-class) #'digit-char-p)
+                                ((:non-digit-class) (complement* #'digit-char-p))
+                                ((:whitespace-char-class) #'whitespacep)
+                                ((:non-whitespace-char-class) (complement* #'whitespacep))
+                                ((:word-char-class) #'word-char-p)
+                                ((:non-word-char-class) (complement* #'word-char-p))
                                (otherwise
-                    (signal-ppcre-syntax-error
-                     "Unknown symbol ~A in character class"
-                     item))))
-        else if (and (consp item)
-                     (eq (car item) :range))
-        ;; proper ranges
-        do (add-range-to-set set
-                             (second item)
-                             (third item))
-        else do (signal-ppcre-syntax-error "Unknown item ~A in char-class list"
-                                           item)
-        finally (return set)))
+                                 (signal-syntax-error "Unknown symbol ~A in character class." item))))
+                             ((and (consp item)
+                                   (eq (first item) :property))
+                              (resolve-property (second item)))
+                             ((and (consp item)
+                                   (eq (first item) :inverted-property))
+                              (complement* (resolve-property (second item))))
+                             ((and (consp item)
+                                   (eq (first item) :range))
+                              (let ((from (second item))
+                                    (to (third item)))
+                                (when (char> from to)
+                                  (signal-syntax-error "Invalid range from ~S to ~S in char-class." from to))
+                                (lambda (char)
+                                  (declare (character char from to))
+                                  (char<= from char to))))
+                             (t (signal-syntax-error "Unknown item ~A in char-class list." item))))))
+    (unless test-functions
+      (signal-syntax-error "Empty character class."))
+    (cond ((cdr test-functions)           
+           (cond ((and invertedp case-insensitive-p)
+                  (lambda (char)
+                    (declare (character char))
+                    (loop with both-case-p = (both-case-p char)
+                          with char-down = (if both-case-p (char-downcase char) char)
+                          with char-up = (if both-case-p (char-upcase char) nil)
+                          for test-function in test-functions
+                          never (or (funcall test-function char-down)
+                                    (and char-up (funcall test-function char-up))))))
+                 (case-insensitive-p
+                  (lambda (char)
+                    (declare (character char))
+                    (loop with both-case-p = (both-case-p char)
+                          with char-down = (if both-case-p (char-downcase char) char)
+                          with char-up = (if both-case-p (char-upcase char) nil)
+                          for test-function in test-functions
+                          thereis (or (funcall test-function char-down)
+                                      (and char-up (funcall test-function char-up))))))
+                 (invertedp
+                  (lambda (char)
+                    (loop for test-function in test-functions
+                          never (funcall test-function char))))
+                 (t
+                  (lambda (char)
+                    (loop for test-function in test-functions
+                          thereis (funcall test-function char))))))
+          ;; there's only one test-function
+          (t (let ((test-function (first test-functions)))
+               (cond ((and invertedp case-insensitive-p)
+                      (lambda (char)
+                        (declare (character char))
+                        (not (or (funcall test-function (char-downcase char))
+                                 (and (both-case-p char)
+                                      (funcall test-function (char-upcase char)))))))
+                     (case-insensitive-p
+                      (lambda (char)
+                        (declare (character char))
+                        (or (funcall test-function (char-downcase char))
+                            (and (both-case-p char)
+                                 (funcall test-function (char-upcase char))))))
+                     (invertedp (complement* test-function))
+                     (t test-function)))))))

 (defun maybe-split-repetition (regex
                               greedyp
@ -140,14 +177,14 @@ variable FLAGS."
                               min-len
                               length
                               reg-seen)
-  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum minimum)
-           (type (or fixnum null) maximum))
  "Splits a REPETITION object into a constant and a varying part if
 applicable, i.e. something like
  a{3,} -> a{3}a*
 The arguments to this function correspond to the REPETITION slots of
 the same name."
+  (declare #.*standard-optimize-settings*)
+  (declare (fixnum minimum)
+           ((or fixnum null) maximum))
  ;; note the usage of COPY-REGEX here; we can't use the same REGEX
  ;; object in both REPETITIONS because they will have different
  ;; offsets
@ -209,12 +246,12 @@ the same name."
 ;; regex at the start (perhaps modulo #\Newline).

 (defun maybe-accumulate (str)
-  (declare #.*standard-optimize-settings*)
-  (declare (special accumulate-start-p starts-with))
-  (declare (ftype (function (t) fixnum) len))
  "Accumulate STR into the special variable STARTS-WITH if
 ACCUMULATE-START-P (also special) is true and STARTS-WITH is either
 NIL or a STR object of the same case mode. Always returns NIL."
+  (declare #.*standard-optimize-settings*)
+  (declare (special accumulate-start-p starts-with))
+  (declare (ftype (function (t) fixnum) len))
  (when accumulate-start-p
    (etypecase starts-with
      (str
@ -263,12 +300,11 @@ NIL or a STR object of the same case mode. Always returns NIL."
        (setq accumulate-start-p nil))))
  nil)

+(declaim (inline convert-aux))
 (defun convert-aux (parse-tree)
-  (declare #.*standard-optimize-settings*)
-  (declare (special flags reg-num reg-names accumulate-start-p starts-with max-back-ref))
-  "Converts the parse tree PARSE-TREE into a REGEX object and returns it.
+  "Converts the parse tree PARSE-TREE into a REGEX object and returns
+it.  Will also

-Will also
  - split and optimize repetitions,
  - accumulate strings or EVERYTHING objects into the special variable
    STARTS-WITH,
@ -279,10 +315,22 @@ Will also
  - maintain and adher to the currently applicable modifiers in the special
    variable FLAGS, and
  - maybe even wash your car..."
-  (cond ((consp parse-tree)
-         (case (first parse-tree)
-           ;; (:SEQUENCE {<regex>}*)
-           ((:sequence)
+  (declare #.*standard-optimize-settings*)
+  (if (consp parse-tree)
+    (convert-compound-parse-tree (first parse-tree) parse-tree)
+    (convert-simple-parse-tree parse-tree)))
+
+(defgeneric convert-compound-parse-tree (token parse-tree &key)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Helper function for CONVERT-AUX which converts
+parse trees which are conses and dispatches on TOKEN which is the
+first element of the parse tree.")
+  (:method (token parse-tree &key)
+   (signal-syntax-error "Unknown token ~A in parse-tree." token)))
+
+(defmethod convert-compound-parse-tree ((token (eql :sequence)) parse-tree &key)
+  "The case for parse trees like \(:SEQUENCE {<regex>}*)."
+  (declare #.*standard-optimize-settings*)
  (cond ((cddr parse-tree)
         ;; this is essentially like
         ;; (MAPCAR 'CONVERT-AUX (REST PARSE-TREE))
@ -291,15 +339,18 @@ Will also
               while parse-tree-rest
               do (setf (car parse-tree-rest)
                        (convert-aux (car parse-tree-rest))))
-                   (make-instance 'seq
-                                  :elements (rest parse-tree)))
+         (make-instance 'seq :elements (rest parse-tree)))
        (t (convert-aux (second parse-tree)))))
-           ;; (:GROUP {<regex>}*)
-           ;; this is a syntactical construct equivalent to :SEQUENCE
-           ;; intended to keep the effect of modifiers local
-           ((:group)
-            ;; make a local copy of FLAGS and shadow the global
-            ;; value while we descend into the enclosed regexes
+
+(defmethod convert-compound-parse-tree ((token (eql :group)) parse-tree &key)
+  "The case for parse trees like \(:GROUP {<regex>}*).
+
+This is a syntactical construct equivalent to :SEQUENCE intended to
+keep the effect of modifiers local."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags))
+  ;; make a local copy of FLAGS and shadow the global value while we
+  ;; descend into the enclosed regexes
  (let ((flags (copy-list flags)))
    (declare (special flags))
    (cond ((cddr parse-tree)
@ -307,47 +358,46 @@ Will also
                 while parse-tree-rest
                 do (setf (car parse-tree-rest)
                          (convert-aux (car parse-tree-rest))))
-                     (make-instance 'seq
-                                    :elements (rest parse-tree)))
+           (make-instance 'seq :elements (rest parse-tree)))
          (t (convert-aux (second parse-tree))))))
-           ;; (:ALTERNATION {<regex>}*)
-           ((:alternation)
-            ;; we must stop accumulating objects into STARTS-WITH
-            ;; once we reach an alternation
+
+(defmethod convert-compound-parse-tree ((token (eql :alternation)) parse-tree &key)
+  "The case for \(:ALTERNATION {<regex>}*)."
+  (declare #.*standard-optimize-settings*)
+  (declare (special accumulate-start-p))
+  ;; we must stop accumulating objects into STARTS-WITH once we reach
+  ;; an alternation
  (setq accumulate-start-p nil)
  (loop for parse-tree-rest on (rest parse-tree)
        while parse-tree-rest
        do (setf (car parse-tree-rest)
                 (convert-aux (car parse-tree-rest))))
-            (make-instance 'alternation
-                           :choices (rest parse-tree)))
-           ;; (:BRANCH <test> <regex>)
-           ;; <test> must be look-ahead, look-behind or number;
-           ;; if <regex> is an alternation it must have one or two
-           ;; choices
-           ((:branch)
+  (make-instance 'alternation :choices (rest parse-tree)))
+
+(defmethod convert-compound-parse-tree ((token (eql :branch)) parse-tree &key)
+  "The case for \(:BRANCH <test> <regex>).
+
+Here, <test> must be look-ahead, look-behind or number; if <regex> is
+an alternation it must have one or two choices."
+  (declare #.*standard-optimize-settings*)
+  (declare (special accumulate-start-p))
  (setq accumulate-start-p nil)
  (let* ((test-candidate (second parse-tree))
         (test (cond ((numberp test-candidate)
                      (when (zerop (the fixnum test-candidate))
-                                  (signal-ppcre-syntax-error
-                                   "Register 0 doesn't exist: ~S"
-                                   parse-tree))
+                        (signal-syntax-error "Register 0 doesn't exist: ~S." parse-tree))
                      (1- (the fixnum test-candidate)))
                     (t (convert-aux test-candidate))))
         (alternations (convert-aux (third parse-tree))))
    (when (and (not (numberp test))
               (not (typep test 'lookahead))
               (not (typep test 'lookbehind)))
-                (signal-ppcre-syntax-error
-                 "Branch test must be look-ahead, look-behind or number: ~S"
-                 parse-tree))
+      (signal-syntax-error "Branch test must be look-ahead, look-behind or number: ~S." parse-tree))
    (typecase alternations
      (alternation
       (case (length (choices alternations))
         ((0)
-                    (signal-ppcre-syntax-error "No choices in branch: ~S"
-                                               parse-tree))
+          (signal-syntax-error "No choices in branch: ~S." parse-tree))
         ((1)
          (make-instance 'branch
                         :test test
@ -361,28 +411,39 @@ Will also
                         :else-regex (second
                                      (choices alternations))))
         (otherwise
-                    (signal-ppcre-syntax-error
-                     "Too much choices in branch: ~S"
-                     parse-tree))))
+          (signal-syntax-error "Too much choices in branch: ~S." parse-tree))))
      (t
       (make-instance 'branch
                      :test test
                      :then-regex alternations)))))
-           ;; (:POSITIVE-LOOKAHEAD|:NEGATIVE-LOOKAHEAD <regex>)
-           ((:positive-lookahead :negative-lookahead)
-            ;; keep the effect of modifiers local to the enclosed
-            ;; regex and stop accumulating into STARTS-WITH
+
+(defmethod convert-compound-parse-tree ((token (eql :positive-lookahead)) parse-tree &key)
+  "The case for \(:POSITIVE-LOOKAHEAD <regex>)."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags accumulate-start-p))
+  ;; keep the effect of modifiers local to the enclosed regex and stop
+  ;; accumulating into STARTS-WITH
  (setq accumulate-start-p nil)
  (let ((flags (copy-list flags)))
    (declare (special flags))
    (make-instance 'lookahead
                   :regex (convert-aux (second parse-tree))
-                             :positivep (eq (first parse-tree)
-                                            :positive-lookahead))))
-           ;; (:POSITIVE-LOOKBEHIND|:NEGATIVE-LOOKBEHIND <regex>)
-           ((:positive-lookbehind :negative-lookbehind)
-            ;; keep the effect of modifiers local to the enclosed
-            ;; regex and stop accumulating into STARTS-WITH
+                   :positivep t)))
+
+(defmethod convert-compound-parse-tree ((token (eql :negative-lookahead)) parse-tree &key)
+  "The case for \(:NEGATIVE-LOOKAHEAD <regex>)."
+  (declare #.*standard-optimize-settings*)
+  ;; do the same as for positive look-aheads and just switch afterwards
+  (let ((regex (convert-compound-parse-tree :positive-lookahead parse-tree)))
+    (setf (slot-value regex 'positivep) nil)
+    regex))
+
+(defmethod convert-compound-parse-tree ((token (eql :positive-lookbehind)) parse-tree &key)
+  "The case for \(:POSITIVE-LOOKBEHIND <regex>)."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags accumulate-start-p))
+  ;; keep the effect of modifiers local to the enclosed regex and stop
+  ;; accumulating into STARTS-WITH
  (setq accumulate-start-p nil)
  (let* ((flags (copy-list flags))
         (regex (convert-aux (second parse-tree)))
@ -390,21 +451,32 @@ Will also
    (declare (special flags))
    ;; lookbehind assertions must be of fixed length
    (unless len
-                (signal-ppcre-syntax-error
-                 "Variable length look-behind not implemented (yet): ~S"
-                 parse-tree))
+      (signal-syntax-error "Variable length look-behind not implemented \(yet): ~S." parse-tree))
    (make-instance 'lookbehind
                   :regex regex
-                             :positivep (eq (first parse-tree)
-                                            :positive-lookbehind)
+                   :positivep t
                   :len len)))
-           ;; (:GREEDY-REPETITION|:NON-GREEDY-REPETITION <min> <max> <regex>)
-           ((:greedy-repetition :non-greedy-repetition)
+
+(defmethod convert-compound-parse-tree ((token (eql :negative-lookbehind)) parse-tree &key)
+  "The case for \(:NEGATIVE-LOOKBEHIND <regex>)."
+  (declare #.*standard-optimize-settings*)
+  ;; do the same as for positive look-behinds and just switch afterwards
+  (let ((regex (convert-compound-parse-tree :positive-lookbehind parse-tree)))
+    (setf (slot-value regex 'positivep) nil)
+    regex))
+
+(defmethod convert-compound-parse-tree ((token (eql :greedy-repetition)) parse-tree &key (greedyp t))
+  "The case for \(:GREEDY-REPETITION|:NON-GREEDY-REPETITION <min> <max> <regex>).
+
+This function is also used for the non-greedy case in which case it is
+called with GREEDYP set to NIL as you would expect."
+  (declare #.*standard-optimize-settings*)
+  (declare (special accumulate-start-p starts-with))
  ;; remember the value of ACCUMULATE-START-P upon entering
  (let ((local-accumulate-start-p accumulate-start-p))
    (let ((minimum (second parse-tree))
          (maximum (third parse-tree)))
-                (declare (type fixnum minimum))
+      (declare (fixnum minimum))
      (declare (type (or null fixnum) maximum))
      (unless (and maximum
                   (= 1 minimum maximum))
@ -416,7 +488,6 @@ Will also
      (let* (reg-seen
             (regex (convert-aux (fourth parse-tree)))
             (min-len (regex-min-length regex))
-                       (greedyp (eq (first parse-tree) :greedy-repetition))
             (length (regex-length regex)))
        ;; note that this declaration already applies to
        ;; the call to CONVERT-AUX above
@ -493,53 +564,66 @@ Will also
                                      min-len
                                      nil
                                      t))))))))
-           ;; (:REGISTER <regex>)
-           ;; (:NAMED-REGISTER <name> <regex>)
-           ((:register :named-register)
-            ;; keep the effect of modifiers local to the enclosed
-            ;; regex; also, assign the current value of REG-NUM to
-            ;; the corresponding slot of the REGISTER object and
-            ;; increase this counter afterwards; for named register
-            ;; update REG-NAMES and set the corresponding name slot
-            ;; of the REGISTER object too
+
+(defmethod convert-compound-parse-tree ((token (eql :non-greedy-repetition)) parse-tree &key)
+  "The case for \(:NON-GREEDY-REPETITION <min> <max> <regex>)."
+  (declare #.*standard-optimize-settings*)
+  ;; just dispatch to the method above with GREEDYP explicitly set to NIL
+  (convert-compound-parse-tree :greedy-repetition parse-tree :greedyp nil))
+
+(defmethod convert-compound-parse-tree ((token (eql :register)) parse-tree &key name)
+  "The case for \(:REGISTER <regex>).  Also used for named registers
+when NAME is not NIL."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags reg-num reg-names))
+  ;; keep the effect of modifiers local to the enclosed regex; also,
+  ;; assign the current value of REG-NUM to the corresponding slot of
+  ;; the REGISTER object and increase this counter afterwards; for
+  ;; named register update REG-NAMES and set the corresponding name
+  ;; slot of the REGISTER object too
  (let ((flags (copy-list flags))
-                  (stored-reg-num reg-num)
-                  (reg-name (when (eq (first parse-tree) :named-register)
-                              (copy-seq (second parse-tree)))))
+        (stored-reg-num reg-num))
    (declare (special flags reg-seen named-reg-seen))
    (setq reg-seen t)
-              (when reg-name
-                (setq named-reg-seen t))
+    (when name (setq named-reg-seen t))
    (incf (the fixnum reg-num))
-              (push reg-name
-                    reg-names)
+    (push name reg-names)
    (make-instance 'register
-                             :regex (convert-aux (if (eq (first parse-tree) :named-register)
-                                                   (third parse-tree)
-                                                   (second parse-tree)))
+                   :regex (convert-aux (if name (third parse-tree) (second parse-tree)))
                   :num stored-reg-num
-                             :name reg-name)))
-           ;; (:FILTER <function> &optional <length>)
-           ((:filter)
+                   :name name)))
+
+(defmethod convert-compound-parse-tree ((token (eql :named-register)) parse-tree &key)
+  "The case for \(:NAMED-REGISTER <regex>)."
+  (declare #.*standard-optimize-settings*)
+  ;; call the method above and use the :NAME keyword argument
+  (convert-compound-parse-tree :register parse-tree :name (copy-seq (second parse-tree))))
+
+(defmethod convert-compound-parse-tree ((token (eql :filter)) parse-tree &key)
+  "The case for \(:FILTER <function> &optional <length>)."
+  (declare #.*standard-optimize-settings*)
+  (declare (special accumulate-start-p))
  ;; stop accumulating into STARTS-WITH
  (setq accumulate-start-p nil)
  (make-instance 'filter
                 :fn (second parse-tree)
                 :len (third parse-tree)))
-           ;; (:STANDALONE <regex>)
-           ((:standalone)
+
+(defmethod convert-compound-parse-tree ((token (eql :standalone)) parse-tree &key)
+  "The case for \(:STANDALONE <regex>)."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags accumulate-start-p))
  ;; stop accumulating into STARTS-WITH
  (setq accumulate-start-p nil)
-            ;; keep the effect of modifiers local to the enclosed
-            ;; regex
+  ;; keep the effect of modifiers local to the enclosed regex
  (let ((flags (copy-list flags)))
    (declare (special flags))
-              (make-instance 'standalone
-                             :regex (convert-aux (second parse-tree)))))
-           ;; (:BACK-REFERENCE <number>)
-           ;; (:BACK-REFERENCE <name>)
-           ((:back-reference)
-            (locally (declare (special reg-names reg-num))
+    (make-instance 'standalone :regex (convert-aux (second parse-tree)))))
+
+(defmethod convert-compound-parse-tree ((token (eql :back-reference)) parse-tree &key)
+  "The case for \(:BACK-REFERENCE <number>|<name>)."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags accumulate-start-p reg-num reg-names max-back-ref))
  (let* ((backref-name (and (stringp (second parse-tree))
                            (second parse-tree)))
         (referred-regs
@ -547,25 +631,21 @@ Will also
            ;; find which register corresponds to the given name
            ;; we have to deal with case where several registers share
            ;; the same name and collect their respective numbers
-                        (loop
-                         for name in reg-names
+            (loop for name in reg-names
                  for reg-index from 0
                  when (string= name backref-name)
-                         ;; NOTE: REG-NAMES stores register names in reversed order
-                         ;; REG-NUM contains number of (any) registers seen so far
-                         ;; 1- will be done later
+                  ;; NOTE: REG-NAMES stores register names in reversed
+                  ;; order REG-NUM contains number of (any) registers
+                  ;; seen so far; 1- will be done later
                  collect (- reg-num reg-index))))
         ;; store the register number for the simple case
-                     (backref-number (or (first referred-regs)
-                                         (second parse-tree))))
+         (backref-number (or (first referred-regs) (second parse-tree))))
    (declare (type (or fixnum null) backref-number))
    (when (or (not (typep backref-number 'fixnum))
              (<= backref-number 0))
-                  (signal-ppcre-syntax-error
-                   "Illegal back-reference: ~S"
-                   parse-tree))
-                ;; stop accumulating into STARTS-WITH and increase
-                ;; MAX-BACK-REF if necessary
+      (signal-syntax-error "Illegal back-reference: ~S." parse-tree))
+    ;; stop accumulating into STARTS-WITH and increase MAX-BACK-REF if
+    ;; necessary
    (setq accumulate-start-p nil
          max-back-ref (max (the fixnum max-back-ref)
                            backref-number))
@ -578,9 +658,8 @@ Will also
                            :name (copy-seq backref-name))))
      (cond
       ((cdr referred-regs)
-                    ;; several registers share the same name
-                    ;; we will try to match any of them, starting
-                    ;; with the most recent first
+        ;; several registers share the same name we will try to match
+        ;; any of them, starting with the most recent first
        ;; alternation is used to accomplish matching
        (make-instance 'alternation
                       :choices (loop
@ -588,72 +667,51 @@ Will also
                                 collect (make-back-ref reg-index))))
       ;; simple case - backref corresponds to only one register
       (t
-                    (make-back-ref backref-number)))))))
-           ;; (:REGEX <string>)
-           ((:regex)
-            (let ((regex (second parse-tree)))
-              (convert-aux (parse-string regex))))
-           ;; (:CHAR-CLASS|:INVERTED-CHAR-CLASS {<item>}*)
-           ;; where item is one of
-           ;;   - a character
-           ;;   - a character range: (:RANGE <char1> <char2>)
-           ;;   - a special char class symbol like :DIGIT-CHAR-CLASS
-           ((:char-class :inverted-char-class)
-            ;; first create the charset and some auxiliary values
-            (let* (set set-contents
-                   (count most-positive-fixnum)
-                   (item-list (rest parse-tree))
-                   (invertedp (eq (first parse-tree) :inverted-char-class))
-                   word-char-class-p)
-              (cond ((every (lambda (item) (eq item :word-char-class))
-                            item-list)
-                     ;; treat "[\\w]" like "\\w"
-                     (setq word-char-class-p t))
-                    ((every (lambda (item) (eq item :non-word-char-class))
-                            item-list)
-                     ;; treat "[\\W]" like "\\W"
-                     (setq word-char-class-p t)
-                     (setq invertedp (not invertedp)))
-                    (t
-                     (setq set (convert-char-class-to-charset item-list)
-                           count (charset-count set))
-                     (when (<= count 2)
-                       ;; collect the contents of SET into a list if
-                       ;; COUNT is smaller than 3
-                       (setq set-contents (all-characters set)))))
-              (cond ((and (not invertedp)
-                          (= count 1))
-                     ;; convert one-element charset into a STR object
-                     ;; and try to accumulate into STARTS-WITH
-                     (let ((str (make-instance 'str
-                                               :str (string (first set-contents))
-                                               :case-insensitive-p nil)))
-                       (maybe-accumulate str)
-                       str))
-                    ((and (not invertedp)
-                          (= count 2)
-                          (char-equal (first set-contents) (second set-contents)))
-                     ;; convert two-element charset into a
-                     ;; case-insensitive STR object and try to
-                     ;; accumulate into STARTS-WITH if the two
-                     ;; characters are CHAR-EQUAL
-                     (let ((str (make-instance 'str
-                                               :str (string (first set-contents))
-                                               :case-insensitive-p t)))
-                       (maybe-accumulate str)
-                       str))
-                    (t
-                     ;; the general case; stop accumulating into STARTS-WITH
+        (make-back-ref backref-number))))))
+
+(defmethod convert-compound-parse-tree ((token (eql :regex)) parse-tree &key)
+  "The case for \(:REGEX <string>)."
+  (declare #.*standard-optimize-settings*)
+  (convert-aux (parse-string (second parse-tree))))
+
+(defmethod convert-compound-parse-tree ((token (eql :char-class)) parse-tree &key invertedp)
+  "The case for \(:CHAR-CLASS {<item>}*) where item is one of
+
+- a character,
+- a character range: \(:RANGE <char1> <char2>), or
+- a special char class symbol like :DIGIT-CHAR-CLASS.
+
+Also used for inverted char classes when INVERTEDP is true."
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags accumulate-start-p))
+  (let ((test-function
+         (create-optimized-test-function
+          (convert-char-class-to-test-function (rest parse-tree)
+                                               invertedp
+                                               (case-insensitive-mode-p flags)))))
    (setq accumulate-start-p nil)
-                     (make-instance 'char-class
-                                    :charset set
-                                    :case-insensitive-p
-                                    (case-insensitive-mode-p flags)
-                                    :invertedp invertedp
-                                    :word-char-class-p word-char-class-p)))))
-           ;; (:FLAGS {<flag>}*)
-           ;; where flag is a modifier symbol like :CASE-INSENSITIVE-P
-           ((:flags)
+    (make-instance 'char-class :test-function test-function)))
+
+(defmethod convert-compound-parse-tree ((token (eql :inverted-char-class)) parse-tree &key)
+  "The case for \(:INVERTED-CHAR-CLASS {<item>}*)."
+  (declare #.*standard-optimize-settings*)
+  ;; just dispatch to the "real" method
+  (convert-compound-parse-tree :char-class parse-tree :invertedp t))
+
+(defmethod convert-compound-parse-tree ((token (eql :property)) parse-tree &key)
+  "The case for \(:PROPERTY <name>) where <name> is a string."
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'char-class :test-function (resolve-property (second parse-tree))))
+
+(defmethod convert-compound-parse-tree ((token (eql :inverted-property)) parse-tree &key)
+  "The case for \(:INVERTED-PROPERTY <name>) where <name> is a string."
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'char-class :test-function (complement* (resolve-property (second parse-tree)))))
+
+(defmethod convert-compound-parse-tree ((token (eql :flags)) parse-tree &key)
+  "The case for \(:FLAGS {<flag>}*) where flag is a modifier symbol
+like :CASE-INSENSITIVE-P."
+  (declare #.*standard-optimize-settings*)
  ;; set/unset the flags corresponding to the symbols
  ;; following :FLAGS
  (mapc #'set-flag (rest parse-tree))
@ -662,119 +720,133 @@ Will also
  ;; construct into a VOID object which'll be optimized
  ;; away when creating the matcher
  (make-instance 'void))
-           (otherwise
-            (signal-ppcre-syntax-error
-             "Unknown token ~A in parse-tree"
-             (first parse-tree)))))
-        ((or (characterp parse-tree) (stringp parse-tree))
-         ;; turn characters or strings into STR objects and try to
-         ;; accumulate into STARTS-WITH
-         (let ((str (make-instance 'str
-                                   :str (string parse-tree)
-                                   :case-insensitive-p
-                                   (case-insensitive-mode-p flags))))
-           (maybe-accumulate str)
-           str))
-        (t
-         ;; and now for the tokens which are symbols
-         (case parse-tree
-           ((:void)
+
+(defgeneric convert-simple-parse-tree (parse-tree)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Helper function for CONVERT-AUX which converts
+parse trees which are atoms.")
+  (:method ((parse-tree (eql :void)))
+   (declare #.*standard-optimize-settings*)
   (make-instance 'void))
-           ((:word-boundary)
+  (:method ((parse-tree (eql :word-boundary)))
+   (declare #.*standard-optimize-settings*)
   (make-instance 'word-boundary :negatedp nil))
-           ((:non-word-boundary)
+  (:method ((parse-tree (eql :non-word-boundary)))
+   (declare #.*standard-optimize-settings*)
   (make-instance 'word-boundary :negatedp t))
-           ;; the special character classes
-           ((:digit-class
-             :non-digit-class
-             :word-char-class
-             :non-word-char-class
-             :whitespace-char-class
-             :non-whitespace-char-class)
-            ;; stop accumulating into STARTS-WITH
+  (:method ((parse-tree (eql :everything)))
+   (declare #.*standard-optimize-settings*)
+   (declare (special flags accumulate-start-p))
   (setq accumulate-start-p nil)
-            (make-instance 'char-class
-                           ;; use the constants defined in util.lisp
-                           :charset (case parse-tree
-                                      ((:digit-class
-                                        :non-digit-class)
-                                       +digit-set+)
-                                      ((:word-char-class
-                                        :non-word-char-class)
-                                       nil)
-                                      ((:whitespace-char-class
-                                        :non-whitespace-char-class)
-                                       +whitespace-char-set+))
-                           ;; this value doesn't really matter but
-                           ;; NIL should result in slightly faster
-                           ;; matchers
-                           :case-insensitive-p nil
-                           :invertedp (member parse-tree
-                                              '(:non-digit-class
-                                                :non-word-char-class
-                                                :non-whitespace-char-class)
-                                              :test #'eq)
-                           :word-char-class-p (member parse-tree
-                                                      '(:word-char-class
-                                                        :non-word-char-class)
-                                                      :test #'eq)))
-           ((:start-anchor              ; Perl's "^"
-             :end-anchor                ; Perl's "$"
-             :modeless-end-anchor-no-newline
-                                        ; Perl's "\z"
-             :modeless-start-anchor     ; Perl's "\A"
-             :modeless-end-anchor)      ; Perl's "\Z"
-            (make-instance 'anchor
-                           :startp (member parse-tree
-                                           '(:start-anchor
-                                             :modeless-start-anchor)
-                                           :test #'eq)
-                           ;; set this value according to the
-                           ;; current settings of FLAGS (unless it's
-                           ;; a modeless anchor)
-                           :multi-line-p
-                           (and (multi-line-mode-p flags)
-                                (not (member parse-tree
-                                             '(:modeless-start-anchor
-                                               :modeless-end-anchor
-                                               :modeless-end-anchor-no-newline)
-                                             :test #'eq)))
-                           :no-newline-p
-                           (eq parse-tree
-                               :modeless-end-anchor-no-newline)))
-           ((:everything)
-            ;; stop accumulating into STARTS-WITHS
+   (make-instance 'everything :single-line-p (single-line-mode-p flags)))
+  (:method ((parse-tree (eql :digit-class)))
+   (declare #.*standard-optimize-settings*)
+   (declare (special accumulate-start-p))
   (setq accumulate-start-p nil)
-            (make-instance 'everything
-                           :single-line-p (single-line-mode-p flags)))
-           ;; special tokens corresponding to Perl's "ism" modifiers
-           ((:case-insensitive-p
-             :case-sensitive-p
-             :multi-line-mode-p
-             :not-multi-line-mode-p
-             :single-line-mode-p
-             :not-single-line-mode-p)
-            ;; we're only interested in the side effect of
-            ;; setting/unsetting the flags and turn these tokens
-            ;; into VOID objects which'll be optimized away when
-            ;; creating the matcher
+   (make-instance 'char-class :test-function #'digit-char-p))
+  (:method ((parse-tree (eql :word-char-class)))
+   (declare #.*standard-optimize-settings*)
+   (declare (special accumulate-start-p))
+   (setq accumulate-start-p nil)
+   (make-instance 'char-class :test-function #'word-char-p))
+  (:method ((parse-tree (eql :whitespace-char-class)))
+   (declare #.*standard-optimize-settings*)
+   (declare (special accumulate-start-p))
+   (setq accumulate-start-p nil)
+   (make-instance 'char-class :test-function #'whitespacep))
+  (:method ((parse-tree (eql :non-digit-class)))
+   (declare #.*standard-optimize-settings*)
+   (declare (special accumulate-start-p))
+   (setq accumulate-start-p nil)
+   (make-instance 'char-class :test-function (complement* #'digit-char-p)))
+  (:method ((parse-tree (eql :non-word-char-class)))
+   (declare #.*standard-optimize-settings*)
+   (declare (special accumulate-start-p))
+   (setq accumulate-start-p nil)
+   (make-instance 'char-class :test-function (complement* #'word-char-p)))
+  (:method ((parse-tree (eql :non-whitespace-char-class)))
+   (declare #.*standard-optimize-settings*)
+   (declare (special accumulate-start-p))
+   (setq accumulate-start-p nil)
+   (make-instance 'char-class :test-function (complement* #'whitespacep)))
+  (:method ((parse-tree (eql :start-anchor)))
+   ;; Perl's "^"
+   (declare #.*standard-optimize-settings*)
+   (declare (special flags))
+   (make-instance 'anchor :startp t :multi-line-p (multi-line-mode-p flags)))
+  (:method ((parse-tree (eql :end-anchor)))
+   ;; Perl's "$"
+   (declare #.*standard-optimize-settings*)
+   (declare (special flags))
+   (make-instance 'anchor :startp nil :multi-line-p (multi-line-mode-p flags)))
+  (:method ((parse-tree (eql :modeless-start-anchor)))
+   ;; Perl's "\A"
+   (declare #.*standard-optimize-settings*)
+   (make-instance 'anchor :startp t))
+  (:method ((parse-tree (eql :modeless-end-anchor)))
+   ;; Perl's "$\Z"
+   (declare #.*standard-optimize-settings*)
+   (make-instance 'anchor :startp nil))
+  (:method ((parse-tree (eql :modeless-end-anchor-no-newline)))
+   ;; Perl's "$\z"
+   (declare #.*standard-optimize-settings*)
+   (make-instance 'anchor :startp nil :no-newline-p t))
+  (:method ((parse-tree (eql :case-insensitive-p)))
+   (declare #.*standard-optimize-settings*)
   (set-flag parse-tree)
   (make-instance 'void))
-           (otherwise
-            (let ((translation (and (symbolp parse-tree)
-                                    (parse-tree-synonym parse-tree))))
+  (:method ((parse-tree (eql :case-sensitive-p)))
+   (declare #.*standard-optimize-settings*)
+   (set-flag parse-tree)
+   (make-instance 'void))
+  (:method ((parse-tree (eql :multi-line-mode-p)))
+   (declare #.*standard-optimize-settings*)
+   (set-flag parse-tree)
+   (make-instance 'void))
+  (:method ((parse-tree (eql :not-multi-line-mode-p)))
+   (declare #.*standard-optimize-settings*)
+   (set-flag parse-tree)
+   (make-instance 'void))
+  (:method ((parse-tree (eql :single-line-mode-p)))
+   (declare #.*standard-optimize-settings*)
+   (set-flag parse-tree)
+   (make-instance 'void))
+  (:method ((parse-tree (eql :not-single-line-mode-p)))
+   (declare #.*standard-optimize-settings*)
+   (set-flag parse-tree)
+   (make-instance 'void)))
+
+(defmethod convert-simple-parse-tree ((parse-tree string))
+  (declare #.*standard-optimize-settings*)
+  (declare (special flags))
+  ;; turn strings into STR objects and try to accumulate into
+  ;; STARTS-WITH
+  (let ((str (make-instance 'str
+                            :str parse-tree
+                            :case-insensitive-p (case-insensitive-mode-p flags))))
+    (maybe-accumulate str)
+    str))
+
+(defmethod convert-simple-parse-tree ((parse-tree character))
+  (declare #.*standard-optimize-settings*)
+  ;; dispatch to the method for strings
+  (convert-simple-parse-tree (string parse-tree)))
+        
+(defmethod convert-simple-parse-tree (parse-tree)
+  "The default method - check if there's a translation."
+  (declare #.*standard-optimize-settings*)
+  (let ((translation (and (symbolp parse-tree) (parse-tree-synonym parse-tree))))
    (if translation
      (convert-aux (copy-tree translation))
-                (signal-ppcre-syntax-error "Unknown token ~A in parse-tree"
-                                           parse-tree))))))))
+      (signal-syntax-error "Unknown token ~A in parse tree." parse-tree))))

 (defun convert (parse-tree)
-  (declare #.*standard-optimize-settings*)
  "Converts the parse tree PARSE-TREE into an equivalent REGEX object
 and returns three values: the REGEX object, the number of registers
 seen and an object the regex starts with which is either a STR object
-or an EVERYTHING object (if the regex starts with something like
+or an EVERYTHING object \(if the regex starts with something like
 \".*\") or NIL."
+  (declare #.*standard-optimize-settings*)
  ;; this function basically just initializes the special variables
  ;; and then calls CONVERT-AUX to do all the work
  (let* ((flags (list nil nil nil))
@ -790,12 +862,12 @@ or an EVERYTHING object (if the regex starts with something like
    ;; make sure we don't reference registers which aren't there
    (when (> (the fixnum max-back-ref)
             (the fixnum reg-num))
-      (signal-ppcre-syntax-error
-       "Backreference to register ~A which has not been defined"
-       max-back-ref))
+      (signal-syntax-error "Backreference to register ~A which has not been defined." max-back-ref))
    (when (typep starts-with 'str)
      (setf (slot-value starts-with 'str)
-              (coerce (slot-value starts-with 'str) 'simple-string)))
+              (coerce (slot-value starts-with 'str)
+                      #+:lispworks 'lw:simple-text-string
+                      #-:lispworks 'simple-string)))
    (values converted-parse-tree reg-num starts-with
            ;; we can't simply use *ALLOW-NAMED-REGISTERS*
            ;; since parse-tree syntax ignores it
--- a/doc/benchmarks.2002-12-22.txt
+++ b/doc/benchmarks.2002-12-22.txt
--- a/doc/index.html
+++ b/doc/index.html
--- a/errors.lisp
+++ b/errors.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/errors.lisp,v 1.18 2008/06/25 14:04:27 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/errors.lisp,v 1.21 2008/07/06 18:12:04 edi Exp $

 ;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.

@ -27,7 +27,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (defvar *syntax-error-string* nil
  "The string which caused the syntax error.")
@ -69,16 +69,16 @@ parse tree).")
  (:documentation "Signaled when CL-PPCRE functions are
 invoked with wrong arguments."))

-(defmacro signal-ppcre-syntax-error* (pos format-control &rest format-arguments)
+(defmacro signal-syntax-error* (pos format-control &rest format-arguments)
  `(error 'ppcre-syntax-error
          :pos ,pos
          :format-control ,format-control
          :format-arguments (list ,@format-arguments)))

-(defmacro signal-ppcre-syntax-error (format-control &rest format-arguments)
-  `(signal-ppcre-syntax-error* nil ,format-control ,@format-arguments))
+(defmacro signal-syntax-error (format-control &rest format-arguments)
+  `(signal-syntax-error* nil ,format-control ,@format-arguments))

-(defmacro signal-ppcre-invocation-error (format-control &rest format-arguments)
+(defmacro signal-invocation-error (format-control &rest format-arguments)
  `(error 'ppcre-invocation-error
          :format-control ,format-control
          :format-arguments (list ,@format-arguments)))
--- a/lexer.lisp
+++ b/lexer.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/lexer.lisp,v 1.28 2008/06/25 14:04:27 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/lexer.lisp,v 1.34 2008/07/06 22:36:30 edi Exp $

 ;;; The lexer's responsibility is to convert the regex string into a
 ;;; sequence of tokens which are in turn consumed by the parser.
@ -35,7 +35,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (declaim (inline map-char-to-special-class))
 (defun map-char-to-special-char-class (chr)
@ -56,27 +56,18 @@ their associated character classes."
    ((#\S)
      :non-whitespace-char-class)))

-(locally
-  (declare #.*standard-optimize-settings*)
 (defstruct (lexer (:constructor make-lexer-internal))
  "LEXER structures are used to hold the regex string which is
 currently lexed and to keep track of the lexer's state."
-    (str ""
-         :type string
-         :read-only t)
-    (len 0
-         :type fixnum
-         :read-only t)
-    (reg 0
-         :type fixnum)
-    (pos 0
-         :type fixnum)
-    (last-pos nil
-              :type list)))
+  (str "" :type string :read-only t)
+  (len 0 :type fixnum :read-only t)
+  (reg 0 :type fixnum)
+  (pos 0 :type fixnum)
+  (last-pos nil :type list))

 (defun make-lexer (string)
  (declare (inline make-lexer-internal)
-           #-genera (type string string))
+           #-:genera (string string))
  (make-lexer-internal :str (maybe-coerce-to-simple-string string)
                       :len (length string)))

@ -101,10 +92,8 @@ Does not respect extended mode."
  (declare #.*standard-optimize-settings*)
  "Returns the next character which is to be examined and updates the
 POS slot. Does not respect extended mode."
-  (cond ((end-of-string-p lexer)
-          nil)
-        (t
-          (prog1
+  (cond ((end-of-string-p lexer) nil)
+        (t (prog1
               (schar (lexer-str lexer) (lexer-pos lexer))
             (incf (lexer-pos lexer))))))

@ -135,9 +124,7 @@ nested comments are skipped if applicable."
                            while (and skip-char
                                       (char/= skip-char #\)))
                            finally (return skip-char))
-                    (signal-ppcre-syntax-error*
-                     error-pos
-                     "Comment group not closed")))
+                    (signal-syntax-error* error-pos "Comment group not closed.")))
                (setq next-char (next-char-non-extended lexer)))
              (t
                ;; undo effect of previous INCF if we didn't see a #
@ -177,7 +164,7 @@ nested comments are skipped if applicable."
  "Moves (LEXER-POS LEXER) back to the last position stored in
 \(LEXER-LAST-POS LEXER) and pops the LAST-POS stack."
  (unless (lexer-last-pos lexer)
-    (signal-ppcre-syntax-error "LAST-POS stack of LEXER ~A is empty" lexer))
+    (signal-syntax-error "LAST-POS stack of LEXER ~A is empty." lexer))
  (setf (lexer-pos lexer) (pop (lexer-last-pos lexer)))
  nil)

@ -232,19 +219,16 @@ the corresponding number started within the regex string."
  (let ((code (logand #o377 (the fixnum (or number 0)))))
    (or (and (< code char-code-limit)
             (code-char code))
-        (signal-ppcre-syntax-error*
-         error-pos
-         "No character for hex-code ~X"
-         number))))
+        (signal-syntax-error* error-pos "No character for hex-code ~X." number))))

 (defun unescape-char (lexer)
  (declare #.*standard-optimize-settings*)
-  "Convert the characters(s) following a backslash into a token
+  "Convert the characters\(s) following a backslash into a token
 which is returned. This function is to be called when the backslash
 has already been consumed. Special character classes like \\W are
 handled elsewhere."
  (when (end-of-string-p lexer)
-    (signal-ppcre-syntax-error "String ends with backslash"))
+    (signal-syntax-error "String ends with backslash."))
  (let ((chr (next-char-non-extended lexer)))
    (case chr
      ((#\E)
@ -257,9 +241,7 @@ handled elsewhere."
        ;; \cx means control-x in Perl
        (let ((next-char (next-char-non-extended lexer)))
          (unless next-char
-            (signal-ppcre-syntax-error*
-             (lexer-pos lexer)
-             "Character missing after '\\c' at position ~A"))
+            (signal-syntax-error* (lexer-pos lexer) "Character missing after '\\c' at position ~A."))
          (code-char (logxor #x40 (char-code (char-upcase next-char))))))
      ((#\x)
        ;; \x should be followed by a hexadecimal char code,
@ -295,12 +277,28 @@ handled elsewhere."
        ;; all other characters aren't affected by a backslash
        chr))))

-(defun collect-char-class (lexer)
+(defun read-char-property (lexer first-char)
  (declare #.*standard-optimize-settings*)
+  (unless (eql (next-char-non-extended lexer) #\{)
+    (signal-syntax-error* (lexer-pos lexer) "Expected left brace after \\~A." first-char))
+  (let ((name (with-output-to-string (out nil :element-type
+                                          #+:lispworks 'lw:simple-char #-:lispworks 'character)
+                  (loop
+                   (let ((char (or (next-char-non-extended lexer)
+                                   (signal-syntax-error "Unexpected EOF after \\~A{." first-char))))
+                     (when (char= char #\})
+                       (return))
+                     (write-char char out))))))
+    (list (if (char= first-char #\p) :property :inverted-property)
+          ;; we must reverse here because of what PARSE-STRING does
+          (nreverse name))))
+
+(defun collect-char-class (lexer)
  "Reads and consumes characters from regex string until a right
 bracket is seen.  Assembles them into a list \(which is returned) of
 characters, character ranges, like \(:RANGE #\\A #\\E) for a-e, and
 tokens representing special character classes."
+  (declare #.*standard-optimize-settings*)
  (let ((start-pos (lexer-pos lexer))         ; remember start for error message
        hyphen-seen
        last-char
@ -336,6 +334,21 @@ we're inside a range or not."
                        (push #\- list)
                        (incf (lexer-pos lexer)))
                      (setq hyphen-seen nil))
+                     ((#\P #\p)
+                      ;; maybe a character property
+                      (cond ((null *property-resolver*)
+                             (handle-char next-char))
+                            (t
+                             (push (read-char-property lexer next-char) list)
+                             ;; if the last character was a hyphen
+                             ;; just collect it literally
+                             (when hyphen-seen
+                               (push #\- list))
+                             ;; if the next character is a hyphen do the same
+                             (when (looking-at-p lexer #\-)
+                               (push #\- list)
+                               (incf (lexer-pos lexer)))
+                             (setq hyphen-seen nil))))                        
                     ((#\E)
                      ;; if \Q quoting is on we ignore \E,
                      ;; otherwise it's just a plain #\E
@ -372,9 +385,7 @@ we're inside a range or not."
                 (handle-char c))))
      ;; we can only exit the loop normally if we've reached the end
      ;; of the regex string without seeing a right bracket
-      (signal-ppcre-syntax-error*
-       start-pos
-       "Missing right bracket to close character class"))))
+      (signal-syntax-error* start-pos "Missing right bracket to close character class."))))

 (defun maybe-parse-flags (lexer)
  (declare #.*standard-optimize-settings*)
@ -387,7 +398,7 @@ the behaviour of the lexer itself via the special variable
    (loop with set = t
          for chr = (next-char-non-extended lexer)
          unless chr
-            do (signal-ppcre-syntax-error "Unexpected end of string")
+            do (signal-syntax-error "Unexpected end of string.")
          while (find chr "-imsx" :test #'char=)
          ;; the first #\- will invert the meaning of all modifiers
          ;; following it
@ -473,9 +484,7 @@ closing #\> will also be consumed."
                            :test #'char=)))
    (unless end-name
      ;; there has to be > somewhere, syntax error otherwise
-      (signal-ppcre-syntax-error*
-       (1- (lexer-pos lexer))
-       "Opening #\< in named group has no closing #\>"))
+      (signal-syntax-error* (1- (lexer-pos lexer)) "Opening #\< in named group has no closing #\>."))
    (let ((name (subseq (lexer-str lexer)
                        (lexer-pos lexer)
                        end-name)))
@ -484,9 +493,7 @@ closing #\> will also be consumed."
                             (char= #\- char)))
                     name)
        ;; register name can contain only alphanumeric characters or #\-
-        (signal-ppcre-syntax-error*
-         (lexer-pos lexer)
-         "Invalid character in named register group"))
+        (signal-syntax-error* (lexer-pos lexer) "Invalid character in named register group."))
      ;; advance lexer beyond "<name>" part
      (setf (lexer-pos lexer) (1+ end-name))
      name)))
@ -518,10 +525,7 @@ closing #\> will also be consumed."
             ((#\+ #\*)
              ;; quantifiers will always be consumend by
              ;; GET-QUANTIFIER, they must not appear here
-              (signal-ppcre-syntax-error*
-               (1- (lexer-pos lexer))
-               "Quantifier '~A' not allowed"
-               next-char))
+              (signal-syntax-error* (1- (lexer-pos lexer)) "Quantifier '~A' not allowed." next-char))
             ((#\{)
              ;; left brace isn't a special character in it's own
              ;; right but we must check if what follows might
@ -530,9 +534,8 @@ closing #\> will also be consumed."
                    (this-last-pos (lexer-last-pos lexer)))
                (unget-token lexer)
                (when (get-quantifier lexer)
-                  (signal-ppcre-syntax-error*
-                   (car this-last-pos)
-                   "Quantifier '~A' not allowed"
+                  (signal-syntax-error* (car this-last-pos)
+                                        "Quantifier '~A' not allowed."
                                        (subseq (lexer-str lexer)
                                                (car this-last-pos)
                                                (lexer-pos lexer))))
@ -580,7 +583,7 @@ closing #\> will also be consumed."
                   (let* ((old-pos (decf (lexer-pos lexer)))
                          ;; ...so let's get the whole number first
                          (backref-number (get-number lexer)))
-                     (declare (type fixnum backref-number))
+                     (declare (fixnum backref-number))
                     (cond ((and (> backref-number (lexer-reg lexer))
                                 (<= 10 backref-number))
                            ;; \10 and higher are treated as octal
@ -603,6 +606,10 @@ closing #\> will also be consumed."
                   (let ((old-pos (decf (lexer-pos lexer))))
                     (make-char-from-code (get-number lexer :radix 8 :max-length 3)
                                          old-pos)))
+                  ((#\P #\p)
+                   ;; might be a named property
+                   (cond (*property-resolver* (read-char-property lexer next-char))
+                         (t next-char)))
                  (otherwise
                   ;; in all other cases just unescape the
                   ;; character
@ -622,17 +629,15 @@ closing #\> will also be consumed."
                       ;; or a closing parenthesis are following
                       (when (and flags
                                  (not (find next-char ":)" :test #'char=)))
-                         (signal-ppcre-syntax-error*
-                          (car (lexer-last-pos lexer))
-                          "Sequence '~A' not recognized"
+                         (signal-syntax-error* (car (lexer-last-pos lexer))
+                                               "Sequence '~A' not recognized."
                                               (subseq (lexer-str lexer)
                                                       (car (lexer-last-pos lexer))
                                                       (lexer-pos lexer))))
                       (case next-char
                         ((nil)
                          ;; syntax error
-                          (signal-ppcre-syntax-error
-                           "End of string following '(?'"))
+                          (signal-syntax-error "End of string following '(?'."))
                         ((#\))
                          ;; an empty group except for the flags
                          ;; (if there are any)
@ -664,9 +669,8 @@ closing #\> will also be consumed."
                                  ;; we have encountered a named group
                                  ;; are we supporting register naming?
                                  (unless *allow-named-registers*
-                                    (signal-ppcre-syntax-error*
-                                     (1- (lexer-pos lexer))
-                                     "Character '~A' may not follow '(?<'"
+                                    (signal-syntax-error* (1- (lexer-pos lexer))
+                                                          "Character '~A' may not follow '(?<'."
                                                          next-char))
                                  ;; put the letter back
                                  (decf (lexer-pos lexer))
@ -685,18 +689,15 @@ closing #\> will also be consumed."
                                   :void)
                                  ((nil)
                                   ;; syntax error
-                                   (signal-ppcre-syntax-error
-                                    "End of string following '(?<'"))
+                                   (signal-syntax-error "End of string following '(?<'."))
                                  (t
                                   ;; also syntax error
-                                   (signal-ppcre-syntax-error*
-                                    (1- (lexer-pos lexer))
-                                    "Character '~A' may not follow '(?<'"
+                                   (signal-syntax-error* (1- (lexer-pos lexer))
+                                                         "Character '~A' may not follow '(?<'."
                                                         next-char ))))))
                         (otherwise
-                          (signal-ppcre-syntax-error*
-                           (1- (lexer-pos lexer))
-                           "Character '~A' may not follow '(?'"
+                          (signal-syntax-error* (1- (lexer-pos lexer))
+                                                "Character '~A' may not follow '(?'."
                                                next-char)))))
                    (t
                     ;; if next-char was not #\? (this is within
--- a/load.lisp
+++ b/load.lisp
@ -1,67 +0,0 @@
-;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/load.lisp,v 1.16 2008/06/25 14:04:27 edi Exp $
-
-;;; Copyright (c) 2002-2008, Dr. Edmund Weitz.  All rights reserved.
-
-;;; Redistribution and use in source and binary forms, with or without
-;;; modification, are permitted provided that the following conditions
-;;; are met:
-
-;;;   * Redistributions of source code must retain the above copyright
-;;;     notice, this list of conditions and the following disclaimer.
-
-;;;   * Redistributions in binary form must reproduce the above
-;;;     copyright notice, this list of conditions and the following
-;;;     disclaimer in the documentation and/or other materials
-;;;     provided with the distribution.
-
-;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
-;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-(in-package :cl-user)
-
-(let ((cl-ppcre-base-directory
-        (make-pathname :name nil :type nil :version nil
-                       :defaults (parse-namestring *load-truename*)))
-      must-compile)
-  (with-compilation-unit ()
-    (dolist (file '("packages"
-                    "specials"
-                    "util"
-                    "errors"
-                    #-:use-acl-regexp2-engine "lexer"
-                    #-:use-acl-regexp2-engine "parser"
-                    #-:use-acl-regexp2-engine "regex-class"
-                    #-:use-acl-regexp2-engine "convert"
-                    #-:use-acl-regexp2-engine "optimize"
-                    #-:use-acl-regexp2-engine "closures"
-                    #-:use-acl-regexp2-engine "repetition-closures"
-                    #-:use-acl-regexp2-engine "scanner"
-                    "api"
-                    "ppcre-tests"))
-      (let ((pathname (make-pathname :name file :type "lisp" :version nil
-                                     :defaults cl-ppcre-base-directory)))
-        ;; don't use COMPILE-FILE in Corman Lisp, it's broken - LOAD
-        ;; will yield compiled functions anyway
-        #-:cormanlisp
-        (let ((compiled-pathname (compile-file-pathname pathname)))
-          (unless (and (not must-compile)
-                       (probe-file compiled-pathname)
-                       (< (file-write-date pathname)
-                          (file-write-date compiled-pathname)))
-            (setq must-compile t)
-            (compile-file pathname))
-          (setq pathname compiled-pathname))
-        (load pathname)))))
-
-
-
--- a/optimize.lisp
+++ b/optimize.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/optimize.lisp,v 1.31 2008/06/25 14:04:27 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/optimize.lisp,v 1.35 2008/07/06 18:12:04 edi Exp $

 ;;; This file contains optimizations which can be applied to converted
 ;;; parse trees.
@ -30,7 +30,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (defgeneric flatten (regex)
  (declare #.*standard-optimize-settings*)
@ -96,8 +96,7 @@ operation on REGEX."))
            alternation)
          ((cdr choices)
            (first choices))
-          (t (signal-ppcre-syntax-error
-              "Encountered alternation without choices.")))))
+          (t (signal-syntax-error "Encountered alternation without choices.")))))

 (defmethod flatten ((branch branch))
  (declare #.*standard-optimize-settings*)
@ -143,7 +142,7 @@ operation on REGEX."))
         collector-start
         (collector-length 0)
         skip)
-    (declare (type fixnum collector-length))
+    (declare (fixnum collector-length))
    (loop
      (let ((elements-rest (cdr curr-point)))
        (unless elements-rest
@ -394,7 +393,7 @@ function called by END-STRIN.)"))
        concatenated-string
        concatenated-start
        (concatenated-length 0))
-    (declare (type fixnum concatenated-length))
+    (declare (fixnum concatenated-length))
    (loop for element in (reverse (elements seq))
          ;; remember the case-(in)sensitivity of the last relevant
          ;; STR object
@ -429,7 +428,7 @@ function called by END-STRIN.)"))
                               concatenated-start nil))
                       (let ((len (len element-end))
                             (str (str element-end)))
-                         (declare (type fixnum len))
+                         (declare (fixnum len))
                         (incf concatenated-length len)
                         (loop for i of-type fixnum downfrom (1- len) to 0
                               do (vector-push-extend (char str i)
--- a/packages.lisp
+++ b/packages.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/packages.lisp,v 1.24 2008/06/25 14:04:27 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/packages.lisp,v 1.38 2008/07/22 23:54:59 edi Exp $

 ;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.

@ -29,78 +29,40 @@

 (in-package :cl-user)

-#-:cormanlisp
-(defpackage #:cl-ppcre
-  (:nicknames #:ppcre)
-  #+genera (:shadowing-import-from #:common-lisp #:lambda #:simple-string #:string)
-  (:use #-genera #:cl #+genera #:future-common-lisp)
-  (:export #:create-scanner
-           #:parse-tree-synonym
-           #:define-parse-tree-synonym
-           #:scan
-           #:scan-to-strings
-           #:do-scans
-           #:do-matches
-           #:do-matches-as-strings
-           #:all-matches
-           #:all-matches-as-strings
-           #:split
-           #:regex-replace
-           #:regex-replace-all
-           #:regex-apropos
-           #:regex-apropos-list
-           #:quote-meta-chars
-           #:*regex-char-code-limit*
-           #:*use-bmh-matchers*
-           #:*allow-quoting*
-           #:*allow-named-registers*
-           #:ppcre-error
-           #:ppcre-invocation-error
-           #:ppcre-syntax-error
-           #:ppcre-syntax-error-string
-           #:ppcre-syntax-error-pos
-           #:register-groups-bind
-           #:do-register-groups))
-
-#+:cormanlisp
-(defpackage "CL-PPCRE"
-  (:nicknames "PPCRE")
-  (:use "CL")
-  (:export "CREATE-SCANNER"
-           "PARSE-TREE-SYNONYM"
-           "DEFINE-PARSE-TREE-SYNONYM"
-           "SCAN"
-           "SCAN-TO-STRINGS"
-           "DO-SCANS"
-           "DO-MATCHES"
-           "DO-MATCHES-AS-STRINGS"
-           "ALL-MATCHES"
-           "ALL-MATCHES-AS-STRINGS"
-           "SPLIT"
-           "REGEX-REPLACE"
-           "REGEX-REPLACE-ALL"
-           "REGEX-APROPOS"
-           "REGEX-APROPOS-LIST"
-           "QUOTE-META-CHARS"
-           "*REGEX-CHAR-CODE-LIMIT*"
-           "*USE-BMH-MATCHERS*"
-           "*ALLOW-QUOTING*"
-           "*ALLOW-NAMED-REGISTERS*"
-           "PPCRE-ERROR"
-           "PPCRE-INVOCATION-ERROR"
-           "PPCRE-SYNTAX-ERROR"
-           "PPCRE-SYNTAX-ERROR-STRING"
-           "PPCRE-SYNTAX-ERROR-POS"
-           "REGISTER-GROUPS-BIND"
-           "DO-REGISTER-GROUPS"))
-
-#-:cormanlisp
-(defpackage #:cl-ppcre-test
-  #+genera (:shadowing-import-from #:common-lisp #:lambda)
-  (:use #-genera #:cl #+genera #:future-common-lisp #:cl-ppcre)
-  (:export #:test))
-
-#+:cormanlisp
-(defpackage "CL-PPCRE-TEST"
-  (:use "CL" "CL-PPCRE")
-  (:export "TEST"))
+(defpackage :cl-ppcre
+  (:nicknames :ppcre)
+  #+:genera
+  (:shadowing-import-from :common-lisp :lambda :simple-string :string)
+  (:use #-:genera :cl #+:genera :future-common-lisp)
+  (:shadow :digit-char-p :defconstant)
+  (:export :parse-string
+           :create-scanner
+           :create-optimized-test-function
+           :parse-tree-synonym
+           :define-parse-tree-synonym
+           :scan
+           :scan-to-strings
+           :do-scans
+           :do-matches
+           :do-matches-as-strings
+           :all-matches
+           :all-matches-as-strings
+           :split
+           :regex-replace
+           :regex-replace-all
+           :regex-apropos
+           :regex-apropos-list
+           :quote-meta-chars
+           :*regex-char-code-limit*
+           :*use-bmh-matchers*
+           :*allow-quoting*
+           :*allow-named-registers*
+           :*optimize-char-classes*
+           :*property-resolver*
+           :ppcre-error
+           :ppcre-invocation-error
+           :ppcre-syntax-error
+           :ppcre-syntax-error-string
+           :ppcre-syntax-error-pos
+           :register-groups-bind
+           :do-register-groups))
--- a/parser.lisp
+++ b/parser.lisp
@ -1,11 +1,11 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/parser.lisp,v 1.25 2008/06/25 14:04:28 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/parser.lisp,v 1.30 2008/07/06 18:12:05 edi Exp $

 ;;; The parser will - with the help of the lexer - parse a regex
 ;;; string and convert it into a "parse tree" (see docs for details
-;;; about the syntax of these trees). Note that the lexer might return
-;;; illegal parse trees. It is assumed that the conversion process
-;;; later on will track them down.
+;;; about the syntax of these trees).  Note that the lexer might
+;;; return illegal parse trees.  It is assumed that the conversion
+;;; process later on will track them down.

 ;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.

@ -33,10 +33,9 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (defun group (lexer)
-  (declare #.*standard-optimize-settings*)
  "Parses and consumes a <group>.
 The productions are: <group> -> \"\(\"<regex>\")\"
                                \"\(?:\"<regex>\")\"
@ -53,6 +52,7 @@ The productions are: <group> -> \"\(\"<regex>\")\"
 where <flags> is parsed by the lexer function MAYBE-PARSE-FLAGS.
 Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
 <grouping-type> is one of six keywords - see source for details."
+  (declare #.*standard-optimize-settings*)
  (multiple-value-bind (open-token flags)
      (get-token lexer)
    (cond ((eq open-token :open-paren-paren)
@ -65,7 +65,7 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
                   (number (try-number lexer :no-whitespace-p t))
                   ;; make changes to extended-mode-p local
                   (*extended-mode-p* *extended-mode-p*))
-              (declare (type fixnum open-paren-pos))
+              (declare (fixnum open-paren-pos))
              (cond (number
                      ;; condition is a number (i.e. refers to a
                      ;; back-reference)
@ -73,13 +73,11 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
                             (reg-expr (reg-expr lexer))
                             (close-token (get-token lexer)))
                        (unless (eq inner-close-token :close-paren)
-                          (signal-ppcre-syntax-error*
-                           (+ open-paren-pos 2)
-                           "Opening paren has no matching closing paren"))
+                          (signal-syntax-error* (+ open-paren-pos 2)
+                                                "Opening paren has no matching closing paren."))
                        (unless (eq close-token :close-paren)
-                          (signal-ppcre-syntax-error*
-                           open-paren-pos
-                           "Opening paren has no matching closing paren"))
+                          (signal-syntax-error* open-paren-pos
+                                                "Opening paren has no matching closing paren."))
                        (list :branch number reg-expr)))
                    (t
                      ;; condition must be a full regex (actually a
@ -94,9 +92,8 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
                             (reg-expr (reg-expr lexer))
                             (close-token (get-token lexer)))
                        (unless (eq close-token :close-paren)
-                          (signal-ppcre-syntax-error*
-                           open-paren-pos
-                           "Opening paren has no matching closing paren"))
+                          (signal-syntax-error* open-paren-pos
+                                                "Opening paren has no matching closing paren."))
                        (list :branch inner-reg-expr reg-expr))))))
          ((member open-token '(:open-paren
                                :open-paren-colon
@ -124,9 +121,8 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
                (unless (eq close-token :close-paren)
                  ;; the token following <regex> must be the closing
                  ;; parenthesis or this is a syntax error
-                  (signal-ppcre-syntax-error*
-                   open-paren-pos
-                   "Opening paren has no matching closing paren"))
+                  (signal-syntax-error* open-paren-pos
+                                        "Opening paren has no matching closing paren."))
                (if flags
                  ;; if the lexer has returned a list of flags this must
                  ;; have been the "(?:"<regex>")" production
@ -160,11 +156,11 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
           open-token))))

 (defun greedy-quant (lexer)
-  (declare #.*standard-optimize-settings*)
  "Parses and consumes a <greedy-quant>.
 The productions are: <greedy-quant> -> <group> | <group><quantifier>
 where <quantifier> is parsed by the lexer function GET-QUANTIFIER.
 Will return <parse-tree> or (:GREEDY-REPETITION <min> <max> <parse-tree>)."
+  (declare #.*standard-optimize-settings*)
  (let* ((group (group lexer))
         (token (get-quantifier lexer)))
    (if token
@ -174,11 +170,11 @@ Will return <parse-tree> or (:GREEDY-REPETITION <min> <max> <parse-tree>)."
      group)))

 (defun quant (lexer)
-  (declare #.*standard-optimize-settings*)
  "Parses and consumes a <quant>.
 The productions are: <quant> -> <greedy-quant> | <greedy-quant>\"?\".
 Will return the <parse-tree> returned by GREEDY-QUANT and optionally
 change :GREEDY-REPETITION to :NON-GREEDY-REPETITION."
+  (declare #.*standard-optimize-settings*)
  (let* ((greedy-quant (greedy-quant lexer))
         (pos (lexer-pos lexer))
         (next-char (next-char lexer)))
@ -189,10 +185,10 @@ change :GREEDY-REPETITION to :NON-GREEDY-REPETITION."
    greedy-quant))

 (defun seq (lexer)
-  (declare #.*standard-optimize-settings*)
  "Parses and consumes a <seq>.
 The productions are: <seq> -> <quant> | <quant><seq>.
 Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
+  (declare #.*standard-optimize-settings*)
  (flet ((make-array-from-two-chars (char1 char2)
           (let ((string (make-array 2
                                     :element-type 'character
@ -254,10 +250,10 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
      :void)))
  
 (defun reg-expr (lexer)
-  (declare #.*standard-optimize-settings*)
  "Parses and consumes a <regex>, a complete regular expression.
 The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
 Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
+  (declare #.*standard-optimize-settings*)
  (let ((pos (lexer-pos lexer)))
    (case (next-char lexer)
      ((nil)
@ -299,6 +295,8 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
              seq)))))))

 (defun reverse-strings (parse-tree)
+  "Recursively walks through PARSE-TREE and destructively reverses all
+strings in it."
  (declare #.*standard-optimize-settings*)
  (cond ((stringp parse-tree)
          (nreverse parse-tree))
@ -311,13 +309,11 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
        (t parse-tree)))

 (defun parse-string (string)
-  (declare #.*standard-optimize-settings*)
  "Translate the regex string STRING into a parse tree."
+  (declare #.*standard-optimize-settings*)
  (let* ((lexer (make-lexer string))
         (parse-tree (reverse-strings (reg-expr lexer))))
    ;; check whether we've consumed the whole regex string
    (if (end-of-string-p lexer)
      parse-tree
-      (signal-ppcre-syntax-error*
-       (lexer-pos lexer)
-       "Expected end of string"))))
+      (signal-syntax-error* (lexer-pos lexer) "Expected end of string."))))
--- a/ppcre-tests.lisp
+++ b/ppcre-tests.lisp
@ -1,269 +0,0 @@
-;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/ppcre-tests.lisp,v 1.36 2008/06/25 14:04:28 edi Exp $
-
-;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
-
-;;; Redistribution and use in source and binary forms, with or without
-;;; modification, are permitted provided that the following conditions
-;;; are met:
-
-;;;   * Redistributions of source code must retain the above copyright
-;;;     notice, this list of conditions and the following disclaimer.
-
-;;;   * Redistributions in binary form must reproduce the above
-;;;     copyright notice, this list of conditions and the following
-;;;     disclaimer in the documentation and/or other materials
-;;;     provided with the distribution.
-
-;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
-;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-(in-package #:cl-ppcre-test)
-
-(defparameter *cl-ppcre-test-base-directory*
-  (make-pathname :name nil :type nil :version nil
-                 :defaults (parse-namestring *load-truename*)))
-
-(defun full-gc ()
-  "Start a full garbage collection."
-  ;; what are the corresponding values for MCL and OpenMCL?
-  #+:allegro (excl:gc t)
-  #+(or :cmu :scl) (ext:gc :full t)
-  #+:ecl (si:gc t)
-  #+:clisp (ext:gc)
-  #+:cormanlisp (loop for i from 0 to 3 do (cormanlisp:gc i))
-  #+:lispworks4 (hcl:mark-and-sweep 3)
-  #+:lispworks5 (hcl:gc-generation #+:lispworks-32bit 3 #+:lispworks-64bit :blocking-gen-num)
-  #+:sbcl (sb-ext:gc :full t))
-
-;; warning: ugly code ahead!!
-;; this is just a quick hack for testing purposes
-
-(defun time-regex (factor regex string
-                          &key case-insensitive-mode
-                               multi-line-mode
-                               single-line-mode
-                               extended-mode)
-  (declare #.ppcre::*standard-optimize-settings*)
-  "Auxiliary function used by TEST to benchmark a regex scanner
-against Perl timings."
-  (declare (type string string))
-  (let* ((scanner (create-scanner regex
-                                  :case-insensitive-mode case-insensitive-mode
-                                  :multi-line-mode multi-line-mode
-                                  :single-line-mode single-line-mode
-                                  :extended-mode extended-mode))
-         ;; make sure GC doesn't invalidate our benchmarking
-         (dummy (full-gc))
-         (start (get-internal-real-time)))
-    (declare (ignore dummy))
-    (dotimes (i factor)
-      (funcall scanner string 0 (length string)))
-    (float (/ (- (get-internal-real-time) start) internal-time-units-per-second))))
-
-#+(or scl
-      lispworks
-      (and sbcl sb-thread))
-(defun threaded-scan (scanner target-string &key (threads 10) (repetitions 5000))
-  (declare #.ppcre::*standard-optimize-settings*)
-  "Auxiliary function used by TEST to check whether SCANNER is thread-safe."
-  (full-gc)
-  (let ((collector (make-array threads))
-        (counter 0))
-    (loop for i below threads
-          do (let* ((j i)
-                    (fn
-                      (lambda ()
-                        (let ((r (random repetitions)))
-                          (loop for k below repetitions
-                                if (= k r)
-                                  do (setf (aref collector j)
-                                             (let ((result
-                                                     (multiple-value-list
-                                                       (cl-ppcre:scan scanner target-string))))
-                                               (unless (cdr result)
-                                                 (setq result '(nil nil #() #())))
-                                               result))
-                                else
-                                  do (cl-ppcre:scan scanner target-string))
-                          (incf counter)))))
-               #+scl (thread:thread-create fn)
-               #+lispworks (mp:process-run-function "" nil fn)
-               #+(and sbcl sb-thread) (sb-thread:make-thread fn)))
-    (loop while (< counter threads)
-          do (sleep .1))
-    (destructuring-bind (first-start first-end first-reg-starts first-reg-ends)
-        (aref collector 0)
-      (loop for (start end reg-starts reg-ends) across collector
-            if (or (not (eql first-start start))
-                   (not (eql first-end end))
-                   (/= (length first-reg-starts) (length reg-starts))
-                   (/= (length first-reg-ends) (length reg-ends))
-                   (loop for first-reg-start across first-reg-starts
-                         for reg-start across reg-starts
-                         thereis (not (eql first-reg-start reg-start)))
-                   (loop for first-reg-end across first-reg-ends
-                         for reg-end across reg-ends
-                         thereis (not (eql first-reg-end reg-end))))
-            do (return (format nil "~&Inconsistent results during multi-threading"))))))
-
-(defun create-string-from-input (input)
-  (cond ((or (null input)
-             (stringp input))
-          input)
-        (t
-          (cl-ppcre::string-list-to-simple-string
-           (loop for element in input
-                 if (stringp element)
-                 collect element
-                 else
-                 collect (string (code-char element)))))))
-
-(defun test (&key (file-name 
-                   (make-pathname :name "testdata"
-                                  :type nil :version nil
-                                  :defaults *cl-ppcre-test-base-directory*)
-                   file-name-provided-p)
-                  threaded)
-  (declare #.ppcre::*standard-optimize-settings*)
-  (declare (ignorable threaded))
-  "Loop through all test cases in FILE-NAME and print report. Only in
-LispWorks and SCL: If THREADED is true, also test whether the scanners
-work multi-threaded."
-  (with-open-file (stream file-name
-                          #+(or :allegro :clisp :scl :sbcl)
-                          :external-format
-                          #+(or :allegro :clisp :scl :sbcl)
-                          (if file-name-provided-p
-                            :default
-                            #+(or :allegro :scl :sbcl) :iso-8859-1
-                            #+:clisp charset:iso-8859-1))
-    (loop with testcount of-type fixnum = 0
-          with *regex-char-code-limit* = (if file-name-provided-p
-                                           *regex-char-code-limit*
-                                           ;; the standard test suite
-                                           ;; doesn't need Unicode
-                                           ;; support
-                                           256)
-          with *allow-quoting* = (if file-name-provided-p
-                                   *allow-quoting*
-                                   t)
-          for input-line = (read stream nil nil)
-          for (counter info-string regex
-                       case-insensitive-mode multi-line-mode
-                       single-line-mode extended-mode
-                       string perl-error factor
-                       perl-time ex-result ex-subs) = input-line
-          while input-line
-          do (let ((info-string (create-string-from-input info-string))
-                   (regex (create-string-from-input regex))
-                   (string (create-string-from-input string))
-                   (ex-result (create-string-from-input ex-result))
-                   (ex-subs (mapcar #'create-string-from-input ex-subs))
-                   (errors '()))
-               ;; provide some visual feedback for slow CL
-               ;; implementations; suggested by JP Massar
-               (incf testcount)
-               #+(or scl
-                     lispworks
-                     (and sbcl sb-thread))
-               (when threaded
-                 (format t "Test #~A (ID ~A)~%" testcount counter)
-                 (force-output))
-               (unless #-(or scl
-                             lispworks
-                             (and sbcl sb-thread))
-                       nil
-                       #+(or scl
-                             lispworks
-                             (and sbcl sb-thread))
-                       threaded
-                 (when (zerop (mod testcount 10))
-                   (format t ".")
-                   (force-output))
-                 (when (zerop (mod testcount 100))
-                   (terpri)))
-               (handler-case
-                 (let* ((*use-bmh-matchers* (if (and (> factor 1) (plusp perl-time))
-                                              *use-bmh-matchers*
-                                              ;; if we only check for
-                                              ;; correctness we don't
-                                              ;; care about speed that
-                                              ;; match (but rather
-                                              ;; about space
-                                              ;; constraints of the
-                                              ;; trial versions)
-                                              nil))
-                        (scanner (create-scanner regex
-                                                :case-insensitive-mode case-insensitive-mode
-                                                :multi-line-mode multi-line-mode
-                                                :single-line-mode single-line-mode
-                                                :extended-mode extended-mode)))
-                   (multiple-value-bind (result1 result2 sub-starts sub-ends)
-                       (scan scanner string)
-                     (cond (perl-error
-                             (push (format nil
-                                           "~&expected an error but got a result")
-                                   errors))
-                           (t
-                             (when (not (eq result1 ex-result))
-                               (if result1
-                                 (let ((result (subseq string result1 result2)))
-                                   (unless (string= result ex-result)
-                                     (push (format nil
-                                                   "~&expected ~S but got ~S"
-                                                   ex-result result)
-                                           errors))
-                                   (setq sub-starts (coerce sub-starts 'list)
-                                         sub-ends (coerce sub-ends 'list))
-                                   (loop for i from 0
-                                         for ex-sub in ex-subs
-                                         for sub-start = (nth i sub-starts)
-                                         for sub-end = (nth i sub-ends)
-                                         for sub = (if (and sub-start sub-end)
-                                                     (subseq string sub-start sub-end)
-                                                     nil)
-                                         unless (string= ex-sub sub)
-                                           do (push (format nil
-                                                            "~&\\~A: expected ~S but got ~S"
-                                                            (1+ i) ex-sub sub) errors)))
-                                 (push (format nil
-                                               "~&expected ~S but got ~S"
-                                               ex-result result1)
-                                       errors)))))
-                     #+(or scl
-                           lispworks
-                           (and sbcl sb-thread))
-                     (when threaded
-                       (let ((thread-result (threaded-scan scanner string)))
-                         (when thread-result
-                         (push thread-result errors))))))
-                 (condition (msg)
-                   (unless perl-error
-                     (push (format nil "~&got an unexpected error: '~A'" msg)
-                           errors))))
-               (setq errors (nreverse errors))
-               (cond (errors
-                       (when (or (<= factor 1) (zerop perl-time))
-                         (format t "~&~4@A (~A):~{~&   ~A~}~%"
-                                 counter info-string errors)))
-                     ((and (> factor 1) (plusp perl-time))
-                       (let ((result (time-regex factor regex string
-                                                 :case-insensitive-mode case-insensitive-mode
-                                                 :multi-line-mode multi-line-mode
-                                                 :single-line-mode single-line-mode
-                                                 :extended-mode extended-mode)))
-                         (format t "~&~4@A: ~,4F (~A repetitions, Perl: ~,4F seconds, CL-PPCRE: ~,4F seconds)" counter
-                                 (float (/ result perl-time)) factor perl-time result)
-                         #+:cormanlisp (force-output *standard-output*)))
-                     (t nil))))
-    (values)))
--- a/regex-class-util.lisp
+++ b/regex-class-util.lisp
@ -0,0 +1,555 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/regex-class-util.lisp,v 1.8 2008/07/22 22:38:05 edi Exp $
+
+;;; This file contains some utility methods for REGEX objects.
+
+;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-ppcre)
+
+;;; The following four methods allow a VOID object to behave like a
+;;; zero-length STR object (only readers needed)
+
+(defmethod len ((void void))
+  (declare #.*standard-optimize-settings*)
+  0)
+
+(defmethod str ((void void))
+  (declare #.*standard-optimize-settings*)
+  "")
+
+(defmethod skip ((void void))
+  (declare #.*standard-optimize-settings*)
+  nil)
+
+(defmethod start-of-end-string-p ((void void))
+  (declare #.*standard-optimize-settings*)
+  nil)
+
+(defgeneric case-mode (regex old-case-mode)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Utility function used by the optimizer (see GATHER-STRINGS).
+Returns a keyword denoting the case-(in)sensitivity of a STR or its
+second argument if the STR has length 0. Returns NIL for REGEX objects
+which are not of type STR."))
+
+(defmethod case-mode ((str str) old-case-mode)
+  (declare #.*standard-optimize-settings*)
+  (cond ((zerop (len str))
+          old-case-mode)
+        ((case-insensitive-p str)
+          :case-insensitive)
+        (t
+          :case-sensitive)))
+
+(defmethod case-mode ((regex regex) old-case-mode)
+  (declare #.*standard-optimize-settings*)
+  (declare (ignore old-case-mode))
+  nil)
+
+(defgeneric copy-regex (regex)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Implements a deep copy of a REGEX object."))
+
+(defmethod copy-regex ((anchor anchor))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'anchor
+                 :startp (startp anchor)
+                 :multi-line-p (multi-line-p anchor)
+                 :no-newline-p (no-newline-p anchor)))
+
+(defmethod copy-regex ((everything everything))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'everything
+                 :single-line-p (single-line-p everything)))
+
+(defmethod copy-regex ((word-boundary word-boundary))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'word-boundary
+                 :negatedp (negatedp word-boundary)))
+
+(defmethod copy-regex ((void void))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'void))
+
+(defmethod copy-regex ((lookahead lookahead))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'lookahead
+                 :regex (copy-regex (regex lookahead))
+                 :positivep (positivep lookahead)))
+
+(defmethod copy-regex ((seq seq))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'seq
+                 :elements (mapcar #'copy-regex (elements seq))))
+
+(defmethod copy-regex ((alternation alternation))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'alternation
+                 :choices (mapcar #'copy-regex (choices alternation))))
+
+(defmethod copy-regex ((branch branch))
+  (declare #.*standard-optimize-settings*)
+  (with-slots (test)
+      branch
+    (make-instance 'branch
+                   :test (if (typep test 'regex)
+                           (copy-regex test)
+                           test)
+                   :then-regex (copy-regex (then-regex branch))
+                   :else-regex (copy-regex (else-regex branch)))))
+
+(defmethod copy-regex ((lookbehind lookbehind))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'lookbehind
+                 :regex (copy-regex (regex lookbehind))
+                 :positivep (positivep lookbehind)
+                 :len (len lookbehind)))
+
+(defmethod copy-regex ((repetition repetition))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'repetition
+                 :regex (copy-regex (regex repetition))
+                 :greedyp (greedyp repetition)
+                 :minimum (minimum repetition)
+                 :maximum (maximum repetition)
+                 :min-len (min-len repetition)
+                 :len (len repetition)
+                 :contains-register-p (contains-register-p repetition)))
+
+(defmethod copy-regex ((register register))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'register
+                 :regex (copy-regex (regex register))
+                 :num (num register)
+                 :name (name register)))
+
+(defmethod copy-regex ((standalone standalone))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'standalone
+                 :regex (copy-regex (regex standalone))))
+
+(defmethod copy-regex ((back-reference back-reference))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'back-reference
+                 :num (num back-reference)
+                 :case-insensitive-p (case-insensitive-p back-reference)))
+
+(defmethod copy-regex ((char-class char-class))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'char-class
+                 :test-function (test-function char-class)))
+
+(defmethod copy-regex ((str str))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'str
+                 :str (str str)
+                 :case-insensitive-p (case-insensitive-p str)))
+
+(defmethod copy-regex ((filter filter))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'filter
+                 :fn (fn filter)
+                 :len (len filter)))
+
+;;; Note that COPY-REGEX and REMOVE-REGISTERS could have easily been
+;;; wrapped into one function. Maybe in the next release...
+
+;;; Further note that this function is used by CONVERT to factor out
+;;; complicated repetitions, i.e. cases like
+;;;   (a)* -> (?:a*(a))?
+;;; This won't work for, say,
+;;;   ((a)|(b))* -> (?:(?:a|b)*((a)|(b)))?
+;;; and therefore we stop REGISTER removal once we see an ALTERNATION.
+
+(defgeneric remove-registers (regex)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Returns a deep copy of a REGEX (see COPY-REGEX) and
+optionally removes embedded REGISTER objects if possible and if the
+special variable REMOVE-REGISTERS-P is true."))
+
+(defmethod remove-registers ((register register))
+  (declare #.*standard-optimize-settings*)
+  (declare (special remove-registers-p reg-seen))
+  (cond (remove-registers-p
+          (remove-registers (regex register)))
+        (t
+          ;; mark REG-SEEN as true so enclosing REPETITION objects
+          ;; (see method below) know if they contain a register or not
+          (setq reg-seen t)
+          (copy-regex register))))
+
+(defmethod remove-registers ((repetition repetition))
+  (declare #.*standard-optimize-settings*)
+  (let* (reg-seen
+         (inner-regex (remove-registers (regex repetition))))
+    ;; REMOVE-REGISTERS will set REG-SEEN (see method above) if
+    ;; (REGEX REPETITION) contains a REGISTER
+    (declare (special reg-seen))
+    (make-instance 'repetition
+                   :regex inner-regex
+                   :greedyp (greedyp repetition)
+                   :minimum (minimum repetition)
+                   :maximum (maximum repetition)
+                   :min-len (min-len repetition)
+                   :len (len repetition)
+                   :contains-register-p reg-seen)))
+
+(defmethod remove-registers ((standalone standalone))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'standalone
+                 :regex (remove-registers (regex standalone))))
+
+(defmethod remove-registers ((lookahead lookahead))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'lookahead
+                 :regex (remove-registers (regex lookahead))
+                 :positivep (positivep lookahead)))
+
+(defmethod remove-registers ((lookbehind lookbehind))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'lookbehind
+                 :regex (remove-registers (regex lookbehind))
+                 :positivep (positivep lookbehind)
+                 :len (len lookbehind)))
+
+(defmethod remove-registers ((branch branch))
+  (declare #.*standard-optimize-settings*)
+  (with-slots (test)
+      branch
+    (make-instance 'branch
+                   :test (if (typep test 'regex)
+                           (remove-registers test)
+                           test)
+                   :then-regex (remove-registers (then-regex branch))
+                   :else-regex (remove-registers (else-regex branch)))))
+
+(defmethod remove-registers ((alternation alternation))
+  (declare #.*standard-optimize-settings*)
+  (declare (special remove-registers-p))
+  ;; an ALTERNATION, so we can't remove REGISTER objects further down
+  (setq remove-registers-p nil)
+  (copy-regex alternation))
+
+(defmethod remove-registers ((regex regex))
+  (declare #.*standard-optimize-settings*)
+  (copy-regex regex))
+
+(defmethod remove-registers ((seq seq))
+  (declare #.*standard-optimize-settings*)
+  (make-instance 'seq
+                 :elements (mapcar #'remove-registers (elements seq))))
+
+(defgeneric everythingp (regex)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Returns an EVERYTHING object if REGEX is equivalent
+to this object, otherwise NIL.  So, \"(.){1}\" would return true
+\(i.e. the object corresponding to \".\", for example."))
+
+(defmethod everythingp ((seq seq))
+  (declare #.*standard-optimize-settings*)
+  ;; we might have degenerate cases like (:SEQUENCE :VOID ...)
+  ;; due to the parsing process
+  (let ((cleaned-elements (remove-if #'(lambda (element)
+                                         (typep element 'void))
+                                     (elements seq))))
+    (and (= 1 (length cleaned-elements))
+         (everythingp (first cleaned-elements)))))
+
+(defmethod everythingp ((alternation alternation))
+  (declare #.*standard-optimize-settings*)
+  (with-slots (choices)
+      alternation
+    (and (= 1 (length choices))
+         ;; this is unlikely to happen for human-generated regexes,
+         ;; but machine-generated ones might look like this
+         (everythingp (first choices)))))
+
+(defmethod everythingp ((repetition repetition))
+  (declare #.*standard-optimize-settings*)
+  (with-slots (maximum minimum regex)
+      repetition
+    (and maximum
+         (= 1 minimum maximum)
+         ;; treat "<regex>{1,1}" like "<regex>"
+         (everythingp regex))))
+
+(defmethod everythingp ((register register))
+  (declare #.*standard-optimize-settings*)
+  (everythingp (regex register)))
+
+(defmethod everythingp ((standalone standalone))
+  (declare #.*standard-optimize-settings*)
+  (everythingp (regex standalone)))
+
+(defmethod everythingp ((everything everything))
+  (declare #.*standard-optimize-settings*)
+  everything)
+
+(defmethod everythingp ((regex regex))
+  (declare #.*standard-optimize-settings*)
+  ;; the general case for ANCHOR, BACK-REFERENCE, BRANCH, CHAR-CLASS,
+  ;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY
+  nil)
+
+(defgeneric regex-length (regex)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Return the length of REGEX if it is fixed, NIL otherwise."))
+
+(defmethod regex-length ((seq seq))
+  (declare #.*standard-optimize-settings*)
+  ;; simply add all inner lengths unless one of them is NIL
+  (loop for sub-regex in (elements seq)
+        for len = (regex-length sub-regex)
+        if (not len) do (return nil)
+        sum len))
+
+(defmethod regex-length ((alternation alternation))
+  (declare #.*standard-optimize-settings*)
+  ;; only return a true value if all inner lengths are non-NIL and
+  ;; mutually equal
+  (loop for sub-regex in (choices alternation)
+        for old-len = nil then len
+        for len = (regex-length sub-regex)
+        if (or (not len)
+               (and old-len (/= len old-len))) do (return nil)
+        finally (return len)))
+
+(defmethod regex-length ((branch branch))
+  (declare #.*standard-optimize-settings*)
+  ;; only return a true value if both alternations have a length and
+  ;; if they're equal
+  (let ((then-length (regex-length (then-regex branch))))
+    (and then-length
+         (eql then-length (regex-length (else-regex branch)))
+         then-length)))
+
+(defmethod regex-length ((repetition repetition))
+  (declare #.*standard-optimize-settings*)
+  ;; we can only compute the length of a REPETITION object if the
+  ;; number of repetitions is fixed; note that we don't call
+  ;; REGEX-LENGTH for the inner regex, we assume that the LEN slot is
+  ;; always set correctly
+  (with-slots (len minimum maximum)
+      repetition
+    (if (and len
+             (eql minimum maximum))
+      (* minimum len)
+      nil)))
+
+(defmethod regex-length ((register register))
+  (declare #.*standard-optimize-settings*)
+  (regex-length (regex register)))
+
+(defmethod regex-length ((standalone standalone))
+  (declare #.*standard-optimize-settings*)
+  (regex-length (regex standalone)))
+
+(defmethod regex-length ((back-reference back-reference))
+  (declare #.*standard-optimize-settings*)
+  ;; with enough effort we could possibly do better here, but
+  ;; currently we just give up and return NIL
+  nil)
+    
+(defmethod regex-length ((char-class char-class))
+  (declare #.*standard-optimize-settings*)
+  1)
+
+(defmethod regex-length ((everything everything))
+  (declare #.*standard-optimize-settings*)
+  1)
+
+(defmethod regex-length ((str str))
+  (declare #.*standard-optimize-settings*)
+  (len str))
+
+(defmethod regex-length ((filter filter))
+  (declare #.*standard-optimize-settings*)
+  (len filter))
+
+(defmethod regex-length ((regex regex))
+  (declare #.*standard-optimize-settings*)
+  ;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
+  ;; WORD-BOUNDARY (which all have zero-length)
+  0)
+
+(defgeneric regex-min-length (regex)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Returns the minimal length of REGEX."))
+
+(defmethod regex-min-length ((seq seq))
+  (declare #.*standard-optimize-settings*)
+  ;; simply add all inner minimal lengths
+  (loop for sub-regex in (elements seq)
+        for len = (regex-min-length sub-regex)
+        sum len))
+
+(defmethod regex-min-length ((alternation alternation))
+  (declare #.*standard-optimize-settings*)
+  ;; minimal length of an alternation is the minimal length of the
+  ;; "shortest" element
+  (loop for sub-regex in (choices alternation)
+        for len = (regex-min-length sub-regex)
+        minimize len))
+
+(defmethod regex-min-length ((branch branch))
+  (declare #.*standard-optimize-settings*)
+  ;; minimal length of both alternations
+  (min (regex-min-length (then-regex branch))
+       (regex-min-length (else-regex branch))))
+
+(defmethod regex-min-length ((repetition repetition))
+  (declare #.*standard-optimize-settings*)
+  ;; obviously the product of the inner minimal length and the minimal
+  ;; number of repetitions
+  (* (minimum repetition) (min-len repetition)))
+    
+(defmethod regex-min-length ((register register))
+  (declare #.*standard-optimize-settings*)
+  (regex-min-length (regex register)))
+    
+(defmethod regex-min-length ((standalone standalone))
+  (declare #.*standard-optimize-settings*)
+  (regex-min-length (regex standalone)))
+    
+(defmethod regex-min-length ((char-class char-class))
+  (declare #.*standard-optimize-settings*)
+  1)
+
+(defmethod regex-min-length ((everything everything))
+  (declare #.*standard-optimize-settings*)
+  1)
+
+(defmethod regex-min-length ((str str))
+  (declare #.*standard-optimize-settings*)
+  (len str))
+    
+(defmethod regex-min-length ((filter filter))
+  (declare #.*standard-optimize-settings*)
+  (or (len filter)
+      0))
+
+(defmethod regex-min-length ((regex regex))
+  (declare #.*standard-optimize-settings*)
+  ;; the general case for ANCHOR, BACK-REFERENCE, LOOKAHEAD,
+  ;; LOOKBEHIND, VOID, and WORD-BOUNDARY
+  0)
+
+(defgeneric compute-offsets (regex start-pos)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Returns the offset the following regex would have
+relative to START-POS or NIL if we can't compute it. Sets the OFFSET
+slot of REGEX to START-POS if REGEX is a STR. May also affect OFFSET
+slots of STR objects further down the tree."))
+
+;; note that we're actually only interested in the offset of
+;; "top-level" STR objects (see ADVANCE-FN in the SCAN function) so we
+;; can stop at variable-length alternations and don't need to descend
+;; into repetitions
+
+(defmethod compute-offsets ((seq seq) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (loop for element in (elements seq)
+        ;; advance offset argument for next call while looping through
+        ;; the elements
+        for pos = start-pos then curr-offset
+        for curr-offset = (compute-offsets element pos)
+        while curr-offset
+        finally (return curr-offset)))
+
+(defmethod compute-offsets ((alternation alternation) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (loop for choice in (choices alternation)
+        for old-offset = nil then curr-offset
+        for curr-offset = (compute-offsets choice start-pos)
+        ;; we stop immediately if two alternations don't result in the
+        ;; same offset
+        if (or (not curr-offset)
+               (and old-offset (/= curr-offset old-offset)))
+          do (return nil)
+        finally (return curr-offset)))
+
+(defmethod compute-offsets ((branch branch) start-pos)
+  (declare #.*standard-optimize-settings*)
+  ;; only return offset if both alternations have equal value
+  (let ((then-offset (compute-offsets (then-regex branch) start-pos)))
+    (and then-offset
+         (eql then-offset (compute-offsets (else-regex branch) start-pos))
+         then-offset)))
+
+(defmethod compute-offsets ((repetition repetition) start-pos)
+  (declare #.*standard-optimize-settings*)
+  ;; no need to descend into the inner regex
+  (with-slots (len minimum maximum)
+      repetition
+    (if (and len
+             (eq minimum maximum))
+      ;; fixed number of repetitions, so we know how to proceed
+      (+ start-pos (* minimum len))
+      ;; otherwise return NIL
+      nil)))
+
+(defmethod compute-offsets ((register register) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (compute-offsets (regex register) start-pos))
+    
+(defmethod compute-offsets ((standalone standalone) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (compute-offsets (regex standalone) start-pos))
+    
+(defmethod compute-offsets ((char-class char-class) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (1+ start-pos))
+    
+(defmethod compute-offsets ((everything everything) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (1+ start-pos))
+    
+(defmethod compute-offsets ((str str) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (setf (offset str) start-pos)
+  (+ start-pos (len str)))
+
+(defmethod compute-offsets ((back-reference back-reference) start-pos)
+  (declare #.*standard-optimize-settings*)
+  ;; with enough effort we could possibly do better here, but
+  ;; currently we just give up and return NIL
+  (declare (ignore start-pos))
+  nil)
+
+(defmethod compute-offsets ((filter filter) start-pos)
+  (declare #.*standard-optimize-settings*)
+  (let ((len (len filter)))
+    (if len
+      (+ start-pos len)
+      nil)))
+
+(defmethod compute-offsets ((regex regex) start-pos)
+  (declare #.*standard-optimize-settings*)
+  ;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
+  ;; WORD-BOUNDARY (which all have zero-length)
+  start-pos)
--- a/regex-class.lisp
+++ b/regex-class.lisp
@ -1,9 +1,8 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/regex-class.lisp,v 1.34 2008/07/03 07:44:06 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/regex-class.lisp,v 1.42 2008/07/22 22:38:05 edi Exp $

-;;; This file defines the REGEX class and some utility methods for
-;;; this class. REGEX objects are used to represent the (transformed)
-;;; parse trees internally
+;;; This file defines the REGEX class.  REGEX objects are used to
+;;; represent the (transformed) parse trees internally

 ;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.

@ -31,26 +30,20 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

-;; Genera need the eval-when, here, or the types created by the class
-;; definitions aren't seen by the typep calls later in the file.
-(eval-when (:compile-toplevel :load-toplevel :execute)
-  (locally
-    (declare #.*standard-optimize-settings*)
 (defclass regex ()
  ()
  (:documentation "The REGEX base class.  All other classes inherit
 from this one."))

-
 (defclass seq (regex)
  ((elements :initarg :elements
             :accessor elements
             :type cons
             :documentation "A list of REGEX objects."))
-      (:documentation "SEQ objects represents sequences of
-regexes. (Like \"ab\" is the sequence of \"a\" and \"b\".)"))
+  (:documentation "SEQ objects represents sequences of regexes.
+\(Like \"ab\" is the sequence of \"a\" and \"b\".)"))

 (defclass alternation (regex)
  ((choices :initarg :choices
@ -58,7 +51,7 @@ regexes. (Like \"ab\" is the sequence of \"a\" and \"b\".)"))
            :type cons
            :documentation "A list of REGEX objects"))
  (:documentation "ALTERNATION objects represent alternations of
-regexes. (Like \"a|b\" ist the alternation of \"a\" or \"b\".)"))
+regexes.  \(Like \"a|b\" ist the alternation of \"a\" or \"b\".)"))

 (defclass lookahead (regex)
  ((regex :initarg :regex
@ -79,7 +72,7 @@ regexes. (Like \"a|b\" ist the alternation of \"a\" or \"b\".)"))
   (len :initarg :len
        :accessor len
        :type fixnum
-               :documentation "The (fixed) length of the enclosed regex."))
+        :documentation "The \(fixed) length of the enclosed regex."))
  (:documentation "LOOKBEHIND objects represent look-behind assertions."))

 (defclass repetition (regex)
@ -102,16 +95,17 @@ Can be NIL for unbounded.")
            :documentation "The minimal length of the enclosed regex.")
   (len :initarg :len
        :reader len
-               :documentation "The length of the enclosed regex. NIL
-if unknown.")
+        :documentation "The length of the enclosed regex.  NIL if
+unknown.")
   (min-rest :initform 0
             :accessor min-rest
             :type fixnum
-                    :documentation "The minimal number of characters which must
-appear after this repetition.")
+             :documentation "The minimal number of characters which
+must appear after this repetition.")
   (contains-register-p :initarg :contains-register-p
                        :reader contains-register-p
-                               :documentation "If the regex contains a register."))
+                        :documentation "Whether the regex contains a
+register."))
  (:documentation "REPETITION objects represent repetitions of regexes."))

 (defclass register (regex)
@ -149,24 +143,14 @@ reference refers to or NIL.")
                       :documentation "Whether we check
 case-insensitively."))
  (:documentation "BACK-REFERENCE objects represent backreferences."))
+
 (defclass char-class (regex)
-         ((charset :initarg :charset
-                   :reader charset
-                   :type (or charset null)
-                   :documentation "A charset denoting the characters
-in the character class.")
-          (case-insensitive-p :initarg :case-insensitive-p
-                              :reader case-insensitive-p
-                              :documentation "If the char class
-case-insensitive.")
-          (invertedp :initarg :invertedp
-                     :reader invertedp
-                     :documentation "Whether we mean the inverse of
-the char class.")
-          (word-char-class-p :initarg :word-char-class-p
-                             :reader word-char-class-p
-                             :documentation "Whether this CHAR CLASS
-represents the special class WORD-CHAR-CLASS."))
+  ((test-function :initarg :test-function
+                  :reader test-function
+                  :type (or function symbol nil)
+                  :documentation "A unary function \(accepting a
+character) which stands in for the character class and does the work
+of checking whether a character belongs to the class."))
  (:documentation "CHAR-CLASS objects represent character classes."))

 (defclass str (regex)
@ -202,10 +186,12 @@ STR which starts END-STRING (a slot of MATCHER)."))
           :reader startp
           :documentation "Whether this is a \"start anchor\".")
   (multi-line-p :initarg :multi-line-p
+                 :initform nil
                 :reader multi-line-p
                 :documentation "Whether we're in multi-line mode,
 i.e. whether each #\\Newline is surrounded by anchors.")
   (no-newline-p :initarg :no-newline-p
+                 :initform nil
                 :reader no-newline-p
                 :documentation "Whether we ignore #\\Newline at the end."))
  (:documentation "ANCHOR objects represent anchors like \"^\" or \"$\"."))
@ -255,20 +241,7 @@ defined by the user."))

 (defclass void (regex)
  ()
-      (:documentation "VOID objects represent empty regular expressions."))))
-
-(defmethod initialize-instance :after ((char-class char-class) &rest init-args)
-  (declare #.*standard-optimize-settings*)
-  "Make large charsets smaller, if possible."
-  (let ((set (getf init-args :charset)))
-    (when (and set
-               (> *regex-char-code-limit* 256)
-               (> (charset-count set)
-                  (/ *regex-char-code-limit* 2)))
-      (setf (slot-value char-class 'set)
-            (merge-set (make-charset) set)
-            (slot-value char-class 'invertedp)
-            (not (slot-value char-class 'invertedp))))))
+  (:documentation "VOID objects represent empty regular expressions."))

 (defmethod initialize-instance :after ((str str) &rest init-args)
  (declare #.*standard-optimize-settings*)
@ -279,528 +252,3 @@ defined by the user."))
      (setf (slot-value str 'str) (coerce str-slot 'simple-string))))
  (setf (len str) (length (str str))))

-;;; The following four methods allow a VOID object to behave like a
-;;; zero-length STR object (only readers needed)
-
-(defmethod len ((void void))
-  (declare #.*standard-optimize-settings*)
-  0)
-
-(defmethod str ((void void))
-  (declare #.*standard-optimize-settings*)
-  "")
-
-(defmethod skip ((void void))
-  (declare #.*standard-optimize-settings*)
-  nil)
-
-(defmethod start-of-end-string-p ((void void))
-  (declare #.*standard-optimize-settings*)
-  nil)
-
-(defgeneric case-mode (regex old-case-mode)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Utility function used by the optimizer (see GATHER-STRINGS).
-Returns a keyword denoting the case-(in)sensitivity of a STR or its
-second argument if the STR has length 0. Returns NIL for REGEX objects
-which are not of type STR."))
-
-(defmethod case-mode ((str str) old-case-mode)
-  (declare #.*standard-optimize-settings*)
-  (cond ((zerop (len str))
-          old-case-mode)
-        ((case-insensitive-p str)
-          :case-insensitive)
-        (t
-          :case-sensitive)))
-
-(defmethod case-mode ((regex regex) old-case-mode)
-  (declare #.*standard-optimize-settings*)
-  (declare (ignore old-case-mode))
-  nil)
-
-(defgeneric copy-regex (regex)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Implements a deep copy of a REGEX object."))
-
-(defmethod copy-regex ((anchor anchor))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'anchor
-                 :startp (startp anchor)
-                 :multi-line-p (multi-line-p anchor)
-                 :no-newline-p (no-newline-p anchor)))
-
-(defmethod copy-regex ((everything everything))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'everything
-                 :single-line-p (single-line-p everything)))
-
-(defmethod copy-regex ((word-boundary word-boundary))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'word-boundary
-                 :negatedp (negatedp word-boundary)))
-
-(defmethod copy-regex ((void void))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'void))
-
-(defmethod copy-regex ((lookahead lookahead))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'lookahead
-                 :regex (copy-regex (regex lookahead))
-                 :positivep (positivep lookahead)))
-
-(defmethod copy-regex ((seq seq))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'seq
-                 :elements (mapcar #'copy-regex (elements seq))))
-
-(defmethod copy-regex ((alternation alternation))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'alternation
-                 :choices (mapcar #'copy-regex (choices alternation))))
-
-(defmethod copy-regex ((branch branch))
-  (declare #.*standard-optimize-settings*)
-  (with-slots (test)
-      branch
-    (make-instance 'branch
-                   :test (if (typep test 'regex)
-                           (copy-regex test)
-                           test)
-                   :then-regex (copy-regex (then-regex branch))
-                   :else-regex (copy-regex (else-regex branch)))))
-
-(defmethod copy-regex ((lookbehind lookbehind))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'lookbehind
-                 :regex (copy-regex (regex lookbehind))
-                 :positivep (positivep lookbehind)
-                 :len (len lookbehind)))
-
-(defmethod copy-regex ((repetition repetition))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'repetition
-                 :regex (copy-regex (regex repetition))
-                 :greedyp (greedyp repetition)
-                 :minimum (minimum repetition)
-                 :maximum (maximum repetition)
-                 :min-len (min-len repetition)
-                 :len (len repetition)
-                 :contains-register-p (contains-register-p repetition)))
-
-(defmethod copy-regex ((register register))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'register
-                 :regex (copy-regex (regex register))
-                 :num (num register)
-                 :name (name register)))
-
-(defmethod copy-regex ((standalone standalone))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'standalone
-                 :regex (copy-regex (regex standalone))))
-
-(defmethod copy-regex ((back-reference back-reference))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'back-reference
-                 :num (num back-reference)
-                 :case-insensitive-p (case-insensitive-p back-reference)))
-
-(defmethod copy-regex ((char-class char-class))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'char-class
-                 :charset (charset char-class)
-                 :case-insensitive-p (case-insensitive-p char-class)
-                 :invertedp (invertedp char-class)
-                 :word-char-class-p (word-char-class-p char-class)))
-
-(defmethod copy-regex ((str str))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'str
-                 :str (str str)
-                 :case-insensitive-p (case-insensitive-p str)))
-
-(defmethod copy-regex ((filter filter))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'filter
-                 :fn (fn filter)
-                 :len (len filter)))
-
-;;; Note that COPY-REGEX and REMOVE-REGISTERS could have easily been
-;;; wrapped into one function. Maybe in the next release...
-
-;;; Further note that this function is used by CONVERT to factor out
-;;; complicated repetitions, i.e. cases like
-;;;   (a)* -> (?:a*(a))?
-;;; This won't work for, say,
-;;;   ((a)|(b))* -> (?:(?:a|b)*((a)|(b)))?
-;;; and therefore we stop REGISTER removal once we see an ALTERNATION.
-
-(defgeneric remove-registers (regex)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Returns a deep copy of a REGEX (see COPY-REGEX) and
-optionally removes embedded REGISTER objects if possible and if the
-special variable REMOVE-REGISTERS-P is true."))
-
-(defmethod remove-registers ((register register))
-  (declare #.*standard-optimize-settings*)
-  (declare (special remove-registers-p reg-seen))
-  (cond (remove-registers-p
-          (remove-registers (regex register)))
-        (t
-          ;; mark REG-SEEN as true so enclosing REPETITION objects
-          ;; (see method below) know if they contain a register or not
-          (setq reg-seen t)
-          (copy-regex register))))
-
-(defmethod remove-registers ((repetition repetition))
-  (declare #.*standard-optimize-settings*)
-  (let* (reg-seen
-         (inner-regex (remove-registers (regex repetition))))
-    ;; REMOVE-REGISTERS will set REG-SEEN (see method above) if
-    ;; (REGEX REPETITION) contains a REGISTER
-    (declare (special reg-seen))
-    (make-instance 'repetition
-                   :regex inner-regex
-                   :greedyp (greedyp repetition)
-                   :minimum (minimum repetition)
-                   :maximum (maximum repetition)
-                   :min-len (min-len repetition)
-                   :len (len repetition)
-                   :contains-register-p reg-seen)))
-
-(defmethod remove-registers ((standalone standalone))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'standalone
-                 :regex (remove-registers (regex standalone))))
-
-(defmethod remove-registers ((lookahead lookahead))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'lookahead
-                 :regex (remove-registers (regex lookahead))
-                 :positivep (positivep lookahead)))
-
-(defmethod remove-registers ((lookbehind lookbehind))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'lookbehind
-                 :regex (remove-registers (regex lookbehind))
-                 :positivep (positivep lookbehind)
-                 :len (len lookbehind)))
-
-(defmethod remove-registers ((branch branch))
-  (declare #.*standard-optimize-settings*)
-  (with-slots (test)
-      branch
-    (make-instance 'branch
-                   :test (if (typep test 'regex)
-                           (remove-registers test)
-                           test)
-                   :then-regex (remove-registers (then-regex branch))
-                   :else-regex (remove-registers (else-regex branch)))))
-
-(defmethod remove-registers ((alternation alternation))
-  (declare #.*standard-optimize-settings*)
-  (declare (special remove-registers-p))
-  ;; an ALTERNATION, so we can't remove REGISTER objects further down
-  (setq remove-registers-p nil)
-  (copy-regex alternation))
-
-(defmethod remove-registers ((regex regex))
-  (declare #.*standard-optimize-settings*)
-  (copy-regex regex))
-
-(defmethod remove-registers ((seq seq))
-  (declare #.*standard-optimize-settings*)
-  (make-instance 'seq
-                 :elements (mapcar #'remove-registers (elements seq))))
-
-(defgeneric everythingp (regex)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Returns an EVERYTHING object if REGEX is equivalent
-to this object, otherwise NIL. So, \"(.){1}\" would return true
-(i.e. the object corresponding to \".\", for example."))
-
-(defmethod everythingp ((seq seq))
-  (declare #.*standard-optimize-settings*)
-  ;; we might have degenerate cases like (:SEQUENCE :VOID ...)
-  ;; due to the parsing process
-  (let ((cleaned-elements (remove-if #'(lambda (element)
-                                         (typep element 'void))
-                                     (elements seq))))
-    (and (= 1 (length cleaned-elements))
-         (everythingp (first cleaned-elements)))))
-
-(defmethod everythingp ((alternation alternation))
-  (declare #.*standard-optimize-settings*)
-  (with-slots (choices)
-      alternation
-    (and (= 1 (length choices))
-         ;; this is unlikely to happen for human-generated regexes,
-         ;; but machine-generated ones might look like this
-         (everythingp (first choices)))))
-
-(defmethod everythingp ((repetition repetition))
-  (declare #.*standard-optimize-settings*)
-  (with-slots (maximum minimum regex)
-      repetition
-    (and maximum
-         (= 1 minimum maximum)
-         ;; treat "<regex>{1,1}" like "<regex>"
-         (everythingp regex))))
-
-(defmethod everythingp ((register register))
-  (declare #.*standard-optimize-settings*)
-  (everythingp (regex register)))
-
-(defmethod everythingp ((standalone standalone))
-  (declare #.*standard-optimize-settings*)
-  (everythingp (regex standalone)))
-
-(defmethod everythingp ((everything everything))
-  (declare #.*standard-optimize-settings*)
-  everything)
-
-(defmethod everythingp ((regex regex))
-  (declare #.*standard-optimize-settings*)
-  ;; the general case for ANCHOR, BACK-REFERENCE, BRANCH, CHAR-CLASS,
-  ;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY
-  nil)
-
-(defgeneric regex-length (regex)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Return the length of REGEX if it is fixed, NIL otherwise."))
-
-(defmethod regex-length ((seq seq))
-  (declare #.*standard-optimize-settings*)
-  ;; simply add all inner lengths unless one of them is NIL
-  (loop for sub-regex in (elements seq)
-        for len = (regex-length sub-regex)
-        if (not len) do (return nil)
-        sum len))
-
-(defmethod regex-length ((alternation alternation))
-  (declare #.*standard-optimize-settings*)
-  ;; only return a true value if all inner lengths are non-NIL and
-  ;; mutually equal
-  (loop for sub-regex in (choices alternation)
-        for old-len = nil then len
-        for len = (regex-length sub-regex)
-        if (or (not len)
-               (and old-len (/= len old-len))) do (return nil)
-        finally (return len)))
-
-(defmethod regex-length ((branch branch))
-  (declare #.*standard-optimize-settings*)
-  ;; only return a true value if both alternations have a length and
-  ;; if they're equal
-  (let ((then-length (regex-length (then-regex branch))))
-    (and then-length
-         (eql then-length (regex-length (else-regex branch)))
-         then-length)))
-
-(defmethod regex-length ((repetition repetition))
-  (declare #.*standard-optimize-settings*)
-  ;; we can only compute the length of a REPETITION object if the
-  ;; number of repetitions is fixed; note that we don't call
-  ;; REGEX-LENGTH for the inner regex, we assume that the LEN slot is
-  ;; always set correctly
-  (with-slots (len minimum maximum)
-      repetition
-    (if (and len
-             (eql minimum maximum))
-      (* minimum len)
-      nil)))
-
-(defmethod regex-length ((register register))
-  (declare #.*standard-optimize-settings*)
-  (regex-length (regex register)))
-
-(defmethod regex-length ((standalone standalone))
-  (declare #.*standard-optimize-settings*)
-  (regex-length (regex standalone)))
-
-(defmethod regex-length ((back-reference back-reference))
-  (declare #.*standard-optimize-settings*)
-  ;; with enough effort we could possibly do better here, but
-  ;; currently we just give up and return NIL
-  nil)
-    
-(defmethod regex-length ((char-class char-class))
-  (declare #.*standard-optimize-settings*)
-  1)
-
-(defmethod regex-length ((everything everything))
-  (declare #.*standard-optimize-settings*)
-  1)
-
-(defmethod regex-length ((str str))
-  (declare #.*standard-optimize-settings*)
-  (len str))
-
-(defmethod regex-length ((filter filter))
-  (declare #.*standard-optimize-settings*)
-  (len filter))
-
-(defmethod regex-length ((regex regex))
-  (declare #.*standard-optimize-settings*)
-  ;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
-  ;; WORD-BOUNDARY (which all have zero-length)
-  0)
-
-(defgeneric regex-min-length (regex)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Returns the minimal length of REGEX."))
-
-(defmethod regex-min-length ((seq seq))
-  (declare #.*standard-optimize-settings*)
-  ;; simply add all inner minimal lengths
-  (loop for sub-regex in (elements seq)
-        for len = (regex-min-length sub-regex)
-        sum len))
-
-(defmethod regex-min-length ((alternation alternation))
-  (declare #.*standard-optimize-settings*)
-  ;; minimal length of an alternation is the minimal length of the
-  ;; "shortest" element
-  (loop for sub-regex in (choices alternation)
-        for len = (regex-min-length sub-regex)
-        minimize len))
-
-(defmethod regex-min-length ((branch branch))
-  (declare #.*standard-optimize-settings*)
-  ;; minimal length of both alternations
-  (min (regex-min-length (then-regex branch))
-       (regex-min-length (else-regex branch))))
-
-(defmethod regex-min-length ((repetition repetition))
-  (declare #.*standard-optimize-settings*)
-  ;; obviously the product of the inner minimal length and the minimal
-  ;; number of repetitions
-  (* (minimum repetition) (min-len repetition)))
-    
-(defmethod regex-min-length ((register register))
-  (declare #.*standard-optimize-settings*)
-  (regex-min-length (regex register)))
-    
-(defmethod regex-min-length ((standalone standalone))
-  (declare #.*standard-optimize-settings*)
-  (regex-min-length (regex standalone)))
-    
-(defmethod regex-min-length ((char-class char-class))
-  (declare #.*standard-optimize-settings*)
-  1)
-
-(defmethod regex-min-length ((everything everything))
-  (declare #.*standard-optimize-settings*)
-  1)
-
-(defmethod regex-min-length ((str str))
-  (declare #.*standard-optimize-settings*)
-  (len str))
-    
-(defmethod regex-min-length ((filter filter))
-  (declare #.*standard-optimize-settings*)
-  (or (len filter)
-      0))
-
-(defmethod regex-min-length ((regex regex))
-  (declare #.*standard-optimize-settings*)
-  ;; the general case for ANCHOR, BACK-REFERENCE, LOOKAHEAD,
-  ;; LOOKBEHIND, VOID, and WORD-BOUNDARY
-  0)
-
-(defgeneric compute-offsets (regex start-pos)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Returns the offset the following regex would have
-relative to START-POS or NIL if we can't compute it. Sets the OFFSET
-slot of REGEX to START-POS if REGEX is a STR. May also affect OFFSET
-slots of STR objects further down the tree."))
-
-;; note that we're actually only interested in the offset of
-;; "top-level" STR objects (see ADVANCE-FN in the SCAN function) so we
-;; can stop at variable-length alternations and don't need to descend
-;; into repetitions
-
-(defmethod compute-offsets ((seq seq) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (loop for element in (elements seq)
-        ;; advance offset argument for next call while looping through
-        ;; the elements
-        for pos = start-pos then curr-offset
-        for curr-offset = (compute-offsets element pos)
-        while curr-offset
-        finally (return curr-offset)))
-
-(defmethod compute-offsets ((alternation alternation) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (loop for choice in (choices alternation)
-        for old-offset = nil then curr-offset
-        for curr-offset = (compute-offsets choice start-pos)
-        ;; we stop immediately if two alternations don't result in the
-        ;; same offset
-        if (or (not curr-offset)
-               (and old-offset (/= curr-offset old-offset)))
-          do (return nil)
-        finally (return curr-offset)))
-
-(defmethod compute-offsets ((branch branch) start-pos)
-  (declare #.*standard-optimize-settings*)
-  ;; only return offset if both alternations have equal value
-  (let ((then-offset (compute-offsets (then-regex branch) start-pos)))
-    (and then-offset
-         (eql then-offset (compute-offsets (else-regex branch) start-pos))
-         then-offset)))
-
-(defmethod compute-offsets ((repetition repetition) start-pos)
-  (declare #.*standard-optimize-settings*)
-  ;; no need to descend into the inner regex
-  (with-slots (len minimum maximum)
-      repetition
-    (if (and len
-             (eq minimum maximum))
-      ;; fixed number of repetitions, so we know how to proceed
-      (+ start-pos (* minimum len))
-      ;; otherwise return NIL
-      nil)))
-
-(defmethod compute-offsets ((register register) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (compute-offsets (regex register) start-pos))
-    
-(defmethod compute-offsets ((standalone standalone) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (compute-offsets (regex standalone) start-pos))
-    
-(defmethod compute-offsets ((char-class char-class) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (1+ start-pos))
-    
-(defmethod compute-offsets ((everything everything) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (1+ start-pos))
-    
-(defmethod compute-offsets ((str str) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (setf (offset str) start-pos)
-  (+ start-pos (len str)))
-
-(defmethod compute-offsets ((back-reference back-reference) start-pos)
-  (declare #.*standard-optimize-settings*)
-  ;; with enough effort we could possibly do better here, but
-  ;; currently we just give up and return NIL
-  (declare (ignore start-pos))
-  nil)
-
-(defmethod compute-offsets ((filter filter) start-pos)
-  (declare #.*standard-optimize-settings*)
-  (let ((len (len filter)))
-    (if len
-      (+ start-pos len)
-      nil)))
-
-(defmethod compute-offsets ((regex regex) start-pos)
-  (declare #.*standard-optimize-settings*)
-  ;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
-  ;; WORD-BOUNDARY (which all have zero-length)
-  start-pos)
--- a/repetition-closures.lisp
+++ b/repetition-closures.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/repetition-closures.lisp,v 1.29 2008/06/25 14:04:28 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/repetition-closures.lisp,v 1.33 2008/07/06 18:12:05 edi Exp $

 ;;; This is actually a part of closures.lisp which we put into a
 ;;; separate file because it is rather complex. We only deal with
@ -33,7 +33,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (defmacro incf-after (place &optional (delta 1) &environment env)
  "Utility macro inspired by C's \"place++\", i.e. first return the
@ -58,7 +58,7 @@ CHECK-CURR-POS is a form which checks whether the inner regex of the
 repetition matches at CURR-POS."
  `(if maximum
    (lambda (start-pos)
-      (declare (type fixnum start-pos maximum))
+      (declare (fixnum start-pos maximum))
      ;; because we know LEN we know in advance where to stop at the
      ;; latest; we also take into consideration MIN-REST, i.e. the
      ;; minimal length of the part behind the repetition
@ -68,7 +68,7 @@ repetition matches at CURR-POS."
                                 (+ start-pos
                                    (the fixnum (* len maximum)))))
            (curr-pos start-pos))
-        (declare (type fixnum target-end-pos curr-pos))
+        (declare (fixnum target-end-pos curr-pos))
        (block greedy-constant-length-matcher
          ;; we use an ugly TAGBODY construct because this might be a
          ;; tight loop and this version is a bit faster than our LOOP
@ -95,10 +95,10 @@ repetition matches at CURR-POS."
    ;; basically the same code; it's just a bit easier because we're
    ;; not bounded by MAXIMUM
    (lambda (start-pos)
-      (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (let ((target-end-pos (1+ (- *end-pos* len min-rest)))
            (curr-pos start-pos))
-        (declare (type fixnum target-end-pos curr-pos))
+        (declare (fixnum target-end-pos curr-pos))
        (block greedy-constant-length-matcher
          (tagbody
            forward-loop
@ -117,20 +117,19 @@ repetition matches at CURR-POS."
            (go backward-loop)))))))

 (defun create-greedy-everything-matcher (maximum min-rest next-fn)
-  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum min-rest)
-           (type function next-fn))
  "Creates a closure which just matches as far ahead as possible,
 i.e. a closure for a dot in single-line mode."
+  (declare #.*standard-optimize-settings*)
+  (declare (fixnum min-rest) (function next-fn))
  (if maximum
    (lambda (start-pos)
-      (declare (type fixnum start-pos maximum))
+      (declare (fixnum start-pos maximum))
      ;; because we know LEN we know in advance where to stop at the
      ;; latest; we also take into consideration MIN-REST, i.e. the
      ;; minimal length of the part behind the repetition
      (let ((target-end-pos (min (+ start-pos maximum)
                                 (- *end-pos* min-rest))))
-        (declare (type fixnum target-end-pos))
+        (declare (fixnum target-end-pos))
        ;; start from the highest possible position and go backward
        ;; until we're able to match the rest of the regex
        (loop for curr-pos of-type fixnum from target-end-pos downto start-pos
@ -138,18 +137,18 @@ i.e. a closure for a dot in single-line mode."
    ;; basically the same code; it's just a bit easier because we're
    ;; not bounded by MAXIMUM
    (lambda (start-pos)
-      (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (let ((target-end-pos (- *end-pos* min-rest)))
-        (declare (type fixnum target-end-pos))
+        (declare (fixnum target-end-pos))
        (loop for curr-pos of-type fixnum from target-end-pos downto start-pos
              thereis (funcall next-fn curr-pos))))))

 (defgeneric create-greedy-constant-length-matcher (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION is greedy and the minimal number of repetitions is
-zero. It is furthermore assumed that the inner regex of REPETITION is
-of fixed length and doesn't contain registers."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION is greedy and the minimal number of
+repetitions is zero.  It is furthermore assumed that the inner regex
+of REPETITION is of fixed length and doesn't contain registers."))

 (defmethod create-greedy-constant-length-matcher ((repetition repetition)
                                                  next-fn)
@ -158,8 +157,8 @@ of fixed length and doesn't contain registers."))
        (maximum (maximum repetition))
        (regex (regex repetition))
        (min-rest (min-rest repetition)))
-    (declare (type fixnum len min-rest)
-             (type function next-fn))
+    (declare (fixnum len min-rest)
+             (function next-fn))
    (cond ((zerop len)
            ;; inner regex has zero-length, so we can discard it
            ;; completely
@ -186,11 +185,8 @@ of fixed length and doesn't contain registers."))
              (char-class
                ;; a character class
                (insert-char-class-tester (regex (schar *string* curr-pos))
-                  (if (invertedp regex)
                  (greedy-constant-length-closure
-                     (not (char-class-test)))
-                    (greedy-constant-length-closure
-                     (char-class-test)))))
+                   (char-class-test))))
              (everything
                ;; an EVERYTHING object, i.e. a dot
                (if (single-line-p regex)
@ -202,17 +198,17 @@ of fixed length and doesn't contain registers."))
                ;; just checks for immediate success, i.e. NEXT-FN is
                ;; #'IDENTITY
                (let ((inner-matcher (create-matcher-aux regex #'identity)))
-                  (declare (type function inner-matcher))
+                  (declare (function inner-matcher))
                  (greedy-constant-length-closure
                   (funcall inner-matcher curr-pos)))))))))

 (defgeneric create-greedy-no-zero-matcher (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION is greedy and the minimal number of repetitions is
-zero. It is furthermore assumed that the inner regex of REPETITION can
-never match a zero-length string (or instead the maximal number of
-repetitions is 1)."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION is greedy and the minimal number of
+repetitions is zero.  It is furthermore assumed that the inner regex
+of REPETITION can never match a zero-length string \(or instead the
+maximal number of repetitions is 1)."))

 (defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
  (declare #.*standard-optimize-settings*)
@ -220,7 +216,7 @@ repetitions is 1)."))
        ;; REPEAT-MATCHER is part of the closure's environment but it
        ;; can only be defined after GREEDY-AUX is defined
        repeat-matcher)
-    (declare (type function next-fn))
+    (declare (function next-fn))
    (cond
      ((eql maximum 1)
        ;; this is essentially like the next case but with a known
@ -230,7 +226,7 @@ repetitions is 1)."))
        (setq repeat-matcher
                (create-matcher-aux (regex repetition) next-fn))
        (lambda (start-pos)
-          (declare (type function repeat-matcher))
+          (declare (function repeat-matcher))
          (or (funcall repeat-matcher start-pos)
              (funcall next-fn start-pos))))
      (maximum
@ -239,8 +235,8 @@ repetitions is 1)."))
        ;; repetitions
        (let ((rep-num (incf-after *rep-num*)))
          (flet ((greedy-aux (start-pos)
-                   (declare (type fixnum start-pos maximum rep-num)
-                            (type function repeat-matcher))
+                   (declare (fixnum start-pos maximum rep-num)
+                            (function repeat-matcher))
                   ;; the actual matcher which first tries to match the
                   ;; inner regex of REPETITION (if we haven't done so
                   ;; too often) and on failure calls NEXT-FN
@ -259,15 +255,15 @@ repetitions is 1)."))
            ;; the closure we return is just a thin wrapper around
            ;; GREEDY-AUX to initialize the repetition counter
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (setf (aref *repeat-counters* rep-num) 0)
              (greedy-aux start-pos)))))
      (t
        ;; easier code because we're not bounded by MAXIMUM, but
        ;; basically the same
        (flet ((greedy-aux (start-pos)
-                 (declare (type fixnum start-pos)
-                          (type function repeat-matcher))
+                 (declare (fixnum start-pos)
+                          (function repeat-matcher))
                 (or (funcall repeat-matcher start-pos)
                     (funcall next-fn start-pos))))
          (setq repeat-matcher
@ -276,9 +272,9 @@ repetitions is 1)."))

 (defgeneric create-greedy-matcher (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION is greedy and the minimal number of repetitions is
-zero."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION is greedy and the minimal number of
+repetitions is zero."))

 (defmethod create-greedy-matcher ((repetition repetition) next-fn)
  (declare #.*standard-optimize-settings*)
@ -290,8 +286,8 @@ zero."))
        ;; REPEAT-MATCHER is part of the closure's environment but it
        ;; can only be defined after GREEDY-AUX is defined
        repeat-matcher)
-    (declare (type fixnum zero-length-num)
-             (type function next-fn))
+    (declare (fixnum zero-length-num)
+             (function next-fn))
    (cond
      (maximum
        ;; we make a reservation for our slot in *REPEAT-COUNTERS*
@ -302,8 +298,8 @@ zero."))
                   ;; the actual matcher which first tries to match the
                   ;; inner regex of REPETITION (if we haven't done so
                   ;; too often) and on failure calls NEXT-FN
-                   (declare (type fixnum start-pos maximum rep-num)
-                            (type function repeat-matcher))
+                   (declare (fixnum start-pos maximum rep-num)
+                            (function repeat-matcher))
                   (let ((old-last-pos
                           (svref *last-pos-stores* zero-length-num)))
                     (when (and old-last-pos
@ -333,7 +329,7 @@ zero."))
            ;; GREEDY-AUX to initialize the repetition counter and our
            ;; slot in *LAST-POS-STORES*
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (setf (aref *repeat-counters* rep-num) 0
                    (svref *last-pos-stores* zero-length-num) nil)
              (greedy-aux start-pos)))))
@ -341,8 +337,8 @@ zero."))
        ;; easier code because we're not bounded by MAXIMUM, but
        ;; basically the same
        (flet ((greedy-aux (start-pos)
-                 (declare (type fixnum start-pos)
-                          (type function repeat-matcher))
+                 (declare (fixnum start-pos)
+                          (function repeat-matcher))
                 (let ((old-last-pos
                         (svref *last-pos-stores* zero-length-num)))
                   (when (and old-last-pos
@ -356,14 +352,14 @@ zero."))
          (setq repeat-matcher
                  (create-matcher-aux (regex repetition) #'greedy-aux))
          (lambda (start-pos)
-            (declare (type fixnum start-pos))
+            (declare (fixnum start-pos))
            (setf (svref *last-pos-stores* zero-length-num) nil)
            (greedy-aux start-pos)))))))
  
 ;; code for non-greedy repetitions with minimum zero

 (defmacro non-greedy-constant-length-closure (check-curr-pos)
-  "This is the template for simple non-greedy repetitions (where
+  "This is the template for simple non-greedy repetitions \(where
 simple means that the minimum number of repetitions is zero, that the
 inner regex to be checked is of fixed length LEN, and that it doesn't
 contain registers, i.e. there's no need for backtracking).
@ -371,7 +367,7 @@ CHECK-CURR-POS is a form which checks whether the inner regex of the
 repetition matches at CURR-POS."
  `(if maximum
    (lambda (start-pos)
-      (declare (type fixnum start-pos maximum))
+      (declare (fixnum start-pos maximum))
      ;; because we know LEN we know in advance where to stop at the
      ;; latest; we also take into consideration MIN-REST, i.e. the
      ;; minimal length of the part behind the repetition
@ -389,7 +385,7 @@ repetition matches at CURR-POS."
  ;; basically the same code; it's just a bit easier because we're
  ;; not bounded by MAXIMUM
  (lambda (start-pos)
-    (declare (type fixnum start-pos))
+    (declare (fixnum start-pos))
    (let ((target-end-pos (1+ (- *end-pos* len min-rest))))
      (loop for curr-pos of-type fixnum from start-pos
                                        below target-end-pos
@ -400,10 +396,10 @@ repetition matches at CURR-POS."

 (defgeneric create-non-greedy-constant-length-matcher (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION is non-greedy and the minimal number of repetitions is
-zero. It is furthermore assumed that the inner regex of REPETITION is
-of fixed length and doesn't contain registers."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION is non-greedy and the minimal number of
+repetitions is zero.  It is furthermore assumed that the inner regex
+of REPETITION is of fixed length and doesn't contain registers."))

 (defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
  (declare #.*standard-optimize-settings*)
@ -411,8 +407,8 @@ of fixed length and doesn't contain registers."))
        (maximum (maximum repetition))
        (regex (regex repetition))
        (min-rest (min-rest repetition)))
-    (declare (type fixnum len min-rest)
-             (type function next-fn))
+    (declare (fixnum len min-rest)
+             (function next-fn))
    (cond ((zerop len)
            ;; inner regex has zero-length, so we can discard it
            ;; completely
@ -439,11 +435,8 @@ of fixed length and doesn't contain registers."))
              (char-class
                ;; a character class
                (insert-char-class-tester (regex (schar *string* curr-pos))
-                  (if (invertedp regex)
                  (non-greedy-constant-length-closure
-                     (not (char-class-test)))
-                    (non-greedy-constant-length-closure
-                     (char-class-test)))))
+                   (char-class-test))))
              (everything
                (if (single-line-p regex)
                  ;; a dot which really can match everything; we rely
@ -458,17 +451,17 @@ of fixed length and doesn't contain registers."))
                ;; just checks for immediate success, i.e. NEXT-FN is
                ;; #'IDENTITY
                (let ((inner-matcher (create-matcher-aux regex #'identity)))
-                  (declare (type function inner-matcher))
+                  (declare (function inner-matcher))
                  (non-greedy-constant-length-closure
                   (funcall inner-matcher curr-pos)))))))))

 (defgeneric create-non-greedy-no-zero-matcher (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION is non-greedy and the minimal number of repetitions is
-zero. It is furthermore assumed that the inner regex of REPETITION can
-never match a zero-length string (or instead the maximal number of
-repetitions is 1)."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION is non-greedy and the minimal number of
+repetitions is zero.  It is furthermore assumed that the inner regex
+of REPETITION can never match a zero-length string \(or instead the
+maximal number of repetitions is 1)."))

 (defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
  (declare #.*standard-optimize-settings*)
@ -476,7 +469,7 @@ repetitions is 1)."))
        ;; REPEAT-MATCHER is part of the closure's environment but it
        ;; can only be defined after NON-GREEDY-AUX is defined
        repeat-matcher)
-    (declare (type function next-fn))
+    (declare (function next-fn))
    (cond
      ((eql maximum 1)
        ;; this is essentially like the next case but with a known
@ -484,7 +477,7 @@ repetitions is 1)."))
        (setq repeat-matcher
                (create-matcher-aux (regex repetition) next-fn))
        (lambda (start-pos)
-          (declare (type function repeat-matcher))
+          (declare (function repeat-matcher))
          (or (funcall next-fn start-pos)
              (funcall repeat-matcher start-pos))))
      (maximum
@ -496,8 +489,8 @@ repetitions is 1)."))
                   ;; the actual matcher which first calls NEXT-FN and
                   ;; on failure tries to match the inner regex of
                   ;; REPETITION (if we haven't done so too often)
-                   (declare (type fixnum start-pos maximum rep-num)
-                            (type function repeat-matcher))
+                   (declare (fixnum start-pos maximum rep-num)
+                            (function repeat-matcher))
                   (or (funcall next-fn start-pos)
                       (and (< (aref *repeat-counters* rep-num) maximum)
                            (incf (aref *repeat-counters* rep-num))
@ -513,15 +506,15 @@ repetitions is 1)."))
            ;; the closure we return is just a thin wrapper around
            ;; NON-GREEDY-AUX to initialize the repetition counter
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (setf (aref *repeat-counters* rep-num) 0)
              (non-greedy-aux start-pos)))))
      (t
        ;; easier code because we're not bounded by MAXIMUM, but
        ;; basically the same
        (flet ((non-greedy-aux (start-pos)
-                 (declare (type fixnum start-pos)
-                          (type function repeat-matcher))
+                 (declare (fixnum start-pos)
+                          (function repeat-matcher))
                 (or (funcall next-fn start-pos)
                     (funcall repeat-matcher start-pos))))
          (setq repeat-matcher
@ -530,9 +523,9 @@ repetitions is 1)."))
  
 (defgeneric create-non-greedy-matcher (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION is non-greedy and the minimal number of repetitions is
-zero."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION is non-greedy and the minimal number of
+repetitions is zero."))

 (defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
  (declare #.*standard-optimize-settings*)
@ -544,8 +537,8 @@ zero."))
        ;; REPEAT-MATCHER is part of the closure's environment but it
        ;; can only be defined after NON-GREEDY-AUX is defined
        repeat-matcher)
-    (declare (type fixnum zero-length-num)
-             (type function next-fn))
+    (declare (fixnum zero-length-num)
+             (function next-fn))
    (cond
      (maximum
        ;; we make a reservation for our slot in *REPEAT-COUNTERS*
@ -556,8 +549,8 @@ zero."))
                   ;; the actual matcher which first calls NEXT-FN and
                   ;; on failure tries to match the inner regex of
                   ;; REPETITION (if we haven't done so too often)
-                   (declare (type fixnum start-pos maximum rep-num)
-                            (type function repeat-matcher))
+                   (declare (fixnum start-pos maximum rep-num)
+                            (function repeat-matcher))
                   (let ((old-last-pos
                           (svref *last-pos-stores* zero-length-num)))
                     (when (and old-last-pos
@ -587,7 +580,7 @@ zero."))
            ;; NON-GREEDY-AUX to initialize the repetition counter and our
            ;; slot in *LAST-POS-STORES*
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (setf (aref *repeat-counters* rep-num) 0
                    (svref *last-pos-stores* zero-length-num) nil)
              (non-greedy-aux start-pos)))))
@ -595,8 +588,8 @@ zero."))
        ;; easier code because we're not bounded by MAXIMUM, but
        ;; basically the same
        (flet ((non-greedy-aux (start-pos)
-                 (declare (type fixnum start-pos)
-                          (type function repeat-matcher))
+                 (declare (fixnum start-pos)
+                          (function repeat-matcher))
                 (let ((old-last-pos
                         (svref *last-pos-stores* zero-length-num)))
                   (when (and old-last-pos
@ -611,7 +604,7 @@ zero."))
          (setq repeat-matcher
                  (create-matcher-aux (regex repetition) #'non-greedy-aux))
          (lambda (start-pos)
-            (declare (type fixnum start-pos))
+            (declare (fixnum start-pos))
            (setf (svref *last-pos-stores* zero-length-num) nil)
            (non-greedy-aux start-pos)))))))
  
@ -622,13 +615,13 @@ zero."))
 means that the inner regex to be checked is of fixed length LEN, and
 that it doesn't contain registers, i.e. there's no need for
 backtracking) and where constant means that MINIMUM is equal to
-MAXIMUM. CHECK-CURR-POS is a form which checks whether the inner regex
-of the repetition matches at CURR-POS."
+MAXIMUM.  CHECK-CURR-POS is a form which checks whether the inner
+regex of the repetition matches at CURR-POS."
  `(lambda (start-pos)
-    (declare (type fixnum start-pos))
+    (declare (fixnum start-pos))
      (let ((target-end-pos (+ start-pos
                               (the fixnum (* len repetitions)))))
-        (declare (type fixnum target-end-pos))
+        (declare (fixnum target-end-pos))
        ;; first check if we won't go beyond the end of the string
        (and (>= *end-pos* target-end-pos)
             ;; then loop through all repetitions step by step
@ -642,10 +635,10 @@ of the repetition matches at CURR-POS."
 (defgeneric create-constant-repetition-constant-length-matcher
    (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION has a constant number of repetitions. It is
-furthermore assumed that the inner regex of REPETITION is of fixed
-length and doesn't contain registers."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION has a constant number of repetitions.
+It is furthermore assumed that the inner regex of REPETITION is of
+fixed length and doesn't contain registers."))

 (defmethod create-constant-repetition-constant-length-matcher
       ((repetition repetition) next-fn)
@ -653,8 +646,8 @@ length and doesn't contain registers."))
  (let ((len (len repetition))
        (repetitions (minimum repetition))
        (regex (regex repetition)))
-    (declare (type fixnum len repetitions)
-             (type function next-fn))
+    (declare (fixnum len repetitions)
+             (function next-fn))
    (if (zerop len)
      ;; if the length is zero it suffices to try once
      (create-matcher-aux regex next-fn)
@ -676,33 +669,29 @@ length and doesn't contain registers."))
              (if (case-insensitive-p regex)
                (constant-repetition-constant-length-closure
                 (let ((next-pos (+ curr-pos len)))
-                   (declare (type fixnum next-pos))
+                   (declare (fixnum next-pos))
                   (and (*string*-equal str curr-pos next-pos 0 len)
                        next-pos)))
                (constant-repetition-constant-length-closure
                 (let ((next-pos (+ curr-pos len)))
-                   (declare (type fixnum next-pos))
+                   (declare (fixnum next-pos))
                   (and (*string*= str curr-pos next-pos 0 len)
                        next-pos)))))))
        (char-class
          ;; a character class
          (insert-char-class-tester (regex (schar *string* curr-pos))
-            (if (invertedp regex)
-              (constant-repetition-constant-length-closure
-               (and (not (char-class-test))
-                    (1+ curr-pos)))
            (constant-repetition-constant-length-closure
             (and (char-class-test)
-                    (1+ curr-pos))))))
+                  (1+ curr-pos)))))
        (everything
          (if (single-line-p regex)
            ;; a dot which really matches everything - we just have to
            ;; advance the index into *STRING* accordingly and check
            ;; if we didn't go past the end
            (lambda (start-pos)
-              (declare (type fixnum start-pos))
+              (declare (fixnum start-pos))
              (let ((next-pos (+ start-pos repetitions)))
-                (declare (type fixnum next-pos))
+                (declare (fixnum next-pos))
                (and (<= next-pos *end-pos*)
                     (funcall next-fn next-pos))))
            ;; a dot which is not in single-line-mode - make sure we
@ -714,14 +703,14 @@ length and doesn't contain registers."))
          ;; the general case - we build an inner matcher which just
          ;; checks for immediate success, i.e. NEXT-FN is #'IDENTITY
          (let ((inner-matcher (create-matcher-aux regex #'identity)))
-            (declare (type function inner-matcher))
+            (declare (function inner-matcher))
            (constant-repetition-constant-length-closure
             (funcall inner-matcher curr-pos))))))))
  
 (defgeneric create-constant-repetition-matcher (repetition next-fn)
  (declare #.*standard-optimize-settings*)
-  (:documentation "Creates a closure which tries to match REPETITION. It is assumed
-that REPETITION has a constant number of repetitions."))
+  (:documentation "Creates a closure which tries to match REPETITION.
+It is assumed that REPETITION has a constant number of repetitions."))

 (defmethod create-constant-repetition-matcher ((repetition repetition) next-fn)
  (declare #.*standard-optimize-settings*)
@ -732,20 +721,20 @@ that REPETITION has a constant number of repetitions."))
        ;; REPEAT-MATCHER is part of the closure's environment but it
        ;; can only be defined after NON-GREEDY-AUX is defined
        repeat-matcher)
-    (declare (type fixnum repetitions rep-num)
-             (type function next-fn))
+    (declare (fixnum repetitions rep-num)
+             (function next-fn))
    (if (zerop (min-len repetition))
      ;; we make a reservation for our slot in *LAST-POS-STORES*
      ;; because we have to watch out for needless loops as the inner
      ;; regex might match zero-length strings
      (let ((zero-length-num (incf-after *zero-length-num*)))
-        (declare (type fixnum zero-length-num))
+        (declare (fixnum zero-length-num))
        (flet ((constant-aux (start-pos)
                 ;; the actual matcher which first calls NEXT-FN and
                 ;; on failure tries to match the inner regex of
                 ;; REPETITION (if we haven't done so too often)
-                 (declare (type fixnum start-pos)
-                          (type function repeat-matcher))
+                 (declare (fixnum start-pos)
+                          (function repeat-matcher))
                 (let ((old-last-pos
                         (svref *last-pos-stores* zero-length-num)))
                   (when (and old-last-pos
@ -778,15 +767,15 @@ that REPETITION has a constant number of repetitions."))
          ;; the closure we return is just a thin wrapper around
          ;; CONSTANT-AUX to initialize the repetition counter
          (lambda (start-pos)
-            (declare (type fixnum start-pos))
+            (declare (fixnum start-pos))
            (setf (aref *repeat-counters* rep-num) 0
                  (aref *last-pos-stores* zero-length-num) nil)
            (constant-aux start-pos))))
      ;; easier code because we don't have to care about zero-length
      ;; matches but basically the same
      (flet ((constant-aux (start-pos)
-               (declare (type fixnum start-pos)
-                        (type function repeat-matcher))
+               (declare (fixnum start-pos)
+                        (function repeat-matcher))
               (cond ((< (aref *repeat-counters* rep-num) repetitions)
                       (incf (aref *repeat-counters* rep-num))
                       (prog1
@ -796,7 +785,7 @@ that REPETITION has a constant number of repetitions."))
        (setq repeat-matcher
                (create-matcher-aux (regex repetition) #'constant-aux))
        (lambda (start-pos)
-          (declare (type fixnum start-pos))
+          (declare (fixnum start-pos))
          (setf (aref *repeat-counters* rep-num) 0)
          (constant-aux start-pos))))))
  
--- a/scanner.lisp
+++ b/scanner.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/scanner.lisp,v 1.29 2008/06/25 14:04:28 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/scanner.lisp,v 1.34 2008/07/06 18:12:05 edi Exp $

 ;;; Here the scanner for the actual regex as well as utility scanners
 ;;; for the constant start and end strings are created.
@ -30,13 +30,13 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 (defmacro bmh-matcher-aux (&key case-insensitive-p)
  "Auxiliary macro used by CREATE-BMH-MATCHER."
  (let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
    `(lambda (start-pos)
-      (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (if (or (minusp start-pos)
              (> (the fixnum (+ start-pos m)) *end-pos*))
        nil
@ -53,21 +53,21 @@
                                 (return-from bmh-matcher (1+ i)))))))))

 (defun create-bmh-matcher (pattern case-insensitive-p)
-  (declare #.*standard-optimize-settings*)
  "Returns a Boyer-Moore-Horspool matcher which searches the (special)
 simple-string *STRING* for the first occurence of the substring
 PATTERN.  The search starts at the position START-POS within *STRING*
 and stops before *END-POS* is reached.  Depending on the second
 argument the search is case-insensitive or not.  If the special
 variable *USE-BMH-MATCHERS* is NIL, use the standard SEARCH function
-instead. (BMH matchers are faster but need much more space.)"
+instead.  \(BMH matchers are faster but need much more space.)"
+  (declare #.*standard-optimize-settings*)
  ;; see <http://www-igm.univ-mlv.fr/~lecroq/string/node18.html> for
  ;; details
  (unless *use-bmh-matchers*
    (let ((test (if case-insensitive-p #'char-equal #'char=)))
      (return-from create-bmh-matcher
        (lambda (start-pos)
-          (declare (type fixnum start-pos))
+          (declare (fixnum start-pos))
          (and (not (minusp start-pos))
               (search pattern
                       *string*
@ -78,7 +78,7 @@ instead. (BMH matchers are faster but need much more space.)"
 	 (skip (make-array *regex-char-code-limit*
 			  :element-type 'fixnum
 			  :initial-element m)))
-    (declare (type fixnum m))
+    (declare (fixnum m))
    (loop for k of-type fixnum below m
          if case-insensitive-p
            do (setf (aref skip (char-code (char-upcase (schar pattern k)))) (- m k 1)
@ -93,29 +93,28 @@ instead. (BMH matchers are faster but need much more space.)"
  "Auxiliary macro used by CREATE-CHAR-SEARCHER."
  (let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
    `(lambda (start-pos)
-      (declare (type fixnum start-pos))
+      (declare (fixnum start-pos))
      (and (not (minusp start-pos))
           (loop for i of-type fixnum from start-pos below *end-pos*
                 thereis (and (,char-compare (schar *string* i) chr) i))))))

 (defun create-char-searcher (chr case-insensitive-p)
-  (declare #.*standard-optimize-settings*)
  "Returns a function which searches the (special) simple-string
 *STRING* for the first occurence of the character CHR. The search
 starts at the position START-POS within *STRING* and stops before
 *END-POS* is reached.  Depending on the second argument the search is
 case-insensitive or not."
+  (declare #.*standard-optimize-settings*)
  (if case-insensitive-p
    (char-searcher-aux :case-insensitive-p t)
    (char-searcher-aux)))

 (declaim (inline newline-skipper))
-
 (defun newline-skipper (start-pos)
-  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum start-pos))
-  "Find the next occurence of a character in *STRING* which is behind
+  "Finds the next occurence of a character in *STRING* which is behind
 a #\Newline."
+  (declare #.*standard-optimize-settings*)
+  (declare (fixnum start-pos))
  ;; we can start with (1- START-POS) without testing for (PLUSP
  ;; START-POS) because we know we'll never call NEWLINE-SKIPPER on
  ;; the first iteration
@ -159,8 +158,8 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
                                 nil))
              ;; we don't need to try further than MAX-END-POS
              (max-end-pos (- *end-pos* min-len)))
-         (declare (type fixnum scan-start-pos)
-                  (type function match-fn))
+         (declare (fixnum scan-start-pos)
+                  (function match-fn))
         ;; definition of ADVANCE-FN will be inserted here by macrology
         (labels ((advance-fn-definition))
           (declare (inline advance-fn))
@ -185,8 +184,8 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
             ;; is anchored at the very end of the target string
             ;; (perhaps modulo a #\Newline)
             (let ((end-test-pos (- *end-pos* (the fixnum end-string-len))))
-               (declare (type fixnum end-test-pos)
-                        (type function end-string-test))
+               (declare (fixnum end-test-pos)
+                        (function end-string-test))
               (unless (setq *end-string-pos* (funcall end-string-test
                                                       end-test-pos))
                 (when (and (= 1 (the fixnum end-anchored-p))
@ -223,7 +222,7 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
                   (return-from scan nil))
                 (when starts-with-str
                   (locally
-                     (declare (type fixnum starts-with-len))
+                     (declare (fixnum starts-with-len))
                     (cond ((and (case-insensitive-p starts-with)
                                 (not (*string*-equal starts-with-str
                                                      *start-pos*
@ -321,10 +320,10 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
                           rep-num
                           zero-length-num
                           reg-num)
-  (declare #.*standard-optimize-settings*)
-  (declare (type fixnum min-len zero-length-num rep-num reg-num))
  "Auxiliary function to create and return a scanner \(which is
 actually a closure).  Used by CREATE-SCANNER."
+  (declare #.*standard-optimize-settings*)
+  (declare (fixnum min-len zero-length-num rep-num reg-num))
  (let ((starts-with-len (if (typep starts-with 'str)
                           (len starts-with)))
        (starts-with-everything (typep starts-with 'everything)))
@ -341,8 +340,8 @@ actually a closure). Used by CREATE-SCANNER."
        ;; left)
        (insert-advance-fn
          (advance-fn (pos)
-            (declare (type fixnum end-string-offset starts-with-len)
-                     (type function start-string-test end-string-test))
+            (declare (fixnum end-string-offset starts-with-len)
+                     (function start-string-test end-string-test))
            (loop
              (unless (setq pos (funcall start-string-test pos))
                ;; give up completely if we can't find a start string
@ -350,7 +349,7 @@ actually a closure). Used by CREATE-SCANNER."
                (return-from scan nil))
              (locally
                ;; from here we know that POS is a FIXNUM
-                (declare (type fixnum pos))
+                (declare (fixnum pos))
                (when (= pos (- (the fixnum *end-string-pos*) end-string-offset))
                  ;; if we already found an end string candidate the
                  ;; position of which matches the start string
@ -369,7 +368,7 @@ actually a closure). Used by CREATE-SCANNER."
                    ;; according to the end string candidate
                    (let ((new-pos (- (the fixnum *end-string-pos*)
                                      end-string-offset)))
-                      (declare (type fixnum new-pos *end-string-pos*))
+                      (declare (fixnum new-pos *end-string-pos*))
                      (cond ((= new-pos pos)
                              ;; if POS and NEW-POS are equal then the
                              ;; two candidates agree so we're fine
@ -394,15 +393,15 @@ actually a closure). Used by CREATE-SCANNER."
        ;; offset (from the left)
        (insert-advance-fn
          (advance-fn (pos)
-            (declare (type fixnum end-string-offset)
-                     (type function end-string-test))
+            (declare (fixnum end-string-offset)
+                     (function end-string-test))
            (loop
              (unless (setq pos (newline-skipper pos))
                ;; if we can't find a #\Newline we give up immediately
                (return-from scan nil))
              (locally
                ;; from here we know that POS is a FIXNUM
-                (declare (type fixnum pos))
+                (declare (fixnum pos))
                (when (= pos (- (the fixnum *end-string-pos*) end-string-offset))
                  ;; if we already found an end string candidate the
                  ;; position of which matches the place behind the
@ -420,7 +419,7 @@ actually a closure). Used by CREATE-SCANNER."
                    ;; according to the end string candidate
                    (let ((new-pos (- (the fixnum *end-string-pos*)
                                      end-string-offset)))
-                      (declare (type fixnum new-pos *end-string-pos*))
+                      (declare (fixnum new-pos *end-string-pos*))
                      (cond ((= new-pos pos)
                              ;; if POS and NEW-POS are equal then the
                              ;; the end string candidate agrees with
@ -446,7 +445,7 @@ actually a closure). Used by CREATE-SCANNER."
        ;; information to advance POS
        (insert-advance-fn
          (advance-fn (pos)
-            (declare (type function start-string-test end-string-test))
+            (declare (function start-string-test end-string-test))
            (unless (setq pos (funcall start-string-test pos))
              (return-from scan nil))
            (if (<= (the fixnum pos)
@ -463,7 +462,7 @@ actually a closure). Used by CREATE-SCANNER."
        ;; enough information to advance POS
        (insert-advance-fn
          (advance-fn (pos)
-            (declare (type function end-string-test))
+            (declare (function end-string-test))
            (unless (setq pos (newline-skipper pos))
              (return-from scan nil))
            (if (<= (the fixnum pos)
@ -476,7 +475,7 @@ actually a closure). Used by CREATE-SCANNER."
        ;; just check for constant start string candidate
        (insert-advance-fn
          (advance-fn (pos)
-            (declare (type function start-string-test))
+            (declare (function start-string-test))
            (unless (setq pos (funcall start-string-test pos))
              (return-from scan nil))
            pos)))
@ -492,7 +491,7 @@ actually a closure). Used by CREATE-SCANNER."
        ;; advanced beyond the last one
        (insert-advance-fn
          (advance-fn (pos)
-            (declare (type function end-string-test))
+            (declare (function end-string-test))
            (if (<= (the fixnum pos)
                    (the fixnum *end-string-pos*))
              (return-from advance-fn pos))
--- a/specials.lisp
+++ b/specials.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/specials.lisp,v 1.27 2008/07/03 08:13:28 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/specials.lisp,v 1.40 2008/07/23 02:14:06 edi Exp $

 ;;; globally declared special variables

@ -29,7 +29,7 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)

 ;;; special variables used to effect declarations

@ -51,7 +51,7 @@

 (defvar *extended-mode-p* nil
  "Whether the parser will start in extended mode.")
-(declaim (type boolean *extended-mode-p*))
+(declaim (boolean *extended-mode-p*))

 ;;; special variables used by the SCAN function and the matchers

@ -60,16 +60,16 @@
 occur in character classes.  Change this value BEFORE creating
 scanners if you don't need the \(full) Unicode support of
 implementations like AllegroCL, CLISP, LispWorks, or SBCL.")
-(declaim (type fixnum *regex-char-code-limit*))
+(declaim (fixnum *regex-char-code-limit*))
  
 (defvar *string* ""
  "The string which is currently scanned by SCAN.
 Will always be coerced to a SIMPLE-STRING.")
-(declaim (type simple-string *string*))
+(declaim (simple-string *string*))

 (defvar *start-pos* 0
  "Where to start scanning within *STRING*.")
-(declaim (type fixnum *start-pos*))
+(declaim (fixnum *start-pos*))

 (defvar *real-start-pos* nil
  "The real start of *STRING*. This is for repeated scans and is only used internally.")
@ -77,22 +77,22 @@ Will always be coerced to a SIMPLE-STRING.")

 (defvar *end-pos* 0
  "Where to stop scanning within *STRING*.")
-(declaim (type fixnum *end-pos*))
+(declaim (fixnum *end-pos*))

 (defvar *reg-starts* (make-array 0)
  "An array which holds the start positions
 of the current register candidates.")
-(declaim (type simple-vector *reg-starts*))
+(declaim (simple-vector *reg-starts*))
  
 (defvar *regs-maybe-start* (make-array 0)
  "An array which holds the next start positions
 of the current register candidates.")
-(declaim (type simple-vector *regs-maybe-start*))
+(declaim (simple-vector *regs-maybe-start*))

 (defvar *reg-ends* (make-array 0)
  "An array which holds the end positions
 of the current register candidates.")
-(declaim (type simple-vector *reg-ends*))
+(declaim (simple-vector *reg-ends*))

 (defvar *end-string-pos* nil
  "Start of the next possible end-string candidate.")
@ -100,12 +100,12 @@ of the current register candidates.")
 (defvar *rep-num* 0
  "Counts the number of \"complicated\" repetitions while the matchers
 are built.")
-(declaim (type fixnum *rep-num*))
+(declaim (fixnum *rep-num*))

 (defvar *zero-length-num* 0
  "Counts the number of repetitions the inner regexes of which may
 have zero-length while the matchers are built.")
-(declaim (type fixnum *zero-length-num*))
+(declaim (fixnum *zero-length-num*))

 (defvar *repeat-counters* (make-array 0
                                      :initial-element 0
@ -118,12 +118,27 @@ repetitive patterns have been tested already.")
  "An array to keep track of the last positions
 where we saw repetitive patterns.
 Only used for patterns which might have zero length.")
-(declaim (type simple-vector *last-pos-stores*))
+(declaim (simple-vector *last-pos-stores*))

 (defvar *use-bmh-matchers* t
  "Whether the scanners created by CREATE-SCANNER should use the \(fast
 but large) Boyer-Moore-Horspool matchers.")

+(defvar *optimize-char-classes* nil
+  "Whether character classes should be compiled into look-ups into
+O\(1) data structures.  This is usually fast but will be costly in
+terms of scanner creation time and might be costly in terms of size if
+*REGEX-CHAR-CODE-LIMIT* is high.  This value will be used as the :KIND
+keyword argument to CREATE-OPTIMIZED-TEST-FUNCTION - see there for the
+possible non-NIL values.")
+
+(defvar *property-resolver* nil
+  "Should be NIL or a designator for a function which accepts strings
+and returns unary character test functions or NIL.  This 'resolver' is
+intended to handle `character properties' like \\p{IsAlpha}.  If
+*PROPERTY-RESOLVER* is NIL, then the parser will simply treat \\p and
+\\P as #\\p and #\\P as in older versions of CL-PPCRE.")
+
 (defvar *allow-quoting* nil
  "Whether the parser should support Perl's \\Q and \\E.")

--- a/lispworks-defsystem.lisp
+++ b/lispworks-defsystem.lisp
@ -1,7 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/lispworks-defsystem.lisp,v 1.4 2008/06/25 14:04:27 edi Exp $
-
-;;; This system definition for LispWorks was kindly provided by Wade Humeniuk
+;;; $Header: /usr/local/cvsrep/cl-ppcre/test/packages.lisp,v 1.3 2008/07/22 12:58:52 edi Exp $

 ;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.

@ -29,29 +27,11 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-user)
+(in-package :cl-user)

-(defparameter *cl-ppcre-base-directory*
-  (make-pathname :name nil :type nil :version nil
-                 :defaults (parse-namestring *load-truename*)))
-
-(defsystem cl-ppcre
-  (:default-pathname *cl-ppcre-base-directory*
-   :default-type :lisp-file)
-  :members ("packages"
-            "specials"
-            "util"
-            "errors"
-            "lexer"
-            "parser"
-            "regex-class"
-            "convert"
-            "optimize"
-            "closures"
-            "repetition-closures"
-            "scanner"
-            "api")
-  :rules ((:in-order-to :compile :all
-                        (:requires (:load :previous)))
-          (:in-order-to :load :all
-                        (:requires (:load :previous)))))
+(defpackage :cl-ppcre-test
+  #+genera (:shadowing-import-from :common-lisp :lambda)
+  (:use #-:genera :cl #+:genera :future-common-lisp :cl-ppcre)
+  (:import-from :cl-ppcre :*standard-optimize-settings*
+                          :string-list-to-simple-string)
+  (:export :run-all-tests :unicode-test))
--- a/test/perl-tests.lisp
+++ b/test/perl-tests.lisp
@ -0,0 +1,150 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/test/perl-tests.lisp,v 1.7 2008/07/22 23:02:04 edi Exp $
+
+;;; The tests in this file test CL-PPCRE against testdata generated by
+;;; the Perl program `perltest.pl' from the input file `testinput' in
+;;; order to check compatibility with Perl and correctness of the
+;;; regex engine.
+
+;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-ppcre-test)
+
+(defvar *tests-to-skip* '(662 790 1439)
+  "Some tests we skip because the testdata is generated by a Perl
+program and CL-PPCRE differs from Perl for these tests - on purpose.")
+
+(defun create-string-from-input (input)
+  "Converts INPUT to a string which can be used in TEST below.  The
+input file `testdata' encodes strings containing non-printable
+characters as lists where those characters are represented by their
+character code."
+  (etypecase input
+    ((or null string) input)
+    (list (string-list-to-simple-string
+           (loop for element in input
+                 if (stringp element)
+                 collect element
+                 else
+                 collect (string (code-char element)))))))
+
+(defun perl-test (&key (file-name 
+                        (make-pathname :name "perltestdata"
+                                       :type nil :version nil
+                                       :defaults *this-file*)
+                        file-name-provided-p)
+                       (external-format '(:latin-1 :eol-style :lf))
+                       verbose)
+  "Loops through all test cases in FILE-NAME and prints a report if
+VERBOSE is true.  EXTERNAL-FORMAT is the FLEXI-STREAMS external format
+which is used to read the file.  Returns a true value if all tests
+succeeded.
+
+For the syntax of the tests in FILE-NAME refer to the source code of
+this function and to the Perl script perltest.pl which generates such
+test files."
+  (declare #.*standard-optimize-settings*)
+  (with-open-file (binary-stream file-name :element-type 'flex:octet)
+    (let ((stream (flex:make-flexi-stream binary-stream :external-format external-format))
+          ;; the standard Perl tests don't need full Unicode support
+          (*regex-char-code-limit* (if file-name-provided-p *regex-char-code-limit* 256))
+          ;; we need this for the standard test suite or otherwise we
+          ;; might get stack overflows
+          (*optimize-char-classes* (if file-name-provided-p *optimize-char-classes* :charmap))
+          ;; we only check for correctness and don't care about speed
+          ;; that match (but rather about space constraints of the
+          ;; trial versions)
+          (*use-bmh-matchers* (if file-name-provided-p *use-bmh-matchers* nil))
+          ;; some tests in the Perl suite explicitly check for this
+          (*allow-quoting* (if file-name-provided-p *allow-quoting* t)))
+      (do-tests ((format nil "Running tests in file ~S" (file-namestring file-name))
+                 (not verbose))
+        (let ((input-line (or (read stream nil) (done)))
+              errors)
+          (destructuring-bind (counter
+                               info-string%
+                               regex%
+                               case-insensitive-mode
+                               multi-line-mode
+                               single-line-mode
+                               extended-mode
+                               target%
+                               perl-error
+                               expected-result%
+                               expected-registers)
+              input-line
+            (destructuring-bind (info-string regex target expected-result)
+                (mapcar 'create-string-from-input
+                        (list info-string% regex% target% expected-result%))
+              (setq expected-registers (mapcar 'create-string-from-input expected-registers))
+              (unless (find counter *tests-to-skip* :test #'=)
+                (when verbose
+                  (format t "~&~4D: ~S" counter info-string))                  
+                (let ((scanner
+                       (handler-bind ((error (lambda (condition)
+                                               (declare (ignore condition))
+                                               (when perl-error
+                                                 ;; we expected an
+                                                 ;; error, so we can
+                                                 ;; signal success
+                                                 (return-from test-block)))))
+                         (create-scanner regex
+                                         :case-insensitive-mode case-insensitive-mode
+                                         :multi-line-mode multi-line-mode
+                                         :single-line-mode single-line-mode
+                                         :extended-mode extended-mode))))
+                  (block test-block
+                    (multiple-value-bind (start end reg-starts reg-ends)                        
+                        (scan scanner target)
+                      (cond (perl-error
+                             (push (format nil "expected an error but got a result.")
+                                   errors))
+                            (t
+                             (when (not (eq start expected-result))
+                               (if start
+                                 (let ((result (subseq target start end)))
+                                   (unless (string= result expected-result)
+                                     (push (format nil "expected ~S but got ~S."
+                                                   expected-result result)
+                                           errors))
+                                   (setq reg-starts (coerce reg-starts 'list)
+                                         reg-ends (coerce reg-ends 'list))
+                                   (loop for i from 0
+                                         for expected-register in expected-registers
+                                         for reg-start = (nth i reg-starts)
+                                         for reg-end = (nth i reg-ends)
+                                         for register = (if (and reg-start reg-end)
+                                                          (subseq target reg-start reg-end)
+                                                          nil)
+                                         unless (string= expected-register register)
+                                         do (push (format nil "\\~A: expected ~S but got ~S."
+                                                          (1+ i) expected-register register)
+                                                  errors)))
+                                 (push (format nil "expected ~S but got ~S."
+                                               expected-result start)
+                                       errors))))))
+                    errors))))))))))
--- a/test/perltest.pl
+++ b/test/perltest.pl
@ -1,4 +1,5 @@
 #!/usr/bin/perl
+# $Header: /usr/local/cvsrep/cl-ppcre/test/perltest.pl,v 1.1 2008/07/06 21:24:39 edi Exp $

 # This is a heavily modified version of the file 'perltest' which
 # comes with the PCRE library package, which is open source software,
@ -8,8 +9,6 @@
 # The PCRE library package is available from
 # <ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/>

-use Time::HiRes qw(time);
-
 sub string_for_lisp {
  my(@a, $t, $in_string, $switch);

@ -48,8 +47,6 @@ sub string_for_lisp {
  }
 }

-$min_time = shift;
-
 NEXT_RE: while (1) {
  last
    if !($_ = <>);
@ -132,8 +129,6 @@ if (\$x =~ ${pattern}) {
 };
 END

-    $times = 1;
-    $used = 0;
    $counter++;
    print STDERR "$counter\n";

@ -141,18 +136,9 @@ END
      $error = 't';
    } else {
      $error = 'nil';
-      if ($min_time) {
-        $times = 10;
-        while (1) {
-          $used = &$test($times);
-          last
-            if $used > $min_time;
-          $times *= 10;
-        }
-      }
    }

-    print "($counter $info_string \"$pattern_for_lisp\" $case_insensitive_mode $multi_line_mode $single_line_mode $extended_mode " . string_for_lisp($x) . " $error $times $used ";
+    print "($counter $info_string \"$pattern_for_lisp\" $case_insensitive_mode $multi_line_mode $single_line_mode $extended_mode " . string_for_lisp($x) . " $error ";
    if (!@subs) {
      print 'nil nil';
    } else {
--- a/test/perltestdata
+++ b/test/perltestdata
--- a/test/perltestinput
+++ b/test/perltestinput
--- a/test/simple
+++ b/test/simple
@ -0,0 +1,349 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/test/simple,v 1.9 2008/07/23 00:48:00 edi Exp $
+
+;;; some simple tests for CL-PPCRE - entered manually and to be read
+;;; in the CL-PPCRE-TEST package; all forms are expected to return a
+;;; true value on success when EVALuated
+
+(equalp (multiple-value-list (scan "(a)*b" "xaaabd"))
+        (list 1 5 #(3) #(4)))
+
+(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 1))
+        (list 1 5 #(3) #(4)))
+
+(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 2))
+        (list 2 5 #(3) #(4)))
+
+(null (scan "(a)*b" "xaaabd" :end 4))
+
+(equalp (multiple-value-list (scan '(:greedy-repetition 0 nil #\b) "bbbc"))
+        (list 0 3 #() #()))
+
+(null (scan '(:greedy-repetition 4 6 #\b) "bbbc"))
+
+(let ((s (create-scanner "(([a-c])+)x")))
+  (equalp (multiple-value-list (scan s "abcxy"))
+          (list 0 4 #(0 2) #(3 3))))
+
+(equalp (multiple-value-list (scan-to-strings "[^b]*b" "aaabd"))
+        (list "aaab" #()))
+
+(equalp (multiple-value-list (scan-to-strings "([^b])*b" "aaabd"))
+        (list "aaab" #("a")))
+
+(equalp (multiple-value-list (scan-to-strings "(([^b])*)b" "aaabd"))
+        (list "aaab" #("aaa" "a")))
+
+(equalp (register-groups-bind (first second third fourth)
+            ("((a)|(b)|(c))+" "abababc" :sharedp t)
+          (list first second third fourth))
+        (list "c" "a" "b" "c"))
+
+(equalp (register-groups-bind (nil second third fourth)
+            ("((a)|(b)|(c))()+" "abababc" :start 6)
+          (list second third fourth))
+        (list nil nil "c"))
+
+(null (register-groups-bind (first)
+          ("(a|b)+" "accc" :start 1)
+        first))
+
+(equalp (register-groups-bind (fname lname (#'parse-integer date month year))
+            ("(\\w+)\\s+(\\w+)\\s+(\\d{1,2})\\.(\\d{1,2})\\.(\\d{4})" "Frank Zappa 21.12.1940")
+          (list fname lname (encode-universal-time 0 0 0 date month year 0)))
+        (list "Frank" "Zappa" 1292889600))
+
+(flet ((foo (regex target-string &key (start 0) (end (length target-string)))
+         (let ((sum 0))
+           (do-matches (s e regex target-string nil :start start :end end)
+             (incf sum (- e s)))
+           (/ sum (- end start)))))
+  (and (= 1/3 (foo "a" "abcabcabc"))
+       (= 5/9 (foo "aa|b" "aacabcbbc"))))
+
+(labels ((crossfoot (target-string &key (start 0) (end (length target-string)))
+           (let ((sum 0))
+             (do-matches-as-strings (m :digit-class
+                                       target-string nil
+                                       :start start :end end)
+               (incf sum (parse-integer m)))
+             (if (< sum 10)
+               sum
+               (crossfoot (format nil "~A" sum))))))
+  (and (zerop (crossfoot "bar"))
+       (= 3 (crossfoot "a3x"))
+       (= 6 (crossfoot "12345"))))
+
+(let (result)
+  (do-register-groups (first second third fourth)
+      ("((a)|(b)|(c))" "abababc" nil :start 2 :sharedp t)
+    (push (list first second third fourth) result))
+  (equal (nreverse result)
+         '(("a" "a" nil nil) 
+           ("b" nil "b" nil) 
+           ("a" "a" nil nil) 
+           ("b" nil "b" nil) 
+           ("c" nil nil "c"))))
+
+(let (result)
+  (do-register-groups ((#'parse-integer n) (#'intern sign) whitespace)
+      ("(\\d+)|(\\+|-|\\*|/)|(\\s+)" "12*15 - 42/3")
+    (unless whitespace
+      (push (or n sign) result)))
+  (equal (nreverse result)
+         '(12 * 15 - 42 / 3)))
+
+(equal (all-matches "a" "foo bar baz")
+       (list 5 6 9 10))
+
+(equal (all-matches "\\w*" "foo bar baz")
+       (list 0 3 3 3 4 7 7 7 8 11 11 11))
+
+(equal (all-matches-as-strings "a" "foo bar baz")
+       (list "a" "a"))
+
+(equal (all-matches-as-strings "\\w*" "foo bar baz")
+       (list "foo" "" "bar" "" "baz" ""))
+
+(equal (split "\\s+" "foo   bar baz
+frob")
+       '("foo" "bar" "baz" "frob"))
+
+(equal (split "\\s*" "foo bar   baz")
+       '("f" "o" "o" "b" "a" "r" "b" "a" "z"))
+
+(equal (split "(\\s+)" "foo bar   baz")
+       '("foo" "bar" "baz"))
+
+(equal (split "(\\s+)" "foo bar   baz" :with-registers-p t)
+       '("foo" " " "bar" "   " "baz"))
+
+(equal (split "(\\s)(\\s*)" "foo bar   baz" :with-registers-p t)
+       '("foo" " " "" "bar" " " "  " "baz"))
+
+(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t)
+       '("foo" "," nil "bar" nil ";" "baz"))
+
+(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t :omit-unmatched-p t)
+       '("foo" "," "bar" ";" "baz"))
+
+(equal (split ":" "a:b:c:d:e:f:g::")
+       '("a" "b" "c" "d" "e" "f" "g"))
+
+(equal (split ":" "a:b:c:d:e:f:g::" :limit 1)
+       '("a:b:c:d:e:f:g::"))
+
+(equal (split ":" "a:b:c:d:e:f:g::" :limit 2)
+       '("a" "b:c:d:e:f:g::"))
+
+(equal (split ":" "a:b:c:d:e:f:g::" :limit 3)
+       '("a" "b" "c:d:e:f:g::"))
+
+(equal (split ":" "a:b:c:d:e:f:g::" :limit 1000)
+       '("a" "b" "c" "d" "e" "f" "g" "" ""))
+
+(equal (multiple-value-list (regex-replace "fo+" "foo bar" "frob"))
+       (list "frob bar" t))
+
+(equal (multiple-value-list (regex-replace "fo+" "FOO bar" "frob"))
+       (list "FOO bar" nil))
+
+(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob"))
+       (list "frob bar" t))
+
+(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob" :preserve-case t))
+       (list "FROB bar" t))
+
+(equal (multiple-value-list (regex-replace "(?i)fo+" "Foo bar" "frob" :preserve-case t))
+       (list "Frob bar" t))
+
+(equal (multiple-value-list (regex-replace "bar" "foo bar baz" "[frob (was '\\&' between '\\`' and '\\'')]"))
+       (list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
+
+(equal (multiple-value-list
+        (regex-replace "bar" "foo bar baz"
+                       '("[frob (was '" :match "' between '" :before-match "' and '" :after-match "')]")))
+       (list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
+
+(equal (multiple-value-list (regex-replace "(be)(nev)(o)(lent)"
+                                           "benevolent: adj. generous, kind"
+                                           (lambda (match &rest registers)
+                                             (format nil "~A [~{~A~^.~}]" match registers))
+                                           :simple-calls t))
+       (list "benevolent [be.nev.o.lent]: adj. generous, kind" t))
+
+(equal (multiple-value-list (regex-replace-all "(?i)fo+" "foo Fooo FOOOO bar" "frob" :preserve-case t))
+       (list "frob Frob FROB bar" t))
+
+(string= (regex-replace-all "(?i)f(o+)" "foo Fooo FOOOO bar" "fr\\1b" :preserve-case t)
+         "froob Frooob FROOOOB bar")
+
+(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
+  (flet ((encode-quoted-printable (string)
+           "Converts 8-bit string to quoted-printable representation."
+           ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
+           (flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
+                    (declare (ignore start end match-end reg-starts reg-ends))
+                    (format nil "=~2,'0x" (char-code (char target-string match-start)))))
+             (regex-replace-all qp-regex string #'convert))))
+    (string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
+             "F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
+
+(let ((url-regex (create-scanner "[^a-zA-Z0-9_\\-.]")))
+  (flet ((url-encode (string)
+           "URL-encodes a string."
+           ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
+           (flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
+                    (declare (ignore start end match-end reg-starts reg-ends))
+                    (format nil "%~2,'0x" (char-code (char target-string match-start)))))
+             (regex-replace-all url-regex string #'convert))))
+    (string= (url-encode "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
+             "F%EAte%20S%F8rensen%20na%EFve%20H%FChner%20Stra%DFe")))
+
+(flet ((how-many (target-string start end match-start match-end reg-starts reg-ends)
+         (declare (ignore target-string start end match-start match-end))
+         (format nil "~A" (- (svref reg-ends 0)
+                             (svref reg-starts 0)))))
+  (string= (regex-replace-all "{(.+?)}"
+                              "foo{...}bar{.....}{..}baz{....}frob"
+                              (list "[" #'how-many " dots]"))
+           "foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
+
+(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
+  (flet ((encode-quoted-printable (string)
+           "Converts 8-bit string to quoted-printable representation.
+Version using SIMPLE-CALLS keyword argument."
+           ;; ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
+           (flet ((convert (match)
+                    (format nil "=~2,'0x" (char-code (char match 0)))))
+             (regex-replace-all qp-regex string #'convert
+                                :simple-calls t))))
+    (string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
+             "F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
+
+(flet ((how-many (match first-register)
+         (declare (ignore match))
+         (format nil "~A" (length first-register))))
+  (string= (regex-replace-all "{(.+?)}"
+                              "foo{...}bar{.....}{..}baz{....}frob"
+                              (list "[" #'how-many " dots]")
+                              :simple-calls t)
+           "foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
+
+(flet ((my-repetition (char min)
+         `(:greedy-repetition ,min nil ,char)))
+  (setf (parse-tree-synonym 'a*) (my-repetition #\a 0)
+        (parse-tree-synonym 'b+) (my-repetition #\b 1))
+  (unwind-protect
+      (let ((scanner (create-scanner '(:sequence a* b+))))
+        (equal (mapcar (lambda (target)
+                         (scan scanner target))
+                       '("ab" "b" "aab" "a" "x"))
+               (list 0 0 0 nil nil)))
+    (setf (parse-tree-synonym 'a*) nil
+          (parse-tree-synonym 'b+) nil)))
+
+(null (scan "^a+$" "a+"))
+
+(let ((*allow-quoting* t))
+  ;;we use CREATE-SCANNER because of Lisps like SBCL that don't have an interpreter
+  (equalp (multiple-value-list (scan (create-scanner "^\\Qa+\\E$") "a+"))
+          (list 0 2 #() #())))
+
+(string= (parse-string "\\k<reg>") "k<reg>")
+
+(let ((*allow-named-registers* t))
+  (equal (nth-value 1 (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))"))
+         (list nil "small" "big")))
+
+(let ((*allow-named-registers* t))
+  (equal (nth-value 1 (create-scanner '(:register
+                                        (:sequence
+                                         (:named-register "small"
+                                          (:greedy-repetition 0 nil (:char-class (:range #\a #\z))))
+                                         (:named-register "big"
+                                          (:greedy-repetition 0 nil (:char-class (:range #\a #\z))))))))
+         (list nil "small" "big")))
+
+(let ((*allow-named-registers* t))
+  (equalp (multiple-value-list (scan (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))") "aaaBBB"))
+          (list 0 6 #(0 0 3) #(6 3 6))))
+
+(let ((*allow-named-registers* t))
+  ;; multiple-choice back-reference
+  (equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a1aa"))
+          (list 0 4 #(0 1) #(1 2))))
+
+(let ((*allow-named-registers* t))
+  (equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a22a"))
+          (list 0 4 #(0 1) #(1 2))))
+
+(let ((*allow-named-registers* t))
+  ;; demonstrating most-recently-seen-register-first property of back-reference;
+  ;; "greedy" regex (analogous to "aa?")
+  (equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "a"))
+          (list 0 1 #(0 0 1) #(0 1 1))))
+
+(let ((*allow-named-registers* t))
+  (equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "aa"))
+          (list 0 2 #(0 0 1) #(0 1 2))))
+
+(let ((*allow-named-registers* t))
+  ;; switched groups
+  ;; "lazy" regex (analogous to "aa??")
+  (equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "a"))
+          (list 0 1 #(0 1 1) #(1 1 1))))
+
+(let ((*allow-named-registers* t))
+  ;; scanner ignores the second "a"
+  (equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "aa"))
+          (list 0 1 #(0 1 1) #(1 1 1))))
+
+(let ((*allow-named-registers* t))
+  ;; "aa" will be matched only when forced by adding "$" at the end
+  (equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)$") "aa"))
+          (list 0 2 #(0 1 1) #(1 1 2))))
+
+(string= (quote-meta-chars "[a-z]*") "\\[a\\-z\\]\\*")
+
+(string= (handler-case
+             (create-scanner "foo**x")
+           (ppcre-syntax-error (condition)
+             (format nil "Houston, we've got a problem with the string ~S:  Looks like something went wrong at position ~A.  The last message we received was \"~?\"."
+                     (ppcre-syntax-error-string condition)
+                     (ppcre-syntax-error-pos condition)
+                     (simple-condition-format-control condition)
+                     (simple-condition-format-arguments condition))))
+         "Houston, we've got a problem with the string \"foo**x\":  Looks like something went wrong at position 4.  The last message we received was \"Quantifier '*' not allowed.\".")
+
+(flet ((my-weird-filter (pos)
+         "Only match at this point if either pos is odd and the
+character we're looking at is lowercase or if pos is even and the next
+two characters we're looking at are uppercase. Consume these
+characters if there's a match."
+         (cond ((and (oddp pos)
+                     (< pos cl-ppcre::*end-pos*)
+                     (lower-case-p (char cl-ppcre::*string* pos)))
+                (1+ pos))
+               ((and (evenp pos)
+                     (< (1+ pos) cl-ppcre::*end-pos*)
+                     (upper-case-p (char cl-ppcre::*string* pos))
+                     (upper-case-p (char cl-ppcre::*string* (1+ pos))))
+                (+ pos 2))
+               (t nil))))
+  (let ((weird-regex `(:sequence "+" (:filter ,#'my-weird-filter) "+")))
+    (equalp (multiple-value-list (scan weird-regex "+A++a+AA+"))
+            (list 5 9 #() #()))))
+
+(let ((a "\\E*"))
+  (equalp (multiple-value-list (scan (concatenate 'string "(?:" (quote-meta-chars a) "){2}") "\\E*\\E*"))
+          (list 0 6 #() #())))
+
+(let ((a "\\E*"))
+  (equalp (multiple-value-list (scan `(:greedy-repetition 2 2 ,a) "\\E*\\E*"))
+          (list 0 6 #() #())))
+
+(loop for *optimize-char-classes* in '(:hash-table :hash-table* :charset :charset* :charmap)
+      for s = (create-scanner "(([a-c])+)x")
+      always (equalp (multiple-value-list (scan s "abcxy"))
+                     (list 0 4 #(0 2) #(3 3))))
--- a/test/tests.lisp
+++ b/test/tests.lisp
@ -0,0 +1,159 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/test/tests.lisp,v 1.12 2008/07/23 00:48:00 edi Exp $
+
+;;; The tests in this file test CL-PPCRE against testdata generated by
+;;; the Perl program `perltest.pl' from the input file `testinput' in
+;;; order to check compatibility with Perl and correctness of the
+;;; regex engine.
+
+;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-ppcre-test)
+
+(defvar *this-file* (load-time-value
+                     (or #.*compile-file-pathname* *load-pathname*))
+  "The location of this source file.")
+
+(defmacro do-tests ((name &optional show-progress-p) &body body)
+  "Helper macro which repeatedly executes BODY until the code in body
+calls the function DONE.  It is assumed that each invocation of BODY
+will be the execution of one test which returns NIL in case of success
+and list of string describing errors otherwise.
+
+The macro prints a simple progress indicator \(one dots for ten tests)
+to *STANDARD-OUTPUT* unless SHOW-PROGRESS-P is NIL and returns a true
+value iff all tests succeeded.  Errors in BODY are caught and reported
+\(and counted as failures)."
+  `(let ((successp t)
+         (testcount 1))
+     (block test-block
+       (flet ((done ()
+                (return-from test-block successp)))
+         (format t "~&Test: ~A~%" ,name)
+         (loop
+          (when (and ,show-progress-p (zerop (mod testcount 10)))
+            (format t ".")
+            (when (zerop (mod testcount 100))
+              (terpri))
+            (force-output))
+          (let ((errors
+                 (handler-case
+                     (progn ,@body)
+                   (error (msg)
+                     (list (format nil "~&got an unexpected error: ~A" msg))))))
+            (setq successp (and successp (null errors)))
+            (when errors
+              (format t "~&~4@A:~{~&   ~A~}~%" testcount errors))
+            (incf testcount)))))
+     successp))
+
+(defun simple-tests (&key (file-name
+                           (make-pathname :name "simple"
+                                          :type nil :version nil
+                                          :defaults *this-file*))
+                          (external-format '(:latin-1 :eol-style :lf))
+                          verbose)
+  "Loops through all the forms in the file FILE-NAME and executes each
+of them using EVAL.  It is assumed that each FORM specifies a test
+which returns a true value iff it succeeds.  Prints each test form to
+*STANDARD-OUTPUT* if VERBOSE is true and shows a simple progress
+indicator otherwise.  EXTERNAL-FORMAT is the FLEXI-STREAMS external
+format which is used to read the file.  Returns a true value iff all
+tests succeeded."
+  (with-open-file (binary-stream file-name :element-type 'flex:octet)
+    (let ((stream (flex:make-flexi-stream binary-stream :external-format external-format))
+          (*package* (find-package :cl-ppcre-test)))
+      (do-tests ((format nil "Simple tests from file ~S" (file-namestring file-name))
+                 (not verbose))
+        (let ((form (or (read stream nil) (done))))
+          (when verbose
+            (format t "~&~S" form))
+          (cond ((eval form) nil)
+                (t (list (format nil "~S returned NIL" form)))))))))
+
+(defun random-test-function (probability)
+  "Returns a random character test function which contains each
+character with probability PROBABILITY."
+  (let ((hash-table (make-hash-table)))
+    (dotimes (code char-code-limit)
+      (let ((char (code-char code)))
+        (when (and char (< (random 1.0d0) probability))
+          (setf (gethash (code-char code) hash-table) t))))
+    (lambda (char)
+      (gethash char hash-table))))
+
+(defun test-optimized-test-functions% (probability)
+  "Creates a random test function with probability PROBABILITY and six
+\(one for each possible \"kind\") corresponding optimized test
+functions, then checks for each character in turn that all functions
+agree on it."
+  (let* ((test-function (random-test-function probability))
+         (optimized-functions (loop for kind in '(nil
+                                                  :hash-table
+                                                  :hash-table*
+                                                  :charset
+                                                  :charset*
+                                                  :charmap)
+                                    collect (create-optimized-test-function test-function :kind kind))))
+    (loop for code below char-code-limit
+          for char = (code-char code)
+          for expected-result = (and char (funcall test-function char))
+          always (or (null char)
+                     (loop for optimized-function in optimized-functions
+                           always (eq (not (funcall optimized-function char))
+                                      (not expected-result)))))))
+
+(defun test-optimized-test-functions (&key verbose)
+  "Runs TEST-OPTIMIZED-TEST-FUNCTIONS% with different probabilities."
+  (let ((probabilities '(0 .001 .01 .1 1)))
+    (do-tests ("Optimized test functions - this might take some time..." (not verbose))
+      (let ((probability (or (pop probabilities) (done))))
+        (when verbose
+          (format t "~&Probability is ~A" probability))
+        (not (test-optimized-test-functions% probability))))))
+
+(defun run-all-tests (&key more-tests verbose)
+  "Runs all tests for CL-PPCRE and returns a true value iff all tests
+succeeded.  VERBOSE is interpreted by the individual test suites.
+MORE-TESTS can be a list of function designators designating
+additional tests to run.  This facility is used by the tests for
+CL-PPCRE-UNICODE."
+  (let ((successp t))
+    (macrolet ((run-test-suite (&body body)
+                 `(unless (progn ,@body)
+                    (setq successp nil))))
+      ;; run the automatically generated Perl tests
+      (run-test-suite (perl-test :verbose verbose))
+      (run-test-suite (test-optimized-test-functions :verbose verbose))
+      (run-test-suite (simple-tests :verbose verbose))
+      (when more-tests
+        (unless (listp more-tests)
+          (setq more-tests (list more-tests))
+          (dolist (test more-tests)
+            (run-test-suite (funcall test :verbose verbose))))))
+    (format t "~2&~:[Some tests failed~;All tests passed~]." successp)
+    successp))
--- a/test/unicode-tests.lisp
+++ b/test/unicode-tests.lisp
@ -0,0 +1,80 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/test/unicode-tests.lisp,v 1.8 2008/07/23 00:17:53 edi Exp $
+
+;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :cl-ppcre-test)
+
+(defun unicode-test (&key (file-name 
+                           (make-pathname :name "unicodetestdata"
+                                          :type nil :version nil
+                                          :defaults *this-file*)
+                           file-name-provided-p)
+                          verbose)
+  "Loops through all test cases in FILE-NAME and prints a report if
+VERBOSE is true.  Returns a true value if all tests succeeded.
+
+For the syntax of the tests in FILE-NAME refer to CL-UNICODE."
+  (with-open-file (stream file-name)
+    (let ((*regex-char-code-limit* (if file-name-provided-p *regex-char-code-limit* char-code-limit))
+          (*optimize-char-classes* (if file-name-provided-p *optimize-char-classes* nil))
+          ;; we only check for correctness and don't care about speed
+          ;; that match (but rather about space constraints of the
+          ;; trial versions)
+          (*use-bmh-matchers* (if file-name-provided-p *use-bmh-matchers* nil)))
+      (do-tests ((format nil "Running Unicode tests in file ~S" (file-namestring file-name))
+                 (not verbose))
+        (let ((input-line (or (read stream nil) (done)))
+              errors)
+          (destructuring-bind (char-code property-name expected-result)
+              input-line
+            (let ((char (and (< char-code char-code-limit) (code-char char-code))))
+              (when char
+                (when verbose
+                  (format t "~&~A: #x~X" property-name char-code))
+                (let* ((string (string char))
+                       (result-1 (scan (format nil "\\p{~A}" property-name) string))
+                       (result-2 (scan (format nil "[\\p{~A}]" property-name) string))
+                       (inverted-result-1 (scan (format nil "\\P{~A}" property-name) string))
+                       (inverted-result-2 (scan (format nil "[\\P{~A}]" property-name) string)))
+                  (unless (eq expected-result (not (not result-1)))
+                    (push (format nil "\(code-char #x~X) should ~:[not ~;~]have matched \"\\p{~A}\""
+                                  char-code expected-result property-name)
+                          errors))
+                  (unless (eq expected-result (not (not result-2)))
+                    (push (format nil "\(code-char #x~X) should ~:[not ~;~]have matched \"[\\p{~A}]\""
+                                  char-code expected-result property-name)
+                          errors))
+                  (unless (eq expected-result (not inverted-result-1))
+                    (push (format nil "\(code-char #x~X) should ~:[~;not ~]have matched \"\\P{~A}\""
+                                  char-code expected-result property-name)
+                          errors))
+                  (unless (eq expected-result (not inverted-result-2))
+                    (push (format nil "\(code-char #x~X) should ~:[~;not ~]have matched \"[\\P{~A}]\""
+                                  char-code expected-result property-name)
+                          errors)))
+                errors))))))))
--- a/test/unicodetestdata
+++ b/test/unicodetestdata
@ -0,0 +1,107 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/cl-ppcre/test/unicodetestdata,v 1.10 2008/07/22 14:00:35 edi Exp $
+
+;;; some arbitrary test data for Unicode properties - stolen from CL-UNICODE
+
+(#x0001 "ASCII" t)
+(#x0100 "ASCII" nil)
+(#x000A "Alphabetic" nil)
+(#x0061 "Alphabetic" t)
+(#x0061 "Ll" t)
+(#x0041 "Alphabetic" t)
+(#x0041 "alphabetic" t)
+(#x0041 "IsAlphabetic" t)
+(#x02E4 "Alphabetic" t)
+(#x0970 "Alphabetic" nil)
+(#x030D "BidiClass:NonspacingMark" t)
+(#x030D "NonspacingMark" t)
+(#x030D "nonspacing mark" t)
+(#xE0146 "BidiClass:NonspacingMark" t)
+(#x000D "BidiClass:WhiteSpace" nil)
+(#x0020 "BidiClass:WhiteSpace" t)
+(#x2006 "BidiClass:WhiteSpace" t)
+(#x12470 "Cuneiform" t)
+(#x12470 "IsCuneiform" t)
+(#x12470 "CuneiformNumbersAndPunctuation" t)
+(#x12470 "Block:CuneiformNumbersAndPunctuation" t)
+(#x12470 "InCuneiformNumbersAndPunctuation" t)
+(#x12470 "Script:Cuneiform" t)
+(#x0041 "Block:Hebrew" nil)
+(#x0593 "Block:Hebrew" t)
+(#x0593 "InHebrew" t)
+(#x040D "Block:Cyrillic" t)
+(#x040D "InCyrillic" t)
+(#x0042 "Block:Cyrillic" nil)
+(#x2011 "Dash" t)
+(#x2011 "IsDash" t)
+(#xFF0D "Dash" t)
+(#x003D "Dash" nil)
+(#x00F0 "Lowercase" t)
+(#x00F0 "IsLowercase" t)
+(#x00F0 "lowercase" t)
+(#x00F0 "Ll" t)
+(#x0067 "Lowercase" t)
+(#x010A "Lowercase" nil)
+(#x1D6C1 "Lowercase" nil)
+(#x0023 "CurrencySymbol" nil)
+(#x0024 "CurrencySymbol" t)
+(#x0024 "IsCurrencySymbol" t)
+(#x0024 "currency symbol" t)
+(#x20AC "CurrencySymbol" t)
+(#xFFE6 "CurrencySymbol" t)
+(#x002B "Sm" t)
+(#x002B "Math" t)
+(#x002B "IsMath" t)
+(#x002B "math" t)
+(#x211C "Math" t)
+(#x1D7D2 "Math" t)
+(#x002A "Math" nil)
+(#x25C9 "Math" nil)
+(#x0000 "NonCharacterCodePoint" nil)
+(#xFDD0 "NonCharacterCodePoint" t)
+(#xFDD0 "Non-Character-Code-Point" t)
+(#xFDD0 "non-character-code-point" t)
+(#xFFFFF "NonCharacterCodePoint" t)
+(#x0043 "Arabic" nil)
+(#x0606 "Arabic" t)
+(#x0606 "arabic" t)
+(#x0606 "IsArabic" t)
+(#x0606 "Script:Arabic" t)
+(#x0044 "IsVariationSelector" nil)
+(#x0044 "VariationSelector" nil)
+(#x180B "VariationSelector" t)
+(#x180B "Variation_Selector" t)
+(#x180B "Variation-Selector" t)
+(#x180B "variationselector" t)
+(#x180B "variation selector" t)
+(#x180B "IsVariationSelector" t)
+(#x00B5 "XIDContinue" t)
+(#x00B5 "IsXIDContinue" t)
+(#x00B5 "IsXID_Continue" t)
+(#x00B5 "Is_XID_Continue" t)
+(#x00B5 "XID_Continue" t)
+(#x33FF "Unified_Ideograph" nil)
+(#x33FF "Ideographic" nil)
+(#x3400 "Unified_Ideograph" t)
+(#x3400 "Ideographic" t)
+(#x3400 "Han" t)
+(#x3400 "OtherLetter" t)
+(#x3400 "Alphabetic" t)
+(#x3400 "Common" nil)
+(#x3400 "Assigned" t)
+(#x3400 "Any" t)
+(#x0378 "Cn" t)
+(#x0378 "Unassigned" t)
+(#x0377 "Cn" nil)
+(#x0377 "Unassigned" nil)
+(#x2800 "Braille" t)
+(#x2800 "Script:Braille" t)
+(#x2800 "OtherSymbol" t)
+(#x0027 "QuotationMark" t)
+(#x201C "QuotationMark" t)
+(#x201C "OtherNeutral" t)
+(#x201C "PatternSyntax" t)
+(#x0028 "Bidi_Mirrored" t)
+(#x0028 "BidiMirrored" t)
+(#x0028 "IsBidiMirrored" t)
+(#x0027 "Bidi_Mirrored" nil)
--- a/util.lisp
+++ b/util.lisp
@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/cl-ppcre/util.lisp,v 1.40 2008/07/03 10:06:16 edi Exp $
+;;; $Header: /usr/local/cvsrep/cl-ppcre/util.lisp,v 1.46 2008/07/06 18:12:05 edi Exp $

 ;;; Utility functions and constants dealing with the character sets we
 ;;; use to encode character classes
@ -30,7 +30,12 @@
 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-(in-package #:cl-ppcre)
+(in-package :cl-ppcre)
+
+(defmacro defconstant (name value &optional doc)
+  "Make sure VALUE is evaluated only once \(to appease SBCL)."
+  `(cl:defconstant ,name (if (boundp ',name) (symbol-value ',name) ,value)
+     ,@(when doc (list doc))))

 #+:lispworks
 (eval-when (:compile-toplevel :load-toplevel :execute)
@ -100,120 +105,36 @@ are discarded \(that is, the body is an implicit PROGN)."
                            `(let (,,@temps)
                              ,,@body))))))

-(eval-when (:compile-toplevel :execute :load-toplevel)
-  (defun make-char-set (test)
-    (declare #.*special-optimize-settings*)
-    "Returns a CHARSET for all characters satisfying test."
-    (loop with set = (make-charset)
-          for code of-type fixnum from 0 below char-code-limit
-          for char = (code-char code)
-          if (and char (funcall test char))
-          do (add-to-charset char set)
-          finally (return set)))
+(declaim (inline digit-char-p))  
+(defun digit-char-p (chr)
+  (declare #.*standard-optimize-settings*)
+  "Tests whether a character is a decimal digit, i.e. the same as
+Perl's [\\d].  Note that this function shadows the standard Common
+Lisp function CL:DIGIT-CHAR-P."
+  (char<= #\0 chr #\9))

 (declaim (inline word-char-p))  
-  
 (defun word-char-p (chr)
  (declare #.*standard-optimize-settings*)
-    "Tests whether a character is a \"word\" character.
-In the ASCII charset this is equivalent to a-z, A-Z, 0-9, or _,
-i.e. the same as Perl's [\\w]."
+  "Tests whether a character is a \"word\" character.  In the ASCII
+charset this is equivalent to a-z, A-Z, 0-9, or _, i.e. the same as
+Perl's [\\w]."
  (or (alphanumericp chr)
      (char= chr #\_)))

-  (unless (boundp '+whitespace-char-string+)
 (defconstant +whitespace-char-string+
-      (coerce
-       '(#\Space #\Tab #\Linefeed #\Return #\Page)
-       'string)
+  (coerce '(#\Space #\Tab #\Linefeed #\Return #\Page) 'string)
  "A string of all characters which are considered to be whitespace.
-Same as Perl's [\\s]."))
+Same as Perl's [\\s].")

 (defun whitespacep (chr)
  (declare #.*special-optimize-settings*)
-    "Tests whether a character is whitespace,
-i.e. whether it would match [\\s] in Perl."
-    (find chr +whitespace-char-string+ :test #'char=)))
-
-;; the following DEFCONSTANT statements are wrapped with
-;; (UNLESS (BOUNDP ...) ...) to make SBCL happy
-
-(unless (boundp '+digit-set+)
-  (defconstant +digit-set+
-    (make-char-set (lambda (chr) (char<= #\0 chr #\9)))
-    "Character set containing the digits from 0 to 9."))
-
-(unless (boundp '+word-char-set+)
-  (defconstant +word-char-set+
-    (make-char-set #'word-char-p)
-    "Character set containing all \"word\" characters."))
-
-(unless (boundp '+whitespace-char-set+)
-  (defconstant +whitespace-char-set+
-    (make-char-set #'whitespacep)
-    "Character set containing all whitespace characters."))
-
-(defun create-ranges-from-set (set &key downcasep)
-  (declare #.*standard-optimize-settings*)
-  "Tries to identify up to three intervals \(with respect to CHAR<)
-which together comprise the charset SET.  Returns NIL if this is not
-possible.  If DOWNCASEP is true it will treat the charset as if it
-represents both the lower-case and the upper-case variants of its
-members and will only return the respective lower-case intervals."
-  ;; discard empty charsets
-  (unless (and set (plusp (charset-count set)))
-    (return-from create-ranges-from-set nil))
-  (loop with min1 and min2 and min3
-        and max1 and max2 and max3
-        ;; loop through all characters in SET, sorted by CHAR<
-        ;; (actually by < on their character codes, see 13.1.6 in the
-        ;; ANSI standard)
-        for code of-type fixnum below *regex-char-code-limit*
-        for char = (code-char code)
-        when (and char (in-charset-p (if downcasep (char-downcase char) char) set))
-        ;; MIN1, MAX1, etc. are _exclusive_
-        ;; bounds of the intervals identified so far
-        do (cond
-            ((not min1)
-             ;; this will only happen once, for the first character
-             (setq min1 (1- code)
-                   max1 (1+ code)))
-            ((<= (the fixnum min1) code (the fixnum max1))
-             ;; we're here as long as CHAR fits into the first interval
-             (setq min1 (min (the fixnum min1) (1- code))
-                   max1 (max (the fixnum max1) (1+ code))))
-            ((not min2)
-             ;; we need to open a second interval
-             ;; this'll also happen only once
-             (setq min2 (1- code)
-                   max2 (1+ code)))
-            ((<= (the fixnum min2) code (the fixnum max2))
-             ;; CHAR fits into the second interval
-             (setq min2 (min (the fixnum min2) (1- code))
-                   max2 (max (the fixnum max2) (1+ code))))
-            ((not min3)
-             ;; we need to open the third interval
-             ;; happens only once
-             (setq min3 (1- code)
-                   max3 (1+ code)))
-            ((<= (the fixnum min3) code (the fixnum max3))
-             ;; CHAR fits into the third interval
-             (setq min3 (min (the fixnum min3) (1- code))
-                   max3 (max (the fixnum max3) (1+ code))))
-            (t
-             ;; we're out of luck, CHAR doesn't fit
-             ;; into one of the three intervals
-             (return nil)))
-        ;; on success return all bounds
-        ;; make them inclusive bounds before returning
-        finally (return (values (code-char (1+ min1))
-                                (code-char (1- max1))
-                                (and min2 (code-char (1+ min2)))
-                                (and max2 (code-char (1- max2)))
-                                (and min3 (code-char (1+ min3)))
-                                (and max3 (code-char (1- max3)))))))
+  "Tests whether a character is whitespace, i.e. whether it would
+match [\\s] in Perl."
+  (find chr +whitespace-char-string+ :test #'char=))

 (defmacro maybe-coerce-to-simple-string (string)
+  "Coerces STRING to a simple STRING unless it already is one."
  (with-unique-names (=string=)
    `(let ((,=string= ,string))
      (cond ((simple-string-p ,=string=)
@ -223,16 +144,16 @@ members and will only return the respective lower-case intervals."

 (declaim (inline nsubseq))
 (defun nsubseq (sequence start &optional (end (length sequence)))
-  "Return a subsequence by pointing to location in original sequence."
+  "Returns a subsequence by pointing to location in original sequence."
  (make-array (- end start)
              :element-type (array-element-type sequence)
              :displaced-to sequence
              :displaced-index-offset start))

 (defun normalize-var-list (var-list)
-  "Utility function for REGISTER-GROUPS-BIND and
-DO-REGISTER-GROUPS. Creates the long form \(a list of \(FUNCTION VAR)
-entries) out of the short form of VAR-LIST."
+  "Utility function for REGISTER-GROUPS-BIND and DO-REGISTER-GROUPS.
+Creates the long form \(a list of \(FUNCTION VAR) entries) out of the
+short form of VAR-LIST."
  (loop for element in var-list
        if (consp element)
          nconc (loop for var in (rest element)
@ -241,20 +162,33 @@ entries) out of the short form of VAR-LIST."
          collect (list '(function identity) element)))

 (defun string-list-to-simple-string (string-list)
-  (declare #.*standard-optimize-settings*)
  "Concatenates a list of strings to one simple-string."
+  (declare #.*standard-optimize-settings*)
  ;; this function provided by JP Massar; note that we can't use APPLY
  ;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
  (let ((total-size 0))
-    (declare (type fixnum total-size))
+    (declare (fixnum total-size))
    (dolist (string string-list)
-      #-genera (declare (type string string))
+      #-:genera (declare (string string))
      (incf total-size (length string)))
    (let ((result-string (make-sequence 'simple-string total-size))
          (curr-pos 0))
-      (declare (type fixnum curr-pos))
+      (declare (fixnum curr-pos))
      (dolist (string string-list)
-        #-genera (declare (type string string))
+        #-:genera (declare (string string))
        (replace result-string string :start1 curr-pos)
        (incf curr-pos (length string)))
      result-string)))
+
+(defun complement* (test-function)
+  "Like COMPLEMENT but optimized for unary functions."
+  (declare #.*standard-optimize-settings*)
+  (typecase test-function
+    (function
+     (lambda (char)
+       (declare (character char))
+       (not (funcall (the function test-function) char))))
+    (otherwise
+     (lambda (char)
+       (declare (character char))
+       (not (funcall test-function char))))))