diff --git a/CHANGELOG b/CHANGELOG index 4e12322..fe88252 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Version 2.0.4 +2013-04-13 +Rewrite SEQ without using recursion (Stas Boukarev) +:property and :invert-property scanning bug fix (Cyrus Harmon) +Improve documentation (David Lindes) + Version 2.0.3 2009-10-28 Use LW:SIMPLE-TEXT-STRING throughout for LispWorks diff --git a/doc/index.html b/doc/index.html index 3277cb6..acc9396 100644 --- a/doc/index.html +++ b/doc/index.html @@ -151,7 +151,7 @@ href="http://weitz.de/regex-coach/">The Regex Coach. CL-PPCRE together with this documentation can be downloaded from http://weitz.de/files/cl-ppcre.tar.gz. The -current version is 2.0.3. +current version is 2.0.4.

CL-PPCRE comes with a system definition for ASDF and you compile and @@ -159,13 +159,10 @@ load it in the usual way. There are no dependencies (except that the test suite which is not needed for normal operation depends on FLEXI-STREAMS).

-CL-PPCRE is integrated into the package/port systems -of Debian, Gentoo, -and FreeBSD, but before you -install it from there, you should check if they actually offer the -latest release. Installation -via ASDF-Install -should as well be possible. + The preferred way to install CL-PPCRE is +through Quicklisp: +

(ql:quickload :cl-ppcre)
+

You can run a test suite which tests most aspects of the library with

diff --git a/lexer.lisp b/lexer.lisp
index 71c2642..032b5ca 100644
--- a/lexer.lisp
+++ b/lexer.lisp
@@ -290,8 +290,7 @@ handled elsewhere."
                        (return))
                      (write-char char out))))))
     (list (if (char= first-char #\p) :property :inverted-property)
-          ;; we must reverse here because of what PARSE-STRING does
-          (nreverse name))))
+          name)))
 
 (defun collect-char-class (lexer)
   "Reads and consumes characters from regex string until a right
@@ -571,7 +570,7 @@ closing #\> will also be consumed."
                           ;; back-referencing a named register
                           (incf (lexer-pos lexer))
                           (list :back-reference
-                                (nreverse (parse-register-name-aux lexer))))
+                                (parse-register-name-aux lexer)))
                          (t
                           ;; false alarm, just unescape \k
                           #\k)))
diff --git a/optimize.lisp b/optimize.lisp
index 16b4b36..d538733 100644
--- a/optimize.lisp
+++ b/optimize.lisp
@@ -367,7 +367,7 @@ zero-length assertion."))
 ends with wrapped into a STR object, otherwise NIL.
 OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR
 collected or :VOID if no STR has been collected yet. (This is a helper
-function called by END-STRIN.)"))
+function called by END-STRING.)"))
 
 (defmethod end-string-aux ((str str)
                            &optional (old-case-insensitive-p :void))
diff --git a/parser.lisp b/parser.lisp
index f6d9a3d..72e39e9 100644
--- a/parser.lisp
+++ b/parser.lisp
@@ -128,10 +128,7 @@ Will return  or \( ) where
                   ;; have been the "(?:"")" production
                   (cons :group (nconc flags (list reg-expr)))
                   (if (eq open-token :open-paren-less-letter)
-                      (list :named-register
-                            ;; every string was reversed, so we have to
-                            ;; reverse it back to get the name
-                            (nreverse register-name)
+                      (list :named-register register-name
                             reg-expr)
                       (list (case open-token
                               ((:open-paren)
@@ -201,54 +198,42 @@ Will return  or (:SEQUENCE  )."
     ;; to parse a  or  in order to catch empty regular
     ;; expressions
     (if (start-of-subexpr-p lexer)
-      (let ((quant (quant lexer)))
-        (if (start-of-subexpr-p lexer)
-          (let* ((seq (seq lexer))
-                 (quant-is-char-p (characterp quant))
-                 (seq-is-sequence-p (and (consp seq)
-                                         (eq (first seq) :sequence))))
-            (cond ((and quant-is-char-p
-                        (characterp seq))
-                    (make-array-from-two-chars seq quant))
-                  ((and quant-is-char-p
-                        (stringp seq))
-                    (vector-push-extend quant seq)
-                    seq)
-                  ((and quant-is-char-p
-                        seq-is-sequence-p
-                        (characterp (second seq)))
-                    (cond ((cddr seq)
-                            (setf (cdr seq)
-                                    (cons
-                                     (make-array-from-two-chars (second seq)
-                                                                quant)
-                                     (cddr seq)))
-                            seq)
-                          (t (make-array-from-two-chars (second seq) quant))))
-                  ((and quant-is-char-p
-                        seq-is-sequence-p
-                        (stringp (second seq)))
-                    (cond ((cddr seq)
-                            (setf (cdr seq)
-                                    (cons
-                                     (progn
-                                       (vector-push-extend quant (second seq))
-                                       (second seq))
-                                     (cddr seq)))
-                            seq)
-                          (t 
-                            (vector-push-extend quant (second seq))
-                            (second seq))))
-                  (seq-is-sequence-p
-                    ;; if  is also a :SEQUENCE parse tree we merge
-                    ;; both lists into one to avoid unnecessary consing
-                    (setf (cdr seq)
-                            (cons quant (cdr seq)))
-                    seq)
-                  (t (list :sequence quant seq))))
-          quant))
-      :void)))
-  
+        (loop with seq-is-sequence-p = nil
+              with last-cdr
+              for quant = (quant lexer)
+              for quant-is-char-p = (characterp quant)
+              for seq = quant
+              then
+              (cond ((and quant-is-char-p (characterp seq))
+                     (make-array-from-two-chars seq quant))
+                    ((and quant-is-char-p (stringp seq))
+                     (vector-push-extend quant seq)
+                     seq)
+                    ((not seq-is-sequence-p)
+                     (setf last-cdr (list quant)
+                           seq-is-sequence-p t)
+                     (list* :sequence seq last-cdr))
+                    ((and quant-is-char-p
+                          (characterp (car last-cdr)))
+                     (setf (car last-cdr)
+                           (make-array-from-two-chars (car last-cdr)
+                                                      quant))
+                     seq)
+                    ((and quant-is-char-p
+                          (stringp (car last-cdr)))
+                     (vector-push-extend quant (car last-cdr))
+                     seq)
+                    (t
+                     ;; if  is also a :SEQUENCE parse tree we merge
+                     ;; both lists into one
+                     (let ((cons (list quant)))
+                       (psetf last-cdr cons
+                              (cdr last-cdr) cons))
+                     seq))
+              while (start-of-subexpr-p lexer)
+              finally (return seq))
+        :void)))
+
 (defun reg-expr (lexer)
   "Parses and consumes a , a complete regular expression.
 The productions are:  ->  | \"|\".
@@ -294,25 +279,11 @@ Will return  or (:ALTERNATION  )."
               (setf (lexer-pos lexer) pos)
               seq)))))))
 
-(defun reverse-strings (parse-tree)
-  "Recursively walks through PARSE-TREE and destructively reverses all
-strings in it."
-  (declare #.*standard-optimize-settings*)
-  (cond ((stringp parse-tree)
-          (nreverse parse-tree))
-        ((consp parse-tree)
-          (loop for parse-tree-rest on parse-tree
-                while parse-tree-rest
-                do (setf (car parse-tree-rest)
-                           (reverse-strings (car parse-tree-rest))))
-          parse-tree)
-        (t parse-tree)))
-
 (defun parse-string (string)
   "Translate the regex string STRING into a parse tree."
   (declare #.*standard-optimize-settings*)
   (let* ((lexer (make-lexer string))
-         (parse-tree (reverse-strings (reg-expr lexer))))
+         (parse-tree (reg-expr lexer)))
     ;; check whether we've consumed the whole regex string
     (if (end-of-string-p lexer)
       parse-tree
diff --git a/regex-class.lisp b/regex-class.lisp
index fb89c25..43c36e5 100644
--- a/regex-class.lisp
+++ b/regex-class.lisp
@@ -108,6 +108,10 @@ must appear after this repetition.")
 register."))
   (:documentation "REPETITION objects represent repetitions of regexes."))
 
+(defmethod print-object ((repetition repetition) stream)
+  (print-unreadable-object (repetition stream :type t :identity t)
+    (princ (regex repetition) stream)))
+
 (defclass register (regex)
   ((regex :initarg :regex
           :accessor regex
@@ -122,6 +126,10 @@ This is the index into *REGS-START* and *REGS-END*.")
          :documentation "Name of this register or NIL."))
   (:documentation "REGISTER objects represent register groups."))
 
+(defmethod print-object ((register register) stream)
+  (print-unreadable-object (register stream :type t :identity t)
+    (princ (regex register) stream)))
+
 (defclass standalone (regex)
   ((regex :initarg :regex
           :accessor regex
@@ -181,6 +189,10 @@ string because the SCAN function has done this already.")
 STR which starts END-STRING (a slot of MATCHER)."))
   (:documentation "STR objects represent string."))
 
+(defmethod print-object ((str str) stream)
+  (print-unreadable-object (str stream :type t :identity t)
+    (princ (str str) stream)))
+
 (defclass anchor (regex)
   ((startp :initarg :startp
            :reader startp
diff --git a/test/simple b/test/simple
index 69c3fe7..35f052f 100644
--- a/test/simple
+++ b/test/simple
@@ -347,3 +347,19 @@ characters if there's a match."
       for s = (create-scanner "(([a-c])+)x")
       always (equalp (multiple-value-list (scan s "abcxy"))
                      (list 0 4 #(0 2) #(3 3))))
+
+(labels ((char-code-odd-p (char)
+           (oddp (char-code char)))
+         (char-code-even-p (char)
+           (evenp (char-code char)))
+         (resolver (name)
+           (cond ((string= name "odd") #'char-code-odd-p)
+                 ((string= name "even") #'char-code-even-p)
+                 ((string= name "true") (constantly t))
+                 (t (error "Can't resolve ~S." name)))))
+  (equalp
+   (let ((*property-resolver* #'resolver))
+     (list (regex-replace-all (create-scanner "\\p{odd}") "abcd" "+")
+           (regex-replace-all (create-scanner "\\p{even}") "abcd" "+")
+           (regex-replace-all (create-scanner "\\p{true}") "abcd" "+")))
+   '("+b+d" "a+c+" "++++")))