Merge pull request #5 from stassats/master

Rewrite SEQ without using recursion.
This commit is contained in:
Hans Hübner
2013-04-13 05:49:58 -07:00
5 changed files with 69 additions and 71 deletions

View File

@ -290,8 +290,7 @@ handled elsewhere."
(return)) (return))
(write-char char out)))))) (write-char char out))))))
(list (if (char= first-char #\p) :property :inverted-property) (list (if (char= first-char #\p) :property :inverted-property)
;; we must reverse here because of what PARSE-STRING does name)))
(nreverse name))))
(defun collect-char-class (lexer) (defun collect-char-class (lexer)
"Reads and consumes characters from regex string until a right "Reads and consumes characters from regex string until a right
@ -571,7 +570,7 @@ closing #\> will also be consumed."
;; back-referencing a named register ;; back-referencing a named register
(incf (lexer-pos lexer)) (incf (lexer-pos lexer))
(list :back-reference (list :back-reference
(nreverse (parse-register-name-aux lexer)))) (parse-register-name-aux lexer)))
(t (t
;; false alarm, just unescape \k ;; false alarm, just unescape \k
#\k))) #\k)))

View File

@ -367,7 +367,7 @@ zero-length assertion."))
ends with wrapped into a STR object, otherwise NIL. ends with wrapped into a STR object, otherwise NIL.
OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR
collected or :VOID if no STR has been collected yet. (This is a helper collected or :VOID if no STR has been collected yet. (This is a helper
function called by END-STRIN.)")) function called by END-STRING.)"))
(defmethod end-string-aux ((str str) (defmethod end-string-aux ((str str)
&optional (old-case-insensitive-p :void)) &optional (old-case-insensitive-p :void))

View File

@ -128,10 +128,7 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
;; have been the "(?:"<regex>")" production ;; have been the "(?:"<regex>")" production
(cons :group (nconc flags (list reg-expr))) (cons :group (nconc flags (list reg-expr)))
(if (eq open-token :open-paren-less-letter) (if (eq open-token :open-paren-less-letter)
(list :named-register (list :named-register register-name
;; every string was reversed, so we have to
;; reverse it back to get the name
(nreverse register-name)
reg-expr) reg-expr)
(list (case open-token (list (case open-token
((:open-paren) ((:open-paren)
@ -201,54 +198,42 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
;; to parse a <seq> or <quant> in order to catch empty regular ;; to parse a <seq> or <quant> in order to catch empty regular
;; expressions ;; expressions
(if (start-of-subexpr-p lexer) (if (start-of-subexpr-p lexer)
(let ((quant (quant lexer))) (loop with seq-is-sequence-p = nil
(if (start-of-subexpr-p lexer) with last-cdr
(let* ((seq (seq lexer)) for quant = (quant lexer)
(quant-is-char-p (characterp quant)) for quant-is-char-p = (characterp quant)
(seq-is-sequence-p (and (consp seq) for seq = quant
(eq (first seq) :sequence)))) then
(cond ((and quant-is-char-p (cond ((and quant-is-char-p (characterp seq))
(characterp seq)) (make-array-from-two-chars seq quant))
(make-array-from-two-chars seq quant)) ((and quant-is-char-p (stringp seq))
((and quant-is-char-p (vector-push-extend quant seq)
(stringp seq)) seq)
(vector-push-extend quant seq) ((not seq-is-sequence-p)
seq) (setf last-cdr (list quant)
((and quant-is-char-p seq-is-sequence-p t)
seq-is-sequence-p (list* :sequence seq last-cdr))
(characterp (second seq))) ((and quant-is-char-p
(cond ((cddr seq) (characterp (car last-cdr)))
(setf (cdr seq) (setf (car last-cdr)
(cons (make-array-from-two-chars (car last-cdr)
(make-array-from-two-chars (second seq) quant))
quant) seq)
(cddr seq))) ((and quant-is-char-p
seq) (stringp (car last-cdr)))
(t (make-array-from-two-chars (second seq) quant)))) (vector-push-extend quant (car last-cdr))
((and quant-is-char-p seq)
seq-is-sequence-p (t
(stringp (second seq))) ;; if <seq> is also a :SEQUENCE parse tree we merge
(cond ((cddr seq) ;; both lists into one
(setf (cdr seq) (let ((cons (list quant)))
(cons (psetf last-cdr cons
(progn (cdr last-cdr) cons))
(vector-push-extend quant (second seq)) seq))
(second seq)) while (start-of-subexpr-p lexer)
(cddr seq))) finally (return seq))
seq) :void)))
(t
(vector-push-extend quant (second seq))
(second seq))))
(seq-is-sequence-p
;; if <seq> is also a :SEQUENCE parse tree we merge
;; both lists into one to avoid unnecessary consing
(setf (cdr seq)
(cons quant (cdr seq)))
seq)
(t (list :sequence quant seq))))
quant))
:void)))
(defun reg-expr (lexer) (defun reg-expr (lexer)
"Parses and consumes a <regex>, a complete regular expression. "Parses and consumes a <regex>, a complete regular expression.
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>. The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
@ -294,25 +279,11 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
(setf (lexer-pos lexer) pos) (setf (lexer-pos lexer) pos)
seq))))))) seq)))))))
(defun reverse-strings (parse-tree)
"Recursively walks through PARSE-TREE and destructively reverses all
strings in it."
(declare #.*standard-optimize-settings*)
(cond ((stringp parse-tree)
(nreverse parse-tree))
((consp parse-tree)
(loop for parse-tree-rest on parse-tree
while parse-tree-rest
do (setf (car parse-tree-rest)
(reverse-strings (car parse-tree-rest))))
parse-tree)
(t parse-tree)))
(defun parse-string (string) (defun parse-string (string)
"Translate the regex string STRING into a parse tree." "Translate the regex string STRING into a parse tree."
(declare #.*standard-optimize-settings*) (declare #.*standard-optimize-settings*)
(let* ((lexer (make-lexer string)) (let* ((lexer (make-lexer string))
(parse-tree (reverse-strings (reg-expr lexer)))) (parse-tree (reg-expr lexer)))
;; check whether we've consumed the whole regex string ;; check whether we've consumed the whole regex string
(if (end-of-string-p lexer) (if (end-of-string-p lexer)
parse-tree parse-tree

View File

@ -108,6 +108,10 @@ must appear after this repetition.")
register.")) register."))
(:documentation "REPETITION objects represent repetitions of regexes.")) (:documentation "REPETITION objects represent repetitions of regexes."))
(defmethod print-object ((repetition repetition) stream)
(print-unreadable-object (repetition stream :type t :identity t)
(princ (regex repetition) stream)))
(defclass register (regex) (defclass register (regex)
((regex :initarg :regex ((regex :initarg :regex
:accessor regex :accessor regex
@ -122,6 +126,10 @@ This is the index into *REGS-START* and *REGS-END*.")
:documentation "Name of this register or NIL.")) :documentation "Name of this register or NIL."))
(:documentation "REGISTER objects represent register groups.")) (:documentation "REGISTER objects represent register groups."))
(defmethod print-object ((register register) stream)
(print-unreadable-object (register stream :type t :identity t)
(princ (regex register) stream)))
(defclass standalone (regex) (defclass standalone (regex)
((regex :initarg :regex ((regex :initarg :regex
:accessor regex :accessor regex
@ -181,6 +189,10 @@ string because the SCAN function has done this already.")
STR which starts END-STRING (a slot of MATCHER).")) STR which starts END-STRING (a slot of MATCHER)."))
(:documentation "STR objects represent string.")) (:documentation "STR objects represent string."))
(defmethod print-object ((str str) stream)
(print-unreadable-object (str stream :type t :identity t)
(princ (str str) stream)))
(defclass anchor (regex) (defclass anchor (regex)
((startp :initarg :startp ((startp :initarg :startp
:reader startp :reader startp

View File

@ -347,3 +347,19 @@ characters if there's a match."
for s = (create-scanner "(([a-c])+)x") for s = (create-scanner "(([a-c])+)x")
always (equalp (multiple-value-list (scan s "abcxy")) always (equalp (multiple-value-list (scan s "abcxy"))
(list 0 4 #(0 2) #(3 3)))) (list 0 4 #(0 2) #(3 3))))
(labels ((char-code-odd-p (char)
(oddp (char-code char)))
(char-code-even-p (char)
(evenp (char-code char)))
(resolver (name)
(cond ((string= name "odd") #'char-code-odd-p)
((string= name "even") #'char-code-even-p)
((string= name "true") (constantly t))
(t (error "Can't resolve ~S." name)))))
(equalp
(let ((*property-resolver* #'resolver))
(list (regex-replace-all (create-scanner "\\p{odd}") "abcd" "+")
(regex-replace-all (create-scanner "\\p{even}") "abcd" "+")
(regex-replace-all (create-scanner "\\p{true}") "abcd" "+")))
'("+b+d" "a+c+" "++++")))