Merge pull request #5 from stassats/master
Rewrite SEQ without using recursion.
This commit is contained in:
@ -290,8 +290,7 @@ handled elsewhere."
|
|||||||
(return))
|
(return))
|
||||||
(write-char char out))))))
|
(write-char char out))))))
|
||||||
(list (if (char= first-char #\p) :property :inverted-property)
|
(list (if (char= first-char #\p) :property :inverted-property)
|
||||||
;; we must reverse here because of what PARSE-STRING does
|
name)))
|
||||||
(nreverse name))))
|
|
||||||
|
|
||||||
(defun collect-char-class (lexer)
|
(defun collect-char-class (lexer)
|
||||||
"Reads and consumes characters from regex string until a right
|
"Reads and consumes characters from regex string until a right
|
||||||
@ -571,7 +570,7 @@ closing #\> will also be consumed."
|
|||||||
;; back-referencing a named register
|
;; back-referencing a named register
|
||||||
(incf (lexer-pos lexer))
|
(incf (lexer-pos lexer))
|
||||||
(list :back-reference
|
(list :back-reference
|
||||||
(nreverse (parse-register-name-aux lexer))))
|
(parse-register-name-aux lexer)))
|
||||||
(t
|
(t
|
||||||
;; false alarm, just unescape \k
|
;; false alarm, just unescape \k
|
||||||
#\k)))
|
#\k)))
|
||||||
|
|||||||
@ -367,7 +367,7 @@ zero-length assertion."))
|
|||||||
ends with wrapped into a STR object, otherwise NIL.
|
ends with wrapped into a STR object, otherwise NIL.
|
||||||
OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR
|
OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR
|
||||||
collected or :VOID if no STR has been collected yet. (This is a helper
|
collected or :VOID if no STR has been collected yet. (This is a helper
|
||||||
function called by END-STRIN.)"))
|
function called by END-STRING.)"))
|
||||||
|
|
||||||
(defmethod end-string-aux ((str str)
|
(defmethod end-string-aux ((str str)
|
||||||
&optional (old-case-insensitive-p :void))
|
&optional (old-case-insensitive-p :void))
|
||||||
|
|||||||
105
parser.lisp
105
parser.lisp
@ -128,10 +128,7 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
|
|||||||
;; have been the "(?:"<regex>")" production
|
;; have been the "(?:"<regex>")" production
|
||||||
(cons :group (nconc flags (list reg-expr)))
|
(cons :group (nconc flags (list reg-expr)))
|
||||||
(if (eq open-token :open-paren-less-letter)
|
(if (eq open-token :open-paren-less-letter)
|
||||||
(list :named-register
|
(list :named-register register-name
|
||||||
;; every string was reversed, so we have to
|
|
||||||
;; reverse it back to get the name
|
|
||||||
(nreverse register-name)
|
|
||||||
reg-expr)
|
reg-expr)
|
||||||
(list (case open-token
|
(list (case open-token
|
||||||
((:open-paren)
|
((:open-paren)
|
||||||
@ -201,54 +198,42 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
|
|||||||
;; to parse a <seq> or <quant> in order to catch empty regular
|
;; to parse a <seq> or <quant> in order to catch empty regular
|
||||||
;; expressions
|
;; expressions
|
||||||
(if (start-of-subexpr-p lexer)
|
(if (start-of-subexpr-p lexer)
|
||||||
(let ((quant (quant lexer)))
|
(loop with seq-is-sequence-p = nil
|
||||||
(if (start-of-subexpr-p lexer)
|
with last-cdr
|
||||||
(let* ((seq (seq lexer))
|
for quant = (quant lexer)
|
||||||
(quant-is-char-p (characterp quant))
|
for quant-is-char-p = (characterp quant)
|
||||||
(seq-is-sequence-p (and (consp seq)
|
for seq = quant
|
||||||
(eq (first seq) :sequence))))
|
then
|
||||||
(cond ((and quant-is-char-p
|
(cond ((and quant-is-char-p (characterp seq))
|
||||||
(characterp seq))
|
(make-array-from-two-chars seq quant))
|
||||||
(make-array-from-two-chars seq quant))
|
((and quant-is-char-p (stringp seq))
|
||||||
((and quant-is-char-p
|
(vector-push-extend quant seq)
|
||||||
(stringp seq))
|
seq)
|
||||||
(vector-push-extend quant seq)
|
((not seq-is-sequence-p)
|
||||||
seq)
|
(setf last-cdr (list quant)
|
||||||
((and quant-is-char-p
|
seq-is-sequence-p t)
|
||||||
seq-is-sequence-p
|
(list* :sequence seq last-cdr))
|
||||||
(characterp (second seq)))
|
((and quant-is-char-p
|
||||||
(cond ((cddr seq)
|
(characterp (car last-cdr)))
|
||||||
(setf (cdr seq)
|
(setf (car last-cdr)
|
||||||
(cons
|
(make-array-from-two-chars (car last-cdr)
|
||||||
(make-array-from-two-chars (second seq)
|
quant))
|
||||||
quant)
|
seq)
|
||||||
(cddr seq)))
|
((and quant-is-char-p
|
||||||
seq)
|
(stringp (car last-cdr)))
|
||||||
(t (make-array-from-two-chars (second seq) quant))))
|
(vector-push-extend quant (car last-cdr))
|
||||||
((and quant-is-char-p
|
seq)
|
||||||
seq-is-sequence-p
|
(t
|
||||||
(stringp (second seq)))
|
;; if <seq> is also a :SEQUENCE parse tree we merge
|
||||||
(cond ((cddr seq)
|
;; both lists into one
|
||||||
(setf (cdr seq)
|
(let ((cons (list quant)))
|
||||||
(cons
|
(psetf last-cdr cons
|
||||||
(progn
|
(cdr last-cdr) cons))
|
||||||
(vector-push-extend quant (second seq))
|
seq))
|
||||||
(second seq))
|
while (start-of-subexpr-p lexer)
|
||||||
(cddr seq)))
|
finally (return seq))
|
||||||
seq)
|
:void)))
|
||||||
(t
|
|
||||||
(vector-push-extend quant (second seq))
|
|
||||||
(second seq))))
|
|
||||||
(seq-is-sequence-p
|
|
||||||
;; if <seq> is also a :SEQUENCE parse tree we merge
|
|
||||||
;; both lists into one to avoid unnecessary consing
|
|
||||||
(setf (cdr seq)
|
|
||||||
(cons quant (cdr seq)))
|
|
||||||
seq)
|
|
||||||
(t (list :sequence quant seq))))
|
|
||||||
quant))
|
|
||||||
:void)))
|
|
||||||
|
|
||||||
(defun reg-expr (lexer)
|
(defun reg-expr (lexer)
|
||||||
"Parses and consumes a <regex>, a complete regular expression.
|
"Parses and consumes a <regex>, a complete regular expression.
|
||||||
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
|
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
|
||||||
@ -294,25 +279,11 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
|||||||
(setf (lexer-pos lexer) pos)
|
(setf (lexer-pos lexer) pos)
|
||||||
seq)))))))
|
seq)))))))
|
||||||
|
|
||||||
(defun reverse-strings (parse-tree)
|
|
||||||
"Recursively walks through PARSE-TREE and destructively reverses all
|
|
||||||
strings in it."
|
|
||||||
(declare #.*standard-optimize-settings*)
|
|
||||||
(cond ((stringp parse-tree)
|
|
||||||
(nreverse parse-tree))
|
|
||||||
((consp parse-tree)
|
|
||||||
(loop for parse-tree-rest on parse-tree
|
|
||||||
while parse-tree-rest
|
|
||||||
do (setf (car parse-tree-rest)
|
|
||||||
(reverse-strings (car parse-tree-rest))))
|
|
||||||
parse-tree)
|
|
||||||
(t parse-tree)))
|
|
||||||
|
|
||||||
(defun parse-string (string)
|
(defun parse-string (string)
|
||||||
"Translate the regex string STRING into a parse tree."
|
"Translate the regex string STRING into a parse tree."
|
||||||
(declare #.*standard-optimize-settings*)
|
(declare #.*standard-optimize-settings*)
|
||||||
(let* ((lexer (make-lexer string))
|
(let* ((lexer (make-lexer string))
|
||||||
(parse-tree (reverse-strings (reg-expr lexer))))
|
(parse-tree (reg-expr lexer)))
|
||||||
;; check whether we've consumed the whole regex string
|
;; check whether we've consumed the whole regex string
|
||||||
(if (end-of-string-p lexer)
|
(if (end-of-string-p lexer)
|
||||||
parse-tree
|
parse-tree
|
||||||
|
|||||||
@ -108,6 +108,10 @@ must appear after this repetition.")
|
|||||||
register."))
|
register."))
|
||||||
(:documentation "REPETITION objects represent repetitions of regexes."))
|
(:documentation "REPETITION objects represent repetitions of regexes."))
|
||||||
|
|
||||||
|
(defmethod print-object ((repetition repetition) stream)
|
||||||
|
(print-unreadable-object (repetition stream :type t :identity t)
|
||||||
|
(princ (regex repetition) stream)))
|
||||||
|
|
||||||
(defclass register (regex)
|
(defclass register (regex)
|
||||||
((regex :initarg :regex
|
((regex :initarg :regex
|
||||||
:accessor regex
|
:accessor regex
|
||||||
@ -122,6 +126,10 @@ This is the index into *REGS-START* and *REGS-END*.")
|
|||||||
:documentation "Name of this register or NIL."))
|
:documentation "Name of this register or NIL."))
|
||||||
(:documentation "REGISTER objects represent register groups."))
|
(:documentation "REGISTER objects represent register groups."))
|
||||||
|
|
||||||
|
(defmethod print-object ((register register) stream)
|
||||||
|
(print-unreadable-object (register stream :type t :identity t)
|
||||||
|
(princ (regex register) stream)))
|
||||||
|
|
||||||
(defclass standalone (regex)
|
(defclass standalone (regex)
|
||||||
((regex :initarg :regex
|
((regex :initarg :regex
|
||||||
:accessor regex
|
:accessor regex
|
||||||
@ -181,6 +189,10 @@ string because the SCAN function has done this already.")
|
|||||||
STR which starts END-STRING (a slot of MATCHER)."))
|
STR which starts END-STRING (a slot of MATCHER)."))
|
||||||
(:documentation "STR objects represent string."))
|
(:documentation "STR objects represent string."))
|
||||||
|
|
||||||
|
(defmethod print-object ((str str) stream)
|
||||||
|
(print-unreadable-object (str stream :type t :identity t)
|
||||||
|
(princ (str str) stream)))
|
||||||
|
|
||||||
(defclass anchor (regex)
|
(defclass anchor (regex)
|
||||||
((startp :initarg :startp
|
((startp :initarg :startp
|
||||||
:reader startp
|
:reader startp
|
||||||
|
|||||||
16
test/simple
16
test/simple
@ -347,3 +347,19 @@ characters if there's a match."
|
|||||||
for s = (create-scanner "(([a-c])+)x")
|
for s = (create-scanner "(([a-c])+)x")
|
||||||
always (equalp (multiple-value-list (scan s "abcxy"))
|
always (equalp (multiple-value-list (scan s "abcxy"))
|
||||||
(list 0 4 #(0 2) #(3 3))))
|
(list 0 4 #(0 2) #(3 3))))
|
||||||
|
|
||||||
|
(labels ((char-code-odd-p (char)
|
||||||
|
(oddp (char-code char)))
|
||||||
|
(char-code-even-p (char)
|
||||||
|
(evenp (char-code char)))
|
||||||
|
(resolver (name)
|
||||||
|
(cond ((string= name "odd") #'char-code-odd-p)
|
||||||
|
((string= name "even") #'char-code-even-p)
|
||||||
|
((string= name "true") (constantly t))
|
||||||
|
(t (error "Can't resolve ~S." name)))))
|
||||||
|
(equalp
|
||||||
|
(let ((*property-resolver* #'resolver))
|
||||||
|
(list (regex-replace-all (create-scanner "\\p{odd}") "abcd" "+")
|
||||||
|
(regex-replace-all (create-scanner "\\p{even}") "abcd" "+")
|
||||||
|
(regex-replace-all (create-scanner "\\p{true}") "abcd" "+")))
|
||||||
|
'("+b+d" "a+c+" "++++")))
|
||||||
|
|||||||
Reference in New Issue
Block a user