diff --git a/lexer.lisp b/lexer.lisp index 71c2642..032b5ca 100644 --- a/lexer.lisp +++ b/lexer.lisp @@ -290,8 +290,7 @@ handled elsewhere." (return)) (write-char char out)))))) (list (if (char= first-char #\p) :property :inverted-property) - ;; we must reverse here because of what PARSE-STRING does - (nreverse name)))) + name))) (defun collect-char-class (lexer) "Reads and consumes characters from regex string until a right @@ -571,7 +570,7 @@ closing #\> will also be consumed." ;; back-referencing a named register (incf (lexer-pos lexer)) (list :back-reference - (nreverse (parse-register-name-aux lexer)))) + (parse-register-name-aux lexer))) (t ;; false alarm, just unescape \k #\k))) diff --git a/parser.lisp b/parser.lisp index f6d9a3d..72e39e9 100644 --- a/parser.lisp +++ b/parser.lisp @@ -128,10 +128,7 @@ Will return or \( ) where ;; have been the "(?:"")" production (cons :group (nconc flags (list reg-expr))) (if (eq open-token :open-paren-less-letter) - (list :named-register - ;; every string was reversed, so we have to - ;; reverse it back to get the name - (nreverse register-name) + (list :named-register register-name reg-expr) (list (case open-token ((:open-paren) @@ -201,54 +198,42 @@ Will return or (:SEQUENCE )." ;; to parse a or in order to catch empty regular ;; expressions (if (start-of-subexpr-p lexer) - (let ((quant (quant lexer))) - (if (start-of-subexpr-p lexer) - (let* ((seq (seq lexer)) - (quant-is-char-p (characterp quant)) - (seq-is-sequence-p (and (consp seq) - (eq (first seq) :sequence)))) - (cond ((and quant-is-char-p - (characterp seq)) - (make-array-from-two-chars seq quant)) - ((and quant-is-char-p - (stringp seq)) - (vector-push-extend quant seq) - seq) - ((and quant-is-char-p - seq-is-sequence-p - (characterp (second seq))) - (cond ((cddr seq) - (setf (cdr seq) - (cons - (make-array-from-two-chars (second seq) - quant) - (cddr seq))) - seq) - (t (make-array-from-two-chars (second seq) quant)))) - ((and quant-is-char-p - seq-is-sequence-p - (stringp (second seq))) - (cond ((cddr seq) - (setf (cdr seq) - (cons - (progn - (vector-push-extend quant (second seq)) - (second seq)) - (cddr seq))) - seq) - (t - (vector-push-extend quant (second seq)) - (second seq)))) - (seq-is-sequence-p - ;; if is also a :SEQUENCE parse tree we merge - ;; both lists into one to avoid unnecessary consing - (setf (cdr seq) - (cons quant (cdr seq))) - seq) - (t (list :sequence quant seq)))) - quant)) - :void))) - + (loop with seq-is-sequence-p = nil + with last-cdr + for quant = (quant lexer) + for quant-is-char-p = (characterp quant) + for seq = quant + then + (cond ((and quant-is-char-p (characterp seq)) + (make-array-from-two-chars seq quant)) + ((and quant-is-char-p (stringp seq)) + (vector-push-extend quant seq) + seq) + ((not seq-is-sequence-p) + (setf last-cdr (list quant) + seq-is-sequence-p t) + (list* :sequence seq last-cdr)) + ((and quant-is-char-p + (characterp (car last-cdr))) + (setf (car last-cdr) + (make-array-from-two-chars (car last-cdr) + quant)) + seq) + ((and quant-is-char-p + (stringp (car last-cdr))) + (vector-push-extend quant (car last-cdr)) + seq) + (t + ;; if is also a :SEQUENCE parse tree we merge + ;; both lists into one + (let ((cons (list quant))) + (psetf last-cdr cons + (cdr last-cdr) cons)) + seq)) + while (start-of-subexpr-p lexer) + finally (return seq)) + :void))) + (defun reg-expr (lexer) "Parses and consumes a , a complete regular expression. The productions are: -> | \"|\". @@ -294,25 +279,11 @@ Will return or (:ALTERNATION )." (setf (lexer-pos lexer) pos) seq))))))) -(defun reverse-strings (parse-tree) - "Recursively walks through PARSE-TREE and destructively reverses all -strings in it." - (declare #.*standard-optimize-settings*) - (cond ((stringp parse-tree) - (nreverse parse-tree)) - ((consp parse-tree) - (loop for parse-tree-rest on parse-tree - while parse-tree-rest - do (setf (car parse-tree-rest) - (reverse-strings (car parse-tree-rest)))) - parse-tree) - (t parse-tree))) - (defun parse-string (string) "Translate the regex string STRING into a parse tree." (declare #.*standard-optimize-settings*) (let* ((lexer (make-lexer string)) - (parse-tree (reverse-strings (reg-expr lexer)))) + (parse-tree (reg-expr lexer))) ;; check whether we've consumed the whole regex string (if (end-of-string-p lexer) parse-tree