Update to current dev version

git-svn-id: svn://bknr.net/svn/trunk/thirdparty/cl-ppcre@3581 4281704c-cde7-0310-8518-8e2dc76b1ff0
This commit is contained in:
Edi Weitz
2008-07-23 11:44:08 +00:00
parent 2974af4010
commit 25c3dedeeb
37 changed files with 5443 additions and 6794 deletions

View File

@ -1,11 +1,11 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-ppcre/parser.lisp,v 1.25 2008/06/25 14:04:28 edi Exp $
;;; $Header: /usr/local/cvsrep/cl-ppcre/parser.lisp,v 1.30 2008/07/06 18:12:05 edi Exp $
;;; The parser will - with the help of the lexer - parse a regex
;;; string and convert it into a "parse tree" (see docs for details
;;; about the syntax of these trees). Note that the lexer might return
;;; illegal parse trees. It is assumed that the conversion process
;;; later on will track them down.
;;; about the syntax of these trees). Note that the lexer might
;;; return illegal parse trees. It is assumed that the conversion
;;; process later on will track them down.
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
@ -33,10 +33,9 @@
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(in-package #:cl-ppcre)
(in-package :cl-ppcre)
(defun group (lexer)
(declare #.*standard-optimize-settings*)
"Parses and consumes a <group>.
The productions are: <group> -> \"\(\"<regex>\")\"
\"\(?:\"<regex>\")\"
@ -53,6 +52,7 @@ The productions are: <group> -> \"\(\"<regex>\")\"
where <flags> is parsed by the lexer function MAYBE-PARSE-FLAGS.
Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
<grouping-type> is one of six keywords - see source for details."
(declare #.*standard-optimize-settings*)
(multiple-value-bind (open-token flags)
(get-token lexer)
(cond ((eq open-token :open-paren-paren)
@ -65,7 +65,7 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
(number (try-number lexer :no-whitespace-p t))
;; make changes to extended-mode-p local
(*extended-mode-p* *extended-mode-p*))
(declare (type fixnum open-paren-pos))
(declare (fixnum open-paren-pos))
(cond (number
;; condition is a number (i.e. refers to a
;; back-reference)
@ -73,13 +73,11 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
(reg-expr (reg-expr lexer))
(close-token (get-token lexer)))
(unless (eq inner-close-token :close-paren)
(signal-ppcre-syntax-error*
(+ open-paren-pos 2)
"Opening paren has no matching closing paren"))
(signal-syntax-error* (+ open-paren-pos 2)
"Opening paren has no matching closing paren."))
(unless (eq close-token :close-paren)
(signal-ppcre-syntax-error*
open-paren-pos
"Opening paren has no matching closing paren"))
(signal-syntax-error* open-paren-pos
"Opening paren has no matching closing paren."))
(list :branch number reg-expr)))
(t
;; condition must be a full regex (actually a
@ -94,9 +92,8 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
(reg-expr (reg-expr lexer))
(close-token (get-token lexer)))
(unless (eq close-token :close-paren)
(signal-ppcre-syntax-error*
open-paren-pos
"Opening paren has no matching closing paren"))
(signal-syntax-error* open-paren-pos
"Opening paren has no matching closing paren."))
(list :branch inner-reg-expr reg-expr))))))
((member open-token '(:open-paren
:open-paren-colon
@ -124,9 +121,8 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
(unless (eq close-token :close-paren)
;; the token following <regex> must be the closing
;; parenthesis or this is a syntax error
(signal-ppcre-syntax-error*
open-paren-pos
"Opening paren has no matching closing paren"))
(signal-syntax-error* open-paren-pos
"Opening paren has no matching closing paren."))
(if flags
;; if the lexer has returned a list of flags this must
;; have been the "(?:"<regex>")" production
@ -160,11 +156,11 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
open-token))))
(defun greedy-quant (lexer)
(declare #.*standard-optimize-settings*)
"Parses and consumes a <greedy-quant>.
The productions are: <greedy-quant> -> <group> | <group><quantifier>
where <quantifier> is parsed by the lexer function GET-QUANTIFIER.
Will return <parse-tree> or (:GREEDY-REPETITION <min> <max> <parse-tree>)."
(declare #.*standard-optimize-settings*)
(let* ((group (group lexer))
(token (get-quantifier lexer)))
(if token
@ -174,11 +170,11 @@ Will return <parse-tree> or (:GREEDY-REPETITION <min> <max> <parse-tree>)."
group)))
(defun quant (lexer)
(declare #.*standard-optimize-settings*)
"Parses and consumes a <quant>.
The productions are: <quant> -> <greedy-quant> | <greedy-quant>\"?\".
Will return the <parse-tree> returned by GREEDY-QUANT and optionally
change :GREEDY-REPETITION to :NON-GREEDY-REPETITION."
(declare #.*standard-optimize-settings*)
(let* ((greedy-quant (greedy-quant lexer))
(pos (lexer-pos lexer))
(next-char (next-char lexer)))
@ -189,10 +185,10 @@ change :GREEDY-REPETITION to :NON-GREEDY-REPETITION."
greedy-quant))
(defun seq (lexer)
(declare #.*standard-optimize-settings*)
"Parses and consumes a <seq>.
The productions are: <seq> -> <quant> | <quant><seq>.
Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
(declare #.*standard-optimize-settings*)
(flet ((make-array-from-two-chars (char1 char2)
(let ((string (make-array 2
:element-type 'character
@ -254,10 +250,10 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
:void)))
(defun reg-expr (lexer)
(declare #.*standard-optimize-settings*)
"Parses and consumes a <regex>, a complete regular expression.
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
(declare #.*standard-optimize-settings*)
(let ((pos (lexer-pos lexer)))
(case (next-char lexer)
((nil)
@ -299,6 +295,8 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
seq)))))))
(defun reverse-strings (parse-tree)
"Recursively walks through PARSE-TREE and destructively reverses all
strings in it."
(declare #.*standard-optimize-settings*)
(cond ((stringp parse-tree)
(nreverse parse-tree))
@ -311,13 +309,11 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
(t parse-tree)))
(defun parse-string (string)
(declare #.*standard-optimize-settings*)
"Translate the regex string STRING into a parse tree."
(declare #.*standard-optimize-settings*)
(let* ((lexer (make-lexer string))
(parse-tree (reverse-strings (reg-expr lexer))))
;; check whether we've consumed the whole regex string
(if (end-of-string-p lexer)
parse-tree
(signal-ppcre-syntax-error*
(lexer-pos lexer)
"Expected end of string"))))
(signal-syntax-error* (lexer-pos lexer) "Expected end of string."))))