Update to version 1.2.12 from weitz.de
git-svn-id: svn://bknr.net/svn/trunk/thirdparty/cl-ppcre@1779 4281704c-cde7-0310-8518-8e2dc76b1ff0
This commit is contained in:
123
CHANGELOG
123
CHANGELOG
@ -1,3 +1,124 @@
|
||||
Version 1.2.12
|
||||
2005-11-01
|
||||
REGEX-APROPOS-AUX now also uses :INHERITED
|
||||
Fixed typo in parser.lisp (thanks to Derek Peschel)
|
||||
Fixed value of *REGEX-CHAR-CODE-LIMIT* in docs and test (thanks to Christophe Rhodes)
|
||||
|
||||
Version 1.2.11
|
||||
2005-08-01
|
||||
Added external format for SBCL in ppcre-tests.lisp (thanks to Christophe Rhodes)
|
||||
|
||||
Version 1.2.10
|
||||
2005-07-20
|
||||
Fixed bug in CHAR-SEARCHER-AUX (caught by Peter Schuller)
|
||||
Don't redefine what's already there (for LispWorks)
|
||||
|
||||
Version 1.2.9
|
||||
2005-06-27
|
||||
Hide compiler macros from CCL (thanks to Karsten Poeck)
|
||||
|
||||
Version 1.2.8
|
||||
2005-06-10
|
||||
Change EQ to EQL in REGEX-LENGTH for ANSI conformance and ABCL compatibility (thanks to Peter Graves)
|
||||
|
||||
Version 1.2.7
|
||||
2005-05-16
|
||||
Added lispworks-defsystem.lisp (thanks to Wade Humeniuk)
|
||||
Fixed bug in WORD-BOUNDARY-P
|
||||
|
||||
Version 1.2.6
|
||||
2005-04-13
|
||||
Added some DEFGENERICs to appease SBCL (thanks to Alan Shields)
|
||||
Removed wrong FTYPE declaration for STR (thanks to Alan Shields)
|
||||
|
||||
Version 1.2.5
|
||||
2005-03-09
|
||||
Customizable optimize qualities (thanks to Damien Kick)
|
||||
|
||||
Version 1.2.4
|
||||
2005-03-07
|
||||
Changed DEBUG optimize quality from 0 to 1
|
||||
|
||||
Version 1.2.3
|
||||
2005-02-02
|
||||
Wrapped WITH-COMPILATION-UNIT around loop in load.lisp
|
||||
|
||||
Version 1.2.2
|
||||
2005-02-02
|
||||
Fixed bug in hash table optimization (introduced in 1.1.0)
|
||||
|
||||
Version 1.2.1
|
||||
2005-01-25
|
||||
There was a wrong read-time conditional in api.lisp, sorry
|
||||
|
||||
Version 1.2.0
|
||||
2005-01-24
|
||||
AllegroCL compatibility mode
|
||||
Fixed broken load.lisp file (caught by Jim Prewett and Zach Beane)
|
||||
|
||||
Version 1.1.0
|
||||
2005-01-23
|
||||
Cleaned up load.lisp and cl-ppcre.asd
|
||||
Make large hash tables smaller, if possible
|
||||
Correct treatment of constant regular expressions in DO-SCANS
|
||||
|
||||
Version 1.0.0
|
||||
2004-12-22
|
||||
Special anniversary release... :)
|
||||
|
||||
Version 0.9.4
|
||||
2004-12-18
|
||||
Fixed bug in NORMALIZE-VAR-LIST (caught by Dave Roberts)
|
||||
|
||||
Version 0.9.3
|
||||
2004-12-09
|
||||
Fixed bug in CREATE-SCANNER-AUX (caught by Allan Ruttenberg and Gary Byers)
|
||||
|
||||
Version 0.9.2
|
||||
2004-12-06
|
||||
More compiler macros (thanks to Allan Ruttenberg)
|
||||
|
||||
Version 0.9.1
|
||||
2004-11-29
|
||||
Shortcuts for REGISTER-GROUPS-BIND and DO-REGISTER-GROUPS (suggested by Alexander Kjeldaas)
|
||||
|
||||
Version 0.9.0
|
||||
2004-10-14
|
||||
Experimental support for "filters"
|
||||
Bugfix for standalone regular expressions (ACCUMULATE-START-P wasn't set to NIL)
|
||||
|
||||
Version 0.8.1
|
||||
2004-09-30
|
||||
Patches for Genera 8.5 (thanks to Patrick O'Donnell)
|
||||
|
||||
Version 0.8.0
|
||||
2004-09-16
|
||||
Added parse tree synonyms (thanks to Patrick O'Donnell)
|
||||
|
||||
Version 0.7.9
|
||||
2004-07-13
|
||||
Fixed bug in DO-SCANS (caught by Jan Rychter)
|
||||
|
||||
Version 0.7.8
|
||||
2004-07-13
|
||||
New SIMPLE-CALLS keyword argument for REGEX-REPLACE(-ALL)
|
||||
Added environment parameter to compiler macros (thanks to c.l.l article <aczhx5hj.fsf@ccs.neu.edu> by Joe Marshall)
|
||||
Added compiler macros for SCAN-TO-STRINGS and REGEX-REPLACE(-ALL) (they somehow got lost)
|
||||
|
||||
Version 0.7.7
|
||||
2004-05-19
|
||||
Fixed bug in NEWLINE-SKIPPER (caught by RegexCoach user Thomas-Paz Hartman)
|
||||
Added doc strings for PPCRE-SYNTAX-ERROR and friends (after playing with slime-apropos-package)
|
||||
Added hyperdoc support
|
||||
|
||||
Version 0.7.6
|
||||
2004-04-20
|
||||
The closures created by CREATE-BMH-MATCHER now cleanly cope with negative arguments (bug caught by Damien Kick)
|
||||
|
||||
Version 0.7.5
|
||||
2004-04-19
|
||||
Fixed a bug with constant-length repetitions of . (dot) in single-line mode (caught by RegexCoach user Lee Gold)
|
||||
|
||||
Version 0.7.4
|
||||
2004-02-16
|
||||
Fixed wrong call to SIGNAL-PPCRE-SIGNAL-ERROR in lexer.lisp (caught by Peter Graves)
|
||||
@ -6,7 +127,7 @@ Compiler macro for SPLIT
|
||||
|
||||
Version 0.7.3
|
||||
2004-01-28
|
||||
Fixed bug in CURRENT-MIN-REST for lookaheads (reported by Thomas-Paz Hartman)
|
||||
Fixed bug in CURRENT-MIN-REST for lookaheads (reported by RegexCoach user Thomas-Paz Hartman)
|
||||
Added tests for this bug
|
||||
|
||||
Version 0.7.2
|
||||
|
||||
11
README
11
README
@ -1,6 +1,10 @@
|
||||
Complete documentation for CL-PPCRE can be found in the 'doc'
|
||||
directory.
|
||||
|
||||
CL-PPCRE also supports Nikodemus Siivola's HYPERDOC, see
|
||||
<http://common-lisp.net/project/hyperdoc/> and
|
||||
<http://www.cliki.net/hyperdoc>.
|
||||
|
||||
1. Installation
|
||||
|
||||
1.1. Probably the easiest way is
|
||||
@ -24,6 +28,9 @@ directory.
|
||||
1.3. You can also use ASDF instead of MK:DEFSYSTEM in a similar way
|
||||
(use the .asd files instead of the .system files).
|
||||
|
||||
1.4. For LispWorks there's a file 'lispworks-defsystem.lisp' which includes
|
||||
a system definition for LispWork's Common Defsystem.
|
||||
|
||||
2. Test
|
||||
|
||||
CL-PPCRE comes with a test suite that can be used to check its
|
||||
@ -49,3 +56,7 @@ and 1439) which are explained in the documentation.
|
||||
|
||||
MCL might report an error for the ninth test case which is also
|
||||
explained in the docs.
|
||||
|
||||
Genera notes (thanks to Patrick O'Donnell): Some more tests will fail
|
||||
because characters like #\Return, #\Linefeed, or #\Tab have encodings
|
||||
which differ from Perl's (and thus CL-PPCRE's) expectations.
|
||||
|
||||
396
api.lisp
396
api.lisp
@ -1,9 +1,9 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/api.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/api.lisp,v 1.60 2005/11/01 09:51:01 edi Exp $
|
||||
|
||||
;;; The external API for creating and using scanners.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -43,17 +43,13 @@ are equivalent to the imsx modifiers in Perl. If DESTRUCTIVE is not
|
||||
NIL the function is allowed to destructively modify its first argument
|
||||
\(but only if it's a parse tree)."))
|
||||
|
||||
#-:use-acl-regexp2-engine
|
||||
(defmethod create-scanner ((regex-string string) &key case-insensitive-mode
|
||||
multi-line-mode
|
||||
single-line-mode
|
||||
extended-mode
|
||||
destructive)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (ignore destructive))
|
||||
;; parse the string into a parse-tree and then call CREATE-SCANNER
|
||||
;; again
|
||||
@ -70,34 +66,26 @@ NIL the function is allowed to destructively modify its first argument
|
||||
:single-line-mode single-line-mode
|
||||
:destructive t)))
|
||||
|
||||
#-:use-acl-regexp2-engine
|
||||
(defmethod create-scanner ((scanner function) &key case-insensitive-mode
|
||||
multi-line-mode
|
||||
single-line-mode
|
||||
extended-mode
|
||||
destructive)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (ignore destructive))
|
||||
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
|
||||
(signal-ppcre-invocation-error
|
||||
"You can't use the keyword arguments to modify an existing scanner."))
|
||||
scanner)
|
||||
|
||||
#-:use-acl-regexp2-engine
|
||||
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
|
||||
multi-line-mode
|
||||
single-line-mode
|
||||
extended-mode
|
||||
destructive)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(when extended-mode
|
||||
(signal-ppcre-invocation-error
|
||||
"Extended mode doesn't make sense in parse trees."))
|
||||
@ -188,6 +176,35 @@ NIL the function is allowed to destructively modify its first argument
|
||||
*zero-length-num*
|
||||
reg-num))))))
|
||||
|
||||
#+:use-acl-regexp2-engine
|
||||
(declaim (inline create-scanner))
|
||||
|
||||
#+:use-acl-regexp2-engine
|
||||
(defmethod create-scanner ((scanner regexp::regular-expression) &key case-insensitive-mode
|
||||
multi-line-mode
|
||||
single-line-mode
|
||||
extended-mode
|
||||
destructive)
|
||||
(declare (ignore destructive))
|
||||
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
|
||||
(signal-ppcre-invocation-error
|
||||
"You can't use the keyword arguments to modify an existing scanner."))
|
||||
scanner)
|
||||
|
||||
#+:use-acl-regexp2-engine
|
||||
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
|
||||
multi-line-mode
|
||||
single-line-mode
|
||||
extended-mode
|
||||
destructive)
|
||||
(declare (ignore destructive))
|
||||
(excl:compile-re parse-tree
|
||||
:case-fold case-insensitive-mode
|
||||
:ignore-whitespace extended-mode
|
||||
:multiple-lines multi-line-mode
|
||||
:single-line single-line-mode
|
||||
:return :index))
|
||||
|
||||
(defgeneric scan (regex target-string &key start end)
|
||||
(:documentation "Searches TARGET-STRING from START to END and tries
|
||||
to match REGEX. On success returns four values - the start of the
|
||||
@ -197,50 +214,66 @@ string which will be parsed according to Perl syntax, a parse tree, or
|
||||
a pre-compiled scanner created by CREATE-SCANNER. TARGET-STRING will
|
||||
be coerced to a simple string if it isn't one already."))
|
||||
|
||||
#-:use-acl-regexp2-engine
|
||||
(defmethod scan ((regex-string string) target-string
|
||||
&key (start 0)
|
||||
(end (length target-string)))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
;; note that the scanners are optimized for simple strings so we
|
||||
;; have to coerce TARGET-STRING into one if it isn't already
|
||||
(funcall (create-scanner regex-string)
|
||||
(maybe-coerce-to-simple-string target-string)
|
||||
start end))
|
||||
|
||||
#-:use-acl-regexp2-engine
|
||||
(defmethod scan ((scanner function) target-string
|
||||
&key (start 0)
|
||||
(end (length target-string)))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(funcall scanner
|
||||
(maybe-coerce-to-simple-string target-string)
|
||||
start end))
|
||||
|
||||
#-:use-acl-regexp2-engine
|
||||
(defmethod scan ((parse-tree t) target-string
|
||||
&key (start 0)
|
||||
(end (length target-string)))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(funcall (create-scanner parse-tree)
|
||||
(maybe-coerce-to-simple-string target-string)
|
||||
start end))
|
||||
|
||||
(define-compiler-macro scan (&whole form regex target-string &rest rest)
|
||||
#+:use-acl-regexp2-engine
|
||||
(declaim (inline scan))
|
||||
|
||||
#+:use-acl-regexp2-engine
|
||||
(defmethod scan ((parse-tree t) target-string
|
||||
&key (start 0)
|
||||
(end (length target-string)))
|
||||
(when (< end start)
|
||||
(return-from scan nil))
|
||||
(let ((results (multiple-value-list (excl:match-re parse-tree target-string
|
||||
:start start
|
||||
:end end
|
||||
:return :index))))
|
||||
(declare (dynamic-extent results))
|
||||
(cond ((null (first results)) nil)
|
||||
(t (let* ((no-of-regs (- (length results) 2))
|
||||
(reg-starts (make-array no-of-regs
|
||||
:element-type '(or null fixnum)))
|
||||
(reg-ends (make-array no-of-regs
|
||||
:element-type '(or null fixnum)))
|
||||
(match (second results)))
|
||||
(loop for (start . end) in (cddr results)
|
||||
for i from 0
|
||||
do (setf (aref reg-starts i) start
|
||||
(aref reg-ends i) end))
|
||||
(values (car match) (cdr match) reg-starts reg-ends))))))
|
||||
|
||||
#-:cormanlisp
|
||||
(define-compiler-macro scan (&whole form &environment env regex target-string &rest rest)
|
||||
"Make sure that constant forms are compiled into scanners at compile time."
|
||||
(cond ((constantp regex)
|
||||
(cond ((constantp regex env)
|
||||
`(scan (load-time-value
|
||||
(create-scanner ,regex))
|
||||
,target-string ,@rest))
|
||||
@ -249,12 +282,7 @@ be coerced to a simple string if it isn't one already."))
|
||||
(defun scan-to-strings (regex target-string &key (start 0)
|
||||
(end (length target-string))
|
||||
sharedp)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Like SCAN but returns substrings of TARGET-STRING instead of
|
||||
positions, i.e. this function returns two values on success: the whole
|
||||
match as a string plus an array of substrings (or NILs) corresponding
|
||||
@ -276,6 +304,16 @@ structure with TARGET-STRING."
|
||||
reg-starts
|
||||
reg-ends)))))
|
||||
|
||||
#-:cormanlisp
|
||||
(define-compiler-macro scan-to-strings
|
||||
(&whole form &environment env regex target-string &rest rest)
|
||||
"Make sure that constant forms are compiled into scanners at compile time."
|
||||
(cond ((constantp regex env)
|
||||
`(scan-to-strings (load-time-value
|
||||
(create-scanner ,regex))
|
||||
,target-string ,@rest))
|
||||
(t form)))
|
||||
|
||||
(defmacro register-groups-bind (var-list (regex target-string
|
||||
&key start end sharedp)
|
||||
&body body)
|
||||
@ -287,7 +325,7 @@ VAR-LIST which is NIL there's no binding to the corresponding register
|
||||
group. The number of variables in VAR-LIST must not be greater than
|
||||
the number of register groups. If SHAREDP is true, the substrings may
|
||||
share structure with TARGET-STRING."
|
||||
(rebinding (target-string)
|
||||
(with-rebinding (target-string)
|
||||
(with-unique-names (match-start match-end reg-starts reg-ends
|
||||
start-index substr-fn)
|
||||
`(multiple-value-bind (,match-start ,match-end ,reg-starts ,reg-ends)
|
||||
@ -299,16 +337,17 @@ share structure with TARGET-STRING."
|
||||
`(,substr-fn (if ,sharedp
|
||||
#'nsubseq
|
||||
#'subseq))
|
||||
(loop for var in var-list
|
||||
(loop for (function var) in (normalize-var-list var-list)
|
||||
for counter from 0
|
||||
when var
|
||||
collect `(,var (let ((,start-index
|
||||
(aref ,reg-starts ,counter)))
|
||||
(if ,start-index
|
||||
(funcall ,function
|
||||
(funcall ,substr-fn
|
||||
,target-string
|
||||
,start-index
|
||||
(aref ,reg-ends ,counter))
|
||||
(aref ,reg-ends ,counter)))
|
||||
nil)))))
|
||||
,@body))))))
|
||||
|
||||
@ -316,7 +355,8 @@ share structure with TARGET-STRING."
|
||||
target-string
|
||||
&optional result-form
|
||||
&key start end)
|
||||
&body body)
|
||||
&body body
|
||||
&environment env)
|
||||
"Iterates over TARGET-STRING and tries to match REGEX as often as
|
||||
possible evaluating BODY with MATCH-START, MATCH-END, REG-STARTS, and
|
||||
REG-ENDS bound to the four return values of each match in turn. After
|
||||
@ -325,19 +365,24 @@ implicit block named NIL surrounds DO-SCANS; RETURN may be used to
|
||||
terminate the loop immediately. If REGEX matches an empty string the
|
||||
scan is continued one position behind this match. BODY may start with
|
||||
declarations."
|
||||
(rebinding (target-string regex)
|
||||
(with-unique-names (%start %end scanner loop-tag block-name)
|
||||
(with-rebinding (target-string)
|
||||
(with-unique-names (%start %end %regex scanner loop-tag block-name)
|
||||
(declare (ignorable %regex scanner))
|
||||
;; the NIL BLOCK to enable exits via (RETURN ...)
|
||||
`(block nil
|
||||
(let* ((,%start (or ,start 0))
|
||||
(*real-start-pos* ,%start)
|
||||
(,%end (or ,end (length ,target-string)))
|
||||
;; create a scanner unless the regex is already a
|
||||
;; function (otherwise SCAN will do this on each
|
||||
;; iteration)
|
||||
(,scanner (typecase ,regex
|
||||
(function ,regex)
|
||||
(t (create-scanner ,regex)))))
|
||||
,@(unless (constantp regex env)
|
||||
;; leave constant regular expressions as they are -
|
||||
;; SCAN's compiler macro will take care of them;
|
||||
;; otherwise create a scanner unless the regex is
|
||||
;; already a function (otherwise SCAN will do this
|
||||
;; on each iteration)
|
||||
`((,%regex ,regex)
|
||||
(,scanner (typecase ,%regex
|
||||
(function ,%regex)
|
||||
(t (create-scanner ,%regex)))))))
|
||||
;; coerce TARGET-STRING to a simple string unless it is one
|
||||
;; already (otherwise SCAN will do this on each iteration)
|
||||
(setq ,target-string
|
||||
@ -350,7 +395,9 @@ declarations."
|
||||
;; provided variables
|
||||
(multiple-value-bind
|
||||
(,match-start ,match-end ,reg-starts ,reg-ends)
|
||||
(scan ,scanner ,target-string :start ,%start :end ,%end)
|
||||
(scan ,(cond ((constantp regex env) regex)
|
||||
(t scanner))
|
||||
,target-string :start ,%start :end ,%end)
|
||||
;; declare the variables to be IGNORABLE to prevent the
|
||||
;; compiler from issuing warnings
|
||||
(declare
|
||||
@ -363,7 +410,7 @@ declarations."
|
||||
(locally
|
||||
,@body)
|
||||
;; advance by one position if we had a zero-length match
|
||||
(setq ,%start (if (= ,%start ,match-end)
|
||||
(setq ,%start (if (= ,match-start ,match-end)
|
||||
(1+ ,match-end)
|
||||
,match-end)))
|
||||
(go ,loop-tag))))))))
|
||||
@ -405,7 +452,7 @@ terminate the loop immediately. If REGEX matches an empty string the
|
||||
scan is continued one position behind this match. If SHAREDP is true,
|
||||
the substrings may share structure with TARGET-STRING. BODY may start
|
||||
with declarations."
|
||||
(rebinding (target-string)
|
||||
(with-rebinding (target-string)
|
||||
(with-unique-names (match-start match-end substr-fn)
|
||||
`(let ((,substr-fn (if ,sharedp #'nsubseq #'subseq)))
|
||||
;; simple use DO-MATCHES to extract the substrings
|
||||
@ -432,7 +479,7 @@ surrounds DO-REGISTER-GROUPS; RETURN may be used to terminate the loop
|
||||
immediately. If REGEX matches an empty string the scan is continued
|
||||
one position behind this match. If SHAREDP is true, the substrings may
|
||||
share structure with TARGET-STRING. BODY may start with declarations."
|
||||
(rebinding (target-string)
|
||||
(with-rebinding (target-string)
|
||||
(with-unique-names (substr-fn match-start match-end
|
||||
reg-starts reg-ends start-index)
|
||||
`(let ((,substr-fn (if ,sharedp
|
||||
@ -441,27 +488,24 @@ share structure with TARGET-STRING. BODY may start with declarations."
|
||||
(do-scans (,match-start ,match-end ,reg-starts ,reg-ends
|
||||
,regex ,target-string
|
||||
,result-form :start ,start :end ,end)
|
||||
(let ,(loop for var in var-list
|
||||
(let ,(loop for (function var) in (normalize-var-list var-list)
|
||||
for counter from 0
|
||||
when var
|
||||
collect `(,var (let ((,start-index
|
||||
(aref ,reg-starts ,counter)))
|
||||
(if ,start-index
|
||||
(funcall ,function
|
||||
(funcall ,substr-fn
|
||||
,target-string
|
||||
,start-index
|
||||
(aref ,reg-ends ,counter))
|
||||
(aref ,reg-ends ,counter)))
|
||||
nil))))
|
||||
,@body))))))
|
||||
|
||||
(defun all-matches (regex target-string
|
||||
&key (start 0)
|
||||
(end (length target-string)))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns a list containing the start and end positions of all
|
||||
matches of REGEX against TARGET-STRING, i.e. if there are N matches
|
||||
the list contains (* 2 N) elements. If REGEX matches an empty string
|
||||
@ -474,16 +518,21 @@ the scan is continued one position behind this match."
|
||||
(push match-start result-list)
|
||||
(push match-end result-list))))
|
||||
|
||||
#-:cormanlisp
|
||||
(define-compiler-macro all-matches (&whole form &environment env regex &rest rest)
|
||||
"Make sure that constant forms are compiled into scanners at
|
||||
compile time."
|
||||
(cond ((constantp regex env)
|
||||
`(all-matches (load-time-value
|
||||
(create-scanner ,regex))
|
||||
,@rest))
|
||||
(t form)))
|
||||
|
||||
(defun all-matches-as-strings (regex target-string
|
||||
&key (start 0)
|
||||
(end (length target-string))
|
||||
sharedp)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns a list containing all substrings of TARGET-STRING which
|
||||
match REGEX. If REGEX matches an empty string the scan is continued
|
||||
one position behind this match. If SHAREDP is true, the substrings may
|
||||
@ -493,6 +542,17 @@ share structure with TARGET-STRING."
|
||||
:start start :end end :sharedp sharedp)
|
||||
(push match result-list))))
|
||||
|
||||
#-:cormanlisp
|
||||
(define-compiler-macro all-matches-as-strings (&whole form &environment env regex &rest rest)
|
||||
"Make sure that constant forms are compiled into scanners at
|
||||
compile time."
|
||||
(cond ((constantp regex env)
|
||||
`(all-matches-as-strings
|
||||
(load-time-value
|
||||
(create-scanner ,regex))
|
||||
,@rest))
|
||||
(t form)))
|
||||
|
||||
(defun split (regex target-string
|
||||
&key (start 0)
|
||||
(end (length target-string))
|
||||
@ -500,12 +560,7 @@ share structure with TARGET-STRING."
|
||||
with-registers-p
|
||||
omit-unmatched-p
|
||||
sharedp)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Matches REGEX against TARGET-STRING as often as possible and
|
||||
returns a list of the substrings between the matches. If
|
||||
WITH-REGISTERS-P is true, substrings corresponding to matched
|
||||
@ -569,21 +624,17 @@ TARGET-STRING."
|
||||
target-string this-start this-end)
|
||||
nil)))))
|
||||
|
||||
(define-compiler-macro split (&whole form regex target-string &rest rest)
|
||||
#-:cormanlisp
|
||||
(define-compiler-macro split (&whole form &environment env regex target-string &rest rest)
|
||||
"Make sure that constant forms are compiled into scanners at compile time."
|
||||
(cond ((constantp regex)
|
||||
(cond ((constantp regex env)
|
||||
`(split (load-time-value
|
||||
(create-scanner ,regex))
|
||||
,target-string ,@rest))
|
||||
(t form)))
|
||||
|
||||
(defun string-case-modifier (str from to start end)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum from to start end))
|
||||
"Checks whether all words in STR between FROM and TO are upcased,
|
||||
downcased or capitalized and returns a function which applies a
|
||||
@ -648,19 +699,18 @@ that (<= START FROM TO END)."
|
||||
|
||||
;; first create a scanner to identify the special parts of the
|
||||
;; replacement string (eat your own dog food...)
|
||||
|
||||
(defgeneric build-replacement-template (replacement-string)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Converts a replacement string for REGEX-REPLACE or
|
||||
REGEX-REPLACE-ALL into a replacement template which is an
|
||||
S-expression."))
|
||||
|
||||
#-:cormanlisp
|
||||
(let* ((*use-bmh-matchers* nil)
|
||||
(reg-scanner (create-scanner "\\\\(?:\\\\|{\\d+}|\\d+|&|`|')")))
|
||||
(reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
|
||||
(defmethod build-replacement-template ((replacement-string string))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Converts a replacement string for REGEX-REPLACE or
|
||||
REGEX-REPLACE-ALL into a replacement template which is an
|
||||
S-expression."
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((from 0)
|
||||
;; COLLECTOR will hold the (reversed) template
|
||||
(collector '()))
|
||||
@ -714,14 +764,9 @@ S-expression."
|
||||
;;; Corman Lisp's methods can't be closures... :(
|
||||
#+:cormanlisp
|
||||
(let* ((*use-bmh-matchers* nil)
|
||||
(reg-scanner (create-scanner "\\\\(?:\\\\|{\\d+}|\\d+|&|`|')")))
|
||||
(reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
|
||||
(defun build-replacement-template (replacement)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(typecase replacement
|
||||
(string
|
||||
(let ((from 0)
|
||||
@ -770,13 +815,9 @@ S-expression."
|
||||
target-string
|
||||
start end
|
||||
match-start match-end
|
||||
reg-starts reg-ends)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
reg-starts reg-ends
|
||||
simple-calls)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Accepts a replacement template and the current values from the
|
||||
matching process in REGEX-REPLACE or REGEX-REPLACE-ALL and returns the
|
||||
corresponding template."
|
||||
@ -806,11 +847,21 @@ corresponding template."
|
||||
:start (svref reg-starts token)
|
||||
:end (svref reg-ends token))))
|
||||
(function
|
||||
(write-string (funcall token
|
||||
(write-string
|
||||
(cond (simple-calls
|
||||
(apply token
|
||||
(nsubseq target-string match-start match-end)
|
||||
(map 'list
|
||||
(lambda (reg-start reg-end)
|
||||
(and reg-start
|
||||
(nsubseq target-string reg-start reg-end)))
|
||||
reg-starts reg-ends)))
|
||||
(t
|
||||
(funcall token
|
||||
target-string
|
||||
start end
|
||||
match-start match-end
|
||||
reg-starts reg-ends)
|
||||
reg-starts reg-ends)))
|
||||
s))
|
||||
(symbol
|
||||
(case token
|
||||
@ -833,20 +884,26 @@ corresponding template."
|
||||
:start match-end
|
||||
:end end))
|
||||
(otherwise
|
||||
(write-string (funcall token
|
||||
(write-string
|
||||
(cond (simple-calls
|
||||
(apply token
|
||||
(nsubseq target-string match-start match-end)
|
||||
(map 'list
|
||||
(lambda (reg-start reg-end)
|
||||
(and reg-start
|
||||
(nsubseq target-string reg-start reg-end)))
|
||||
reg-starts reg-ends)))
|
||||
(t
|
||||
(funcall token
|
||||
target-string
|
||||
start end
|
||||
match-start match-end
|
||||
reg-starts reg-ends)
|
||||
reg-starts reg-ends)))
|
||||
s)))))))))
|
||||
|
||||
(defun replace-aux (target-string replacement pos-list reg-list start end preserve-case)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(defun replace-aux (target-string replacement pos-list reg-list
|
||||
start end preserve-case simple-calls)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Auxiliary function used by REGEX-REPLACE and
|
||||
REGEX-REPLACE-ALL. POS-LIST contains a list with the start and end
|
||||
positions of all matches while REG-LIST contains a list of arrays
|
||||
@ -867,7 +924,8 @@ representing the corresponding register start and end positions."
|
||||
target-string
|
||||
start end
|
||||
from to
|
||||
reg-starts reg-ends)
|
||||
reg-starts reg-ends
|
||||
simple-calls)
|
||||
nil)
|
||||
while to
|
||||
if replace
|
||||
@ -887,13 +945,9 @@ representing the corresponding register start and end positions."
|
||||
(defun regex-replace (regex target-string replacement
|
||||
&key (start 0)
|
||||
(end (length target-string))
|
||||
preserve-case)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
preserve-case
|
||||
simple-calls)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Try to match TARGET-STRING between START and END against REGEX and
|
||||
replace the first match with REPLACEMENT.
|
||||
|
||||
@ -926,19 +980,25 @@ match."
|
||||
(replace-aux target-string replacement
|
||||
(list match-start match-end)
|
||||
(list reg-starts reg-ends)
|
||||
start end preserve-case)
|
||||
start end preserve-case simple-calls)
|
||||
(subseq target-string start end))))
|
||||
|
||||
#-:cormanlisp
|
||||
(define-compiler-macro regex-replace
|
||||
(&whole form &environment env regex target-string replacement &rest rest)
|
||||
"Make sure that constant forms are compiled into scanners at compile time."
|
||||
(cond ((constantp regex env)
|
||||
`(regex-replace (load-time-value
|
||||
(create-scanner ,regex))
|
||||
,target-string ,replacement ,@rest))
|
||||
(t form)))
|
||||
|
||||
(defun regex-replace-all (regex target-string replacement
|
||||
&key (start 0)
|
||||
(end (length target-string))
|
||||
preserve-case)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
preserve-case
|
||||
simple-calls)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Try to match TARGET-STRING between START and END against REGEX and
|
||||
replace all matches with REPLACEMENT.
|
||||
|
||||
@ -978,9 +1038,19 @@ match."
|
||||
(replace-aux target-string replacement
|
||||
(nreverse pos-list)
|
||||
(nreverse reg-list)
|
||||
start end preserve-case)
|
||||
start end preserve-case simple-calls)
|
||||
(subseq target-string start end))))
|
||||
|
||||
#-:cormanlisp
|
||||
(define-compiler-macro regex-replace-all
|
||||
(&whole form &environment env regex target-string replacement &rest rest)
|
||||
"Make sure that constant forms are compiled into scanners at compile time."
|
||||
(cond ((constantp regex env)
|
||||
`(regex-replace-all (load-time-value
|
||||
(create-scanner ,regex))
|
||||
,target-string ,replacement ,@rest))
|
||||
(t form)))
|
||||
|
||||
#-:cormanlisp
|
||||
(defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
|
||||
&body body)
|
||||
@ -989,7 +1059,7 @@ through PACKAGES and executes BODY with SYMBOL bound to each symbol
|
||||
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
|
||||
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
|
||||
scanner, a case-insensitive scanner is used."
|
||||
(rebinding (regex)
|
||||
(with-rebinding (regex)
|
||||
(with-unique-names (scanner %packages next morep)
|
||||
`(let* ((,scanner (create-scanner ,regex
|
||||
:case-insensitive-mode
|
||||
@ -997,7 +1067,7 @@ scanner, a case-insensitive scanner is used."
|
||||
(not (functionp ,regex)))))
|
||||
(,%packages (or ,packages
|
||||
(list-all-packages))))
|
||||
(with-package-iterator (,next ,%packages :external :internal)
|
||||
(with-package-iterator (,next ,%packages :external :internal :inherited)
|
||||
(loop
|
||||
(multiple-value-bind (,morep symbol)
|
||||
(,next)
|
||||
@ -1026,7 +1096,7 @@ through PACKAGES and executes BODY with SYMBOL bound to each symbol
|
||||
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
|
||||
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
|
||||
scanner, a case-insensitive scanner is used."
|
||||
(rebinding (regex)
|
||||
(with-rebinding (regex)
|
||||
(with-unique-names (scanner %packages)
|
||||
`(let* ((,scanner (create-scanner ,regex
|
||||
:case-insensitive-mode
|
||||
@ -1040,12 +1110,7 @@ scanner, a case-insensitive scanner is used."
|
||||
,return-form))))
|
||||
|
||||
(defun regex-apropos-list (regex &optional packages &key (case-insensitive t))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Similar to the standard function APROPOS-LIST but returns a list of
|
||||
all symbols which match the regular expression REGEX. If
|
||||
CASE-INSENSITIVE is true and REGEX isn't already a scanner, a
|
||||
@ -1057,12 +1122,7 @@ case-insensitive scanner is used."
|
||||
(defun print-symbol-info (symbol)
|
||||
"Auxiliary function used by REGEX-APROPOS. Tries to print some
|
||||
meaningful information about a symbol."
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(handler-case
|
||||
(let ((output-list '()))
|
||||
(cond ((special-operator-p symbol)
|
||||
@ -1107,12 +1167,7 @@ meaningful information about a symbol."
|
||||
symbols which match the regular expression REGEX. If CASE-INSENSITIVE
|
||||
is true and REGEX isn't already a scanner, a case-insensitive scanner
|
||||
is used."
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(regex-apropos-aux (regex packages case-insensitive)
|
||||
(print-symbol-info symbol))
|
||||
(values))
|
||||
@ -1169,3 +1224,18 @@ end-of-line comments, i.e. those starting with #\\# and ending with
|
||||
comment-scanner)
|
||||
string
|
||||
#'remove-tokens))))
|
||||
|
||||
(defun parse-tree-synonym (symbol)
|
||||
"Returns the parse tree the SYMBOL symbol is a synonym for. Returns
|
||||
NIL is SYMBOL wasn't yet defined to be a synonym."
|
||||
(get symbol 'parse-tree-synonym))
|
||||
|
||||
(defun (setf parse-tree-synonym) (new-parse-tree symbol)
|
||||
"Defines SYMBOL to be a synonm for the parse tree NEW-PARSE-TREE."
|
||||
(setf (get symbol 'parse-tree-synonym) new-parse-tree))
|
||||
|
||||
(defmacro define-parse-tree-synonym (name parse-tree)
|
||||
"Defines the symbol NAME to be a synonym for the parse tree
|
||||
PARSE-TREE. Both arguments are quoted."
|
||||
`(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(setf (parse-tree-synonym ',name) ',parse-tree)))
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre-test.asd,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-test.asd,v 1.8 2005/11/01 09:51:01 edi Exp $
|
||||
|
||||
;;; This ASDF system definition was kindly provided by Marco Baringer.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -36,5 +36,6 @@
|
||||
(in-package #:cl-ppcre-test.system)
|
||||
|
||||
(defsystem #:cl-ppcre-test
|
||||
:version "1.2.12"
|
||||
:depends-on (#:cl-ppcre)
|
||||
:components ((:file "ppcre-tests")))
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre-test.system,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-test.system,v 1.9 2005/04/01 21:29:09 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
|
||||
38
cl-ppcre.asd
38
cl-ppcre.asd
@ -1,9 +1,9 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre.asd,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre.asd,v 1.12 2005/11/01 09:51:01 edi Exp $
|
||||
|
||||
;;; This ASDF system definition was kindly provided by Marco Baringer.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -36,16 +36,26 @@
|
||||
(in-package #:cl-ppcre.system)
|
||||
|
||||
(defsystem #:cl-ppcre
|
||||
:version "1.2.12"
|
||||
:serial t
|
||||
:components ((:file "packages")
|
||||
(:file "specials" :depends-on ("packages"))
|
||||
(:file "util" :depends-on ("packages"))
|
||||
(:file "errors" :depends-on ("util"))
|
||||
(:file "lexer" :depends-on ("errors" "specials"))
|
||||
(:file "parser" :depends-on ("lexer"))
|
||||
(:file "regex-class" :depends-on ("parser"))
|
||||
(:file "convert" :depends-on ("regex-class"))
|
||||
(:file "optimize" :depends-on ("convert"))
|
||||
(:file "closures" :depends-on ("optimize" "specials"))
|
||||
(:file "repetition-closures" :depends-on ("closures"))
|
||||
(:file "scanner" :depends-on ("repetition-closures"))
|
||||
(:file "api" :depends-on ("scanner"))))
|
||||
(:file "specials")
|
||||
(:file "util")
|
||||
(:file "errors")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "lexer")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "parser")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "regex-class")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "convert")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "optimize")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "closures")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "repetition-closures")
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "scanner")
|
||||
(:file "api")))
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre.system,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre.system,v 1.11 2005/04/01 21:29:09 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -40,12 +40,20 @@
|
||||
(:file "specials" :depends-on ("packages"))
|
||||
(:file "util" :depends-on ("packages"))
|
||||
(:file "errors" :depends-on ("util"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "lexer" :depends-on ("errors" "specials"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "parser" :depends-on ("lexer"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "regex-class" :depends-on ("parser"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "convert" :depends-on ("regex-class"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "optimize" :depends-on ("convert"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "closures" :depends-on ("optimize" "specials"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "repetition-closures" :depends-on ("closures"))
|
||||
#-:use-acl-regexp2-engine
|
||||
(:file "scanner" :depends-on ("repetition-closures"))
|
||||
(:file "api" :depends-on ("scanner"))))
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/closures.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/closures.lisp,v 1.29 2005/05/16 16:29:23 edi Exp $
|
||||
|
||||
;;; Here we create the closures which together build the final
|
||||
;;; scanner.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -38,12 +38,7 @@
|
||||
"Like STRING=, i.e. compares the special string *STRING* from START1
|
||||
to END1 with STRING2 from START2 to END2. Note that there's no
|
||||
boundary check - this has to be implemented by the caller."
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum start1 end1 start2 end2))
|
||||
(loop for string1-idx of-type fixnum from start1 below end1
|
||||
for string2-idx of-type fixnum from start2 below end2
|
||||
@ -54,12 +49,7 @@ boundary check - this has to be implemented by the caller."
|
||||
"Like STRING-EQUAL, i.e. compares the special string *STRING* from
|
||||
START1 to END1 with STRING2 from START2 to END2. Note that there's no
|
||||
boundary check - this has to be implemented by the caller."
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum start1 end1 start2 end2))
|
||||
(loop for string1-idx of-type fixnum from start1 below end1
|
||||
for string2-idx of-type fixnum from start2 below end2
|
||||
@ -67,12 +57,7 @@ boundary check - this has to be implemented by the caller."
|
||||
(schar string2 string2-idx))))
|
||||
|
||||
(defgeneric create-matcher-aux (regex next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which takes one parameter,
|
||||
START-POS, and tests whether REGEX can match *STRING* at START-POS
|
||||
such that the call to NEXT-FN after the match would succeed."))
|
||||
@ -399,14 +384,10 @@ against CHR-EXPR."
|
||||
|
||||
(defun word-boundary-p (start-pos)
|
||||
"Check whether START-POS is a word-boundary within *STRING*."
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum start-pos))
|
||||
(let ((1-start-pos (1- start-pos)))
|
||||
(let ((1-start-pos (1- start-pos))
|
||||
(*start-pos* (or *real-start-pos* *start-pos*)))
|
||||
;; either the character before START-POS is a word-constituent and
|
||||
;; the character at START-POS isn't...
|
||||
(or (and (or (= start-pos *end-pos*)
|
||||
@ -571,6 +552,13 @@ against CHR-EXPR."
|
||||
(and next-pos
|
||||
(funcall next-fn next-pos))))))
|
||||
|
||||
(defmethod create-matcher-aux ((filter filter) next-fn)
|
||||
(let ((fn (fn filter)))
|
||||
(lambda (start-pos)
|
||||
(let ((next-pos (funcall fn start-pos)))
|
||||
(and next-pos
|
||||
(funcall next-fn next-pos))))))
|
||||
|
||||
(defmethod create-matcher-aux ((void void) next-fn)
|
||||
;; optimize away VOIDs: don't create a closure, just return NEXT-FN
|
||||
next-fn)
|
||||
|
||||
72
convert.lisp
72
convert.lisp
@ -1,11 +1,11 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/convert.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/convert.lisp,v 1.22 2005/04/01 21:29:09 edi Exp $
|
||||
|
||||
;;; Here the parse tree is converted into its internal representation
|
||||
;;; using REGEX objects. At the same time some optimizations are
|
||||
;;; already applied.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -50,12 +50,7 @@
|
||||
`(third ,flags))
|
||||
|
||||
(defun set-flag (token)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (special flags))
|
||||
"Reads a flag token and sets or unsets the corresponding entry in
|
||||
the special FLAGS list."
|
||||
@ -76,12 +71,7 @@ the special FLAGS list."
|
||||
(signal-ppcre-syntax-error "Unknown flag token ~A" token))))
|
||||
|
||||
(defun add-range-to-hash (hash from to)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (special flags))
|
||||
"Adds all characters from character FROM to character TO (inclusive)
|
||||
to the char class hash HASH. Does the right thing with respect to
|
||||
@ -102,12 +92,7 @@ case-(in)sensitivity as specified by the special variable FLAGS."
|
||||
hash))
|
||||
|
||||
(defun convert-char-class-to-hash (list)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Combines all items in LIST into one char class hash and returns it.
|
||||
Items can be single characters, character ranges like \(:RANGE #\\A
|
||||
#\\E), or special character classes like :DIGIT-CLASS. Does the right
|
||||
@ -115,7 +100,7 @@ thing with respect to case-\(in)sensitivity as specified by the
|
||||
special variable FLAGS."
|
||||
(loop with hash = (make-hash-table :size (ceiling (expt *regex-char-code-limit* (/ 1 4)))
|
||||
:rehash-size (float (expt *regex-char-code-limit* (/ 1 4)))
|
||||
:rehash-threshold 1.0)
|
||||
:rehash-threshold #-genera 1.0 #+genera 0.99)
|
||||
for item in list
|
||||
if (characterp item)
|
||||
;; treat a single character C like a range (:RANGE C C)
|
||||
@ -157,12 +142,7 @@ special variable FLAGS."
|
||||
min-len
|
||||
length
|
||||
reg-seen)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum minimum)
|
||||
(type (or fixnum null) maximum))
|
||||
"Splits a REPETITION object into a constant and a varying part if
|
||||
@ -230,13 +210,8 @@ the same name."
|
||||
;; case if the regex starts with ".*" which implicitely anchors the
|
||||
;; regex at the start (perhaps modulo #\Newline).
|
||||
|
||||
(defmethod maybe-accumulate ((str str))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(defun maybe-accumulate (str)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (special accumulate-start-p starts-with))
|
||||
(declare (ftype (function (t) fixnum) len))
|
||||
"Accumulate STR into the special variable STARTS-WITH if
|
||||
@ -291,12 +266,7 @@ NIL or a STR object of the same case mode. Always returns NIL."
|
||||
nil)
|
||||
|
||||
(defun convert-aux (parse-tree)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (special flags reg-num accumulate-start-p starts-with max-back-ref))
|
||||
"Converts the parse tree PARSE-TREE into a REGEX object and returns it.
|
||||
|
||||
@ -538,8 +508,17 @@ Will also
|
||||
(make-instance 'register
|
||||
:regex (convert-aux (second parse-tree))
|
||||
:num stored-reg-num)))
|
||||
;; (:FILTER <function> &optional <length>)
|
||||
((:filter)
|
||||
;; stop accumulating into STARTS-WITH
|
||||
(setq accumulate-start-p nil)
|
||||
(make-instance 'filter
|
||||
:fn (second parse-tree)
|
||||
:len (third parse-tree)))
|
||||
;; (:STANDALONE <regex>)
|
||||
((:standalone)
|
||||
;; stop accumulating into STARTS-WITH
|
||||
(setq accumulate-start-p nil)
|
||||
;; keep the effect of modifiers local to the enclosed
|
||||
;; regex
|
||||
(let ((flags (copy-list flags)))
|
||||
@ -739,16 +718,15 @@ Will also
|
||||
(set-flag parse-tree)
|
||||
(make-instance 'void))
|
||||
(otherwise
|
||||
(let ((translation (and (symbolp parse-tree)
|
||||
(parse-tree-synonym parse-tree))))
|
||||
(if translation
|
||||
(convert-aux (copy-tree translation))
|
||||
(signal-ppcre-syntax-error "Unknown token ~A in parse-tree"
|
||||
parse-tree))))))
|
||||
parse-tree))))))))
|
||||
|
||||
(defun convert (parse-tree)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Converts the parse tree PARSE-TREE into an equivalent REGEX object
|
||||
and returns three values: the REGEX object, the number of registers
|
||||
seen and an object the regex starts with which is either a STR object
|
||||
|
||||
620
doc/index.html
620
doc/index.html
@ -6,14 +6,12 @@
|
||||
<title>CL-PPCRE - portable Perl-compatible regular expressions for Common Lisp</title>
|
||||
<style type="text/css">
|
||||
pre { padding:5px; background-color:#e0e0e0 }
|
||||
a.none { text-decoration: none; color:black }
|
||||
a.none:visited { text-decoration: none; color:black }
|
||||
a.none:active { text-decoration: none; color:black }
|
||||
a.none:hover { text-decoration: none; color:black }
|
||||
a { text-decoration: none; }
|
||||
a:visited { text-decoration: none; }
|
||||
a:active { text-decoration: underline; }
|
||||
a:hover { text-decoration: underline; }
|
||||
a.none:hover { border:1px solid white; }
|
||||
a { border:1px solid white; }
|
||||
a:hover { border: 1px solid black; }
|
||||
a.noborder { border:0px }
|
||||
a.noborder:hover { border:0px }
|
||||
</style>
|
||||
</head>
|
||||
|
||||
@ -47,7 +45,7 @@ to CLISP's own regex implementation which is also written in
|
||||
C.
|
||||
|
||||
<li>It is <b>portable</b>, i.e. the code aims to be strictly <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Front/index.htm">ANSI-compliant</a>. If
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Front/index.htm">ANSI-compliant</a>. If
|
||||
you encounter any deviations this is an error and should be
|
||||
reported to <a
|
||||
href="#mail">the mailing list</a>. CL-PPCRE has been
|
||||
@ -55,16 +53,18 @@ successfully tested with the following Common Lisp implementations:
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="http://www.franz.com/products/allegrocl/">Allegro Common Lisp</a> (6.2 trial on Gentoo Linux 1.1a)
|
||||
<li><a href="http://clisp.sourceforge.net/">CLISP</a> (2.30 on Gentoo Linux 1.1a and 2.29 on Windows XP pro)
|
||||
<li><a href="http://www.cons.org/cmucl/">CMUCL</a> (18e on Gentoo Linux 1.1a)
|
||||
<li><a href="http://www.cormanlisp.com/">Corman Lisp</a> (2.5 on Windows XP pro)
|
||||
<li><a href="http://ecls.sourceforge.net/">ECL</a> (0.9c on Gentoo Linux 1.1a)
|
||||
<li><a href="http://www.digitool.com/">Macintosh Common Lisp</a> (4.3 demo on MacOS 9.1 - only tested with CL-PPCRE 0.1.x)
|
||||
<li><a href="http://openmcl.clozure.com/">OpenMCL</a> (0.13.4 on MacOS X 10.2.2 - only tested with CL-PPCRE 0.1.x)
|
||||
<li><a href="http://sbcl.sourceforge.net/">SBCL</a> (0.8.4 on Gentoo Linux 1.1a)
|
||||
<li><a href="http://www.scieneer.com/scl/">Scieneer Common Lisp</a> (1.1.1 evaluation on Gentoo Linux 1.1a - only tested with CL-PPCRE 0.1.x)
|
||||
<li><a href="http://www.lispworks.com/">Xanalys LispWorks</a> (4.2.7 professional on Gentoo Linux 1.1a and 4.3.6 professional on Windows XP pro)
|
||||
<li><a href="http://www.franz.com/products/allegrocl/">Allegro Common Lisp</a>
|
||||
<li><a href="http://armedbear.org/abcl.html">Armed Bear Common Lisp</a>
|
||||
<li><a href="http://clisp.sourceforge.net/">CLISP</a>
|
||||
<li><a href="http://www.cons.org/cmucl/">CMUCL</a>
|
||||
<li><a href="http://www.cormanlisp.com/">Corman Lisp</a>
|
||||
<li><a href="http://ecls.sourceforge.net/">ECL</a>
|
||||
<li><a href="http://www.symbolics.com/">Genera</a>
|
||||
<li><a href="http://www.digitool.com/">Macintosh Common Lisp</a>
|
||||
<li><a href="http://openmcl.clozure.com/">OpenMCL</a>
|
||||
<li><a href="http://sbcl.sourceforge.net/">SBCL</a>
|
||||
<li><a href="http://www.scieneer.com/scl/">Scieneer Common Lisp</a>
|
||||
<li><a href="http://www.lispworks.com/">LispWorks</a>
|
||||
|
||||
</ul>
|
||||
|
||||
@ -116,14 +116,26 @@ license</b></a> so you can basically do with it whatever you want.
|
||||
|
||||
</ul>
|
||||
|
||||
CL-PPCRE has been used successfully in various applications like <a
|
||||
href="http://nostoc.stanford.edu/Docs/">BioLingua</a>, <a
|
||||
href="http://www.hpc.unm.edu/~download/LoGS/">LoGS</a>, <a href="http://cafespot.net/">CafeSpot</a>, <a href="http://www.eboy.com/">Eboy</a>, or <a
|
||||
href="http://weitz.de/regex-coach/">The Regex Coach</a>.
|
||||
|
||||
<p>
|
||||
<font color=red>Download shortcut:</font> <a href="http://weitz.de/files/cl-ppcre.tar.gz">http://weitz.de/files/cl-ppcre.tar.gz</a>.
|
||||
|
||||
</blockquote>
|
||||
|
||||
<br> <br><h3><a class=none name="contents">Contents</a></h3>
|
||||
<ol>
|
||||
<li><a href="#howto">How to use CL-PPCRE</a>
|
||||
<li><a href="#install">Download and installation</a>
|
||||
<li><a href="#mail">Support and mailing lists</a>
|
||||
<li><a href="#dict">The CL-PPCRE dictionary</a>
|
||||
<ol>
|
||||
<li><a href="#create-scanner1"><code>create-scanner</code></a> (for Perl regex strings)
|
||||
<li><a href="#create-scanner"><code>create-scanner</code></a> (for Perl regex strings)
|
||||
<li><a href="#create-scanner2"><code>create-scanner</code></a> (for parse trees)
|
||||
<li><a href="#parse-tree-synonym"><code>parse-tree-synonym</code></a>
|
||||
<li><a href="#define-parse-tree-synonym"><code>define-parse-tree-synonym</code></a>
|
||||
<li><a href="#scan"><code>scan</code></a>
|
||||
<li><a href="#scan-to-strings"><code>scan-to-strings</code></a>
|
||||
<li><a href="#register-groups-bind"><code>register-groups-bind</code></a>
|
||||
@ -148,8 +160,7 @@ license</b></a> so you can basically do with it whatever you want.
|
||||
<li><a href="#ppcre-syntax-error-string"><code>ppcre-syntax-error-string</code></a>
|
||||
<li><a href="#ppcre-syntax-error-pos"><code>ppcre-syntax-error-pos</code></a>
|
||||
</ol>
|
||||
<li><a href="#install">Download and installation</a>
|
||||
<li><a href="#mail">Support and mailing lists</a>
|
||||
<li><a href="#filters">Filters</a>
|
||||
<li><a href="#test">Testing CL-PPCRE</a>
|
||||
<li><a href="#perl">Compatibility with Perl</a>
|
||||
<ol>
|
||||
@ -173,19 +184,84 @@ license</b></a> so you can basically do with it whatever you want.
|
||||
<li><a href="#backslash">Backslashes may confuse you...</a>
|
||||
</ol>
|
||||
<li><a href="#remarks">Remarks</a>
|
||||
<li><a href="#allegro">AllegroCL compatibility mode</a>
|
||||
<li><a href="#ack">Acknowledgements</a>
|
||||
</ol>
|
||||
|
||||
<br> <br><h3><a class=none name="howto">How to use CL-PPCRE</a></h3>
|
||||
<br> <br><h3><a name="install" class=none>Download and installation</a></h3>
|
||||
|
||||
CL-PPCRE together with this documentation can be downloaded from <a
|
||||
href="http://weitz.de/files/cl-ppcre.tar.gz">http://weitz.de/files/cl-ppcre.tar.gz</a>. The
|
||||
current version is 1.2.12. A <a
|
||||
href="CHANGELOG">CHANGELOG</a> is available.
|
||||
<p>
|
||||
If you're on <a href="http://www.debian.org/">Debian</a> you should
|
||||
probably use the <a
|
||||
href="http://packages.debian.org/cgi-bin/search_packages.pl?keywords=cl-ppcre&searchon=names&version=all&release=all">cl-ppcre
|
||||
Debian package</a> which is available thanks to <a href="http://pvaneynd.mailworks.org/">Peter van Eynde</a> and <a href="http://b9.com/">Kevin
|
||||
Rosenberg</a>. There's also a port
|
||||
for <a href="http://www.cliki.net/gentoo">Gentoo Linux</a> thanks to Matthew Kennedy and a <a href="http://www.freebsd.org/cgi/url.cgi?ports/textproc/cl-ppcre/pkg-descr">FreeBSD port</a> thanks to Henrik Motakef.
|
||||
Installation via <a
|
||||
href="http://www.cliki.net/asdf-install">asdf-install</a> should as well
|
||||
be possible.
|
||||
<p>
|
||||
CL-PPCRE comes with simple system definitions for <a
|
||||
href="http://www.cliki.net/mk-defsystem">MK:DEFSYSTEM</a> and <a
|
||||
href="http://www.cliki.net/asdf">asdf</a> so you can either adapt it
|
||||
to your needs or just unpack the archive and from within the CL-PPCRE
|
||||
directory start your Lisp image and evaluate the form
|
||||
<code>(mk:compile-system "cl-ppcre")</code> (or the
|
||||
equivalent one for asdf) which should compile and load the whole
|
||||
system.
|
||||
<p>
|
||||
If for some reason you don't want to use MK:DEFSYSTEM or asdf you
|
||||
can just <code>LOAD</code> the file <code>load.lisp</code> or you
|
||||
can also get away with something like this:
|
||||
|
||||
<pre>
|
||||
(loop for name in '("packages" "specials" "util" "errors" "lexer"
|
||||
"parser" "regex-class" "convert" "optimize"
|
||||
"closures" "repetition-closures" "scanner" "api")
|
||||
do (compile-file (make-pathname :name name
|
||||
:type "lisp"))
|
||||
(load name))
|
||||
</pre>
|
||||
|
||||
Note that on CL implementations which use the Python compiler
|
||||
(i.e. CMUCL, SBCL, SCL) you can concatenate the compiled object files
|
||||
to create one single object file which you can load afterwards:
|
||||
|
||||
<pre>
|
||||
cat {packages,specials,util,errors,lexer,parser,regex-class,convert,optimize,closures,repetition-closures,scanner,api}.x86f > cl-ppcre.x86f
|
||||
</pre>
|
||||
|
||||
(Replace ".<code>x86f</code>" with the correct suffix for
|
||||
your platform.)
|
||||
<p>
|
||||
Note that there is <em>no</em> public CVS repository for CL-PPCRE - the repository at <a href="http://common-lisp.net/">common-lisp.net</a> is out of date and not in sync with the (current) version distributed from <a href="http://weitz.de/">weitz.de</a>.
|
||||
|
||||
|
||||
<br> <br><h3><a name="mail" class=none>Support and mailing lists</a></h3>
|
||||
|
||||
For questions, bug reports, feature requests, improvements, or patches
|
||||
please use the <a
|
||||
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-devel">cl-ppcre-devel
|
||||
mailing list</a>. If you want to be notified about future releases
|
||||
subscribe to the <a
|
||||
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-announce">cl-ppcre-announce
|
||||
mailing list</a>. These mailing lists were made available thanks to
|
||||
the services of <a href="http://common-lisp.net/">common-lisp.net</a>.
|
||||
|
||||
<br> <br><h3><a class=none name="dict">The CL-PPCRE dictionary</a></h3>
|
||||
|
||||
CL-PPCRE exports the following symbols:
|
||||
|
||||
<p><br>[Function]
|
||||
<br><a class=none name="create-scanner1"><b>create-scanner</b> <i>string <tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||
<p><br>[Method]
|
||||
<br><a class=none name="create-scanner"><b>create-scanner</b> <i>(string string)<tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||
|
||||
<blockquote><br> Accepts a string which is a regular expression in
|
||||
Perl syntax and returns a closure which will scan strings for this
|
||||
regular expression. The mode keyboard arguments are equivalent to the
|
||||
regular expression. The mode keyword arguments are equivalent to the
|
||||
<code>"imsx"</code> modifiers in Perl. The
|
||||
<code>destructive</code> keyword will be ignored.
|
||||
<p>
|
||||
@ -236,12 +312,17 @@ The keyword arguments are just for your
|
||||
convenience. You can always use embedded modifiers like
|
||||
<code>"(?i-s)"</code> instead.</blockquote>
|
||||
|
||||
<p><br>[Method]
|
||||
<br><a class=none name="create-scanner"><b>create-scanner</b> <i>(function function)<tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||
<blockquote><br>
|
||||
In this case <code><i>function</i></code> should be a scanner returned by another invocation of <code>CREATE-SCANNER</code>. It will be returned as is.
|
||||
</blockquote>
|
||||
|
||||
<p><br>[Function]
|
||||
<br><a class=none name="create-scanner2"><b>create-scanner</b> <i>parse-tree <tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||
<p><br>[Method]
|
||||
<br><a class=none name="create-scanner2"><b>create-scanner</b> <i>(parse-tree t)<tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||
<blockquote><br>
|
||||
This is similar to <a
|
||||
href="#create-scanner1"><code>CREATE-SCANNER</code></a> above but
|
||||
href="#create-scanner"><code>CREATE-SCANNER</code></a> for regex strings above but
|
||||
accepts a <em>parse tree</em> as its first argument. A parse tree is an S-expression
|
||||
conforming to the following syntax:
|
||||
|
||||
@ -290,6 +371,11 @@ and <code>:NOT-SINGLE-LINE-MODE-P</code> are equivalent to Perl's
|
||||
kept local to the innermost enclosing grouping or clustering
|
||||
construct.
|
||||
|
||||
</li><li>All other symbols will signal an error of type <a
|
||||
href="#ppcre-syntax-error"><code>PPCRE-SYNTAX-ERROR</code></a>
|
||||
<em>unless</em> they are defined to be <a
|
||||
href="#parse-tree-synonym"><em>parse tree synonyms</em></a>.
|
||||
|
||||
<li><code>(:FLAGS {<modifier>}*)</code> where
|
||||
<code><modifier></code> is one of the modifier symbols from
|
||||
above is used to group modifier symbols. The modifiers are applied
|
||||
@ -357,6 +443,14 @@ beginning with 1.
|
||||
<code><<i>number</i>></code> is a positive integer is a back-reference to a
|
||||
register group.
|
||||
|
||||
<li><a class=none name="filterdef"><code>(:FILTER <<i>function</i>> <tt>&optional</tt>
|
||||
<<i>length</i>>)</code></a> where
|
||||
<code><<i>function</i>></code> is a <a
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
||||
designator</a> and <code><<i>length</i>></code> is a
|
||||
non-negative integer or <code>NIL</code> is a user-defined <a
|
||||
href="#filters">filter</a>.
|
||||
|
||||
<li><code>(:CHAR-CLASS|:INVERTED-CHAR-CLASS
|
||||
{<<i>item</i>>}*)</code> where <code><<i>item</i>></code>
|
||||
is either a character, a <em>character range</em>, or a symbol for a
|
||||
@ -379,10 +473,10 @@ Perl regex strings when given to <code>CREATE-SCANNER</code>. To
|
||||
circumvent this you can always use the equivalent parse tree <code>(:GROUP
|
||||
<<i>string</i>>)</code> instead.
|
||||
<p>
|
||||
Note that currently <code>CREATE-SCANNER</code> doesn't always check
|
||||
Note that <code>CREATE-SCANNER</code> doesn't always check
|
||||
for the well-formedness of its first argument, i.e. you are expected
|
||||
to provide <em>correct</em> parse trees. This will most likely change in
|
||||
future releases.
|
||||
to provide <em>correct</em> parse trees.
|
||||
|
||||
<p>
|
||||
The usage of the keyword argument <code>extended-mode</code> obviously
|
||||
doesn't make sense if <code>CREATE-SCANNER</code> is applied to parse
|
||||
@ -418,6 +512,72 @@ regex strings to parse trees. Here are some examples:
|
||||
(:SEQUENCE (:POSITIVE-LOOKAHEAD #\a) #\b)
|
||||
</pre></blockquote>
|
||||
|
||||
<p><br>[Accessor]
|
||||
<br><a class="none" name="parse-tree-synonym"><b>parse-tree-synonym</b> <i>symbol</i> => <i>parse-tree</i>
|
||||
<br><tt>(setf (</tt><b>parse-tree-synonym</b> <i>symbol</i>) <i>new-parse-tree</i><tt>)</tt></a>
|
||||
|
||||
</p><blockquote><br>
|
||||
Any symbol (unless it's a keyword with a special meaning in parse
|
||||
trees) can be made a "synonym", i.e. an abbreviation, for another parse
|
||||
tree by this accessor. <code>PARSE-TREE-SYNONYM</code> returns <code>NIL</code> if <code><i>symbol</i></code> isn't a synonym yet.
|
||||
<p>
|
||||
Here's an example:
|
||||
|
||||
</p><pre>* (cl-ppcre::parse-string "a*b+")
|
||||
(:SEQUENCE (:GREEDY-REPETITION 0 NIL #\a) (:GREEDY-REPETITION 1 NIL #\b))
|
||||
|
||||
* (defun my-repetition (char min)
|
||||
`(:greedy-repetition ,min nil ,char))
|
||||
MY-REPETITION
|
||||
|
||||
* (setf (parse-tree-synonym 'a*) (my-repetition #\a 0))
|
||||
(:GREEDY-REPETITION 0 NIL #\a)
|
||||
|
||||
* (setf (parse-tree-synonym 'b+) (my-repetition #\b 1))
|
||||
(:GREEDY-REPETITION 1 NIL #\b)
|
||||
|
||||
* (let ((scanner (create-scanner '(:sequence a* b+))))
|
||||
(dolist (string '("ab" "b" "aab" "a" "x"))
|
||||
(print (scan scanner string)))
|
||||
(values))
|
||||
0
|
||||
0
|
||||
0
|
||||
NIL
|
||||
NIL
|
||||
|
||||
* (parse-tree-synonym 'a*)
|
||||
(:GREEDY-REPETITION 0 NIL #\a)
|
||||
|
||||
* (parse-tree-synonym 'a+)
|
||||
NIL
|
||||
</pre></blockquote>
|
||||
|
||||
<p><br>[Macro]
|
||||
<br><a class="none" name="define-parse-tree-synonym"><b>define-parse-tree-synonym</b> <i>name parse-tree</i> => <i>parse-tree</i></a>
|
||||
|
||||
</p><blockquote><br>
|
||||
This is a convenience macro for parse tree synonyms defined as
|
||||
|
||||
<pre>(defmacro define-parse-tree-synonym (name parse-tree)
|
||||
`(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(setf (parse-tree-synonym ',name) ',parse-tree)))
|
||||
</pre>
|
||||
|
||||
so you can write code like this:
|
||||
|
||||
<pre>
|
||||
(define-parse-tree-synonym a-z
|
||||
(:char-class (:range #\a #\z) (:range #\a #\z)))
|
||||
|
||||
(define-parse-tree-synonym a-z*
|
||||
(:greedy-repetition 0 nil a-z))
|
||||
|
||||
(defun ascii-char-tester (string)
|
||||
(scan '(:sequence :start-anchor a-z* :end-anchor)
|
||||
string))
|
||||
</pre></blockquote>
|
||||
|
||||
<p><br>
|
||||
<b>For the rest of this section </b><code><i>regex</i></code><b> can
|
||||
always be a string (which is interpreted as a Perl regular
|
||||
@ -430,7 +590,7 @@ href="#scan"><code>SCAN</code></a><b>.</b>
|
||||
|
||||
|
||||
|
||||
<p><br>[Function]
|
||||
<p><br>[Standard Generic Function]
|
||||
<br><a class=none name="scan"><b>scan</b> <i>regex target-string <tt>&key</tt> start end</i> => <i>match-start, match-end, reg-starts, reg-ends</i></a>
|
||||
|
||||
<blockquote><br>
|
||||
@ -525,7 +685,15 @@ Examples:
|
||||
Evaluates <code><i>statement*</i></code> with the variables in <code><i>var-list</i></code> bound to the
|
||||
corresponding register groups after <code><i>target-string</i></code> has been matched
|
||||
against <code><i>regex</i></code>, i.e. each variable is either
|
||||
bound to a string or to <code>NIL</code>. If there is no match, the <code><i>statement*</i></code> forms are <em>not</em>
|
||||
bound to a string or to <code>NIL</code>.
|
||||
As a shortcut, the elements of <code><i>var-list</i></code> can also be lists of the form <code>(FN VAR)</code> where <code>VAR</code> is the variable symbol
|
||||
and <code>FN</code> is a <a
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
||||
designator</a> (which is evaluated) denoting a function which is to be applied to the string before the result is bound to <code>VAR</code>.
|
||||
To make this even more convenient the form <code>(FN VAR1 ...VARn)</code> can be used as an abbreviation for
|
||||
<code>(FN VAR1) ... (FN VARn).
|
||||
<p>
|
||||
If there is no match, the <code><i>statement*</i></code> forms are <em>not</em>
|
||||
executed. For each element of
|
||||
<code><i>var-list</i></code> which is <code>NIL</code> there's no binding to the corresponding register
|
||||
group. The number of variables in <code><i>var-list</i></code> must not be greater than
|
||||
@ -537,15 +705,22 @@ share structure with <code><i>target-string</i></code>.
|
||||
("((a)|(b)|(c))+" "abababc" :sharedp t)
|
||||
(list first second third fourth))
|
||||
("c" "a" "b" "c")
|
||||
|
||||
* (register-groups-bind (nil second third fourth)
|
||||
<font color=orange>;; note that we don't bind the first and fifth register group</font>
|
||||
("((a)|(b)|(c))()+" "abababc" :start 6)
|
||||
(list second third fourth))
|
||||
(NIL NIL "c")
|
||||
|
||||
* (register-groups-bind (first)
|
||||
("(a|b)+" "accc" :start 1)
|
||||
(format t "This will not be printed: ~A" first))
|
||||
NIL
|
||||
|
||||
* (register-groups-bind (fname lname (#'parse-integer date month year))
|
||||
("(\\w+)\\s+(\\w+)\\s+(\\d{1,2})\\.(\\d{1,2})\\.(\\d{4})" "Frank Zappa 21.12.1940")
|
||||
(list fname lname (encode-universal-time 0 0 0 date month year)))
|
||||
("Frank" "Zappa" 1292882400)
|
||||
</pre>
|
||||
</blockquote>
|
||||
|
||||
@ -639,7 +814,7 @@ CROSSFOOT
|
||||
6
|
||||
</pre>
|
||||
|
||||
Of course, in real life you would do this with <a href="#do-matches"><code>DO-MATCHES</code></a> and use the <code><i>start</i></code> and <code><i>end</i></code> keyword parameters of <a href="http://www.lispworks.com/reference/HyperSpec/Body/f_parse_.htm"><code>PARSE-INTEGER</code></a>.</blockquote>
|
||||
Of course, in real life you would do this with <a href="#do-matches"><code>DO-MATCHES</code></a> and use the <code><i>start</i></code> and <code><i>end</i></code> keyword parameters of <a href="http://www.lispworks.com/documentation/HyperSpec/Body/f_parse_.htm"><code>PARSE-INTEGER</code></a>.</blockquote>
|
||||
|
||||
<p><br>[Macro]
|
||||
<br><a class=none name="do-register-groups"><b>do-register-groups</b> <i>var-list (regex target-string <tt>&optional</tt> result-form <tt>&key</tt> start end sharedp) declaration* statement*</i> => <i>result*</i></a>
|
||||
@ -648,7 +823,7 @@ Of course, in real life you would do this with <a href="#do-matches"><code>DO-MA
|
||||
Iterates over <code><i>target-string</i></code> and tries to match <code><i>regex</i></code> as often as
|
||||
possible evaluating <code><i>statement*</i></code> with the variables in <code><i>var-list</i></code> bound to the
|
||||
corresponding register groups for each match in turn, i.e. each
|
||||
variable is either bound to a string or to <code>NIL</code>. The number of
|
||||
variable is either bound to a string or to <code>NIL</code>. You can use the same shortcuts and abbreviations as in <a href="#register-groups-bind"><code>REGISTER-GROUPS-BIND</code></a>. The number of
|
||||
variables in <code><i>var-list</i></code> must not be greater than the number of register
|
||||
groups. For each element of
|
||||
<code><i>var-list</i></code> which is <code>NIL</code> there's no binding to the corresponding register
|
||||
@ -669,6 +844,14 @@ match. If <code><i>sharedp</i></code> is true, the substrings may share structur
|
||||
("b" NIL "b" NIL)
|
||||
("c" NIL NIL "c")
|
||||
NIL
|
||||
|
||||
* (let (result)
|
||||
(do-register-groups ((#'parse-integer n) (#'intern sign) whitespace)
|
||||
("(\\d+)|(\\+|-|\\*|/)|(\\s+)" "12*15 - 42/3")
|
||||
(unless whitespace
|
||||
(push (or n sign) result)))
|
||||
(nreverse result))
|
||||
(12 * 15 - 42 / 3)
|
||||
</pre>
|
||||
</blockquote>
|
||||
|
||||
@ -787,7 +970,7 @@ frob")
|
||||
|
||||
|
||||
<p><br>[Function]
|
||||
<br><a class=none name="regex-replace"><b>regex-replace</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case</i> => <i>list</i></a>
|
||||
<br><a class=none name="regex-replace"><b>regex-replace</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case simple-calls</i> => <i>list</i></a>
|
||||
|
||||
<blockquote><br> Try to match <code><i>target-string</i></code>
|
||||
between <code><i>start</i></code> and <code><i>end</i></code> against
|
||||
@ -804,7 +987,7 @@ match, <code>"\`"</code> for the part of
|
||||
<code>N</code>th register where <code>N</code> is a positive integer.
|
||||
<p>
|
||||
<code><i>replacement</i></code> can also be a <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
||||
designator</a> in which case the match will be replaced with the
|
||||
result of calling the function designated by
|
||||
<code><i>replacement</i></code> with the arguments
|
||||
@ -816,6 +999,15 @@ result of calling the function designated by
|
||||
positions of matched registers (or <code>NIL</code>) - the meaning of
|
||||
the other arguments should be obvious.)
|
||||
<p>
|
||||
If <code><i>simple-calls</i></code> is true, a function designated by
|
||||
<code><i>replacement</i></code> will instead be called with the
|
||||
arguments <code><i>match</i></code>, <code><i>register-1</i></code>,
|
||||
..., <code><i>register-n</i></code> where <code><i>match</i></code> is
|
||||
the whole match as a string and <code><i>register-1</i></code> to
|
||||
<code><i>register-n</i></code> are the matched registers, also as
|
||||
strings (or <code>NIL</code>). Note that these strings share structure with
|
||||
<code><i>target-string</i></code> so you must not modify them.
|
||||
<p>
|
||||
Finally, <code><i>replacement</i></code> can be a list where each
|
||||
element is a string (which will be inserted verbatim), one of the
|
||||
symbols <code>:match</code>, <code>:before-match</code>, or
|
||||
@ -829,7 +1021,7 @@ If <code><i>preserve-case</i></code> is true (default is
|
||||
<code>NIL</code>), the replacement will try to preserve the case (all
|
||||
upper case, all lower case, or capitalized) of the match. The result
|
||||
will always be a <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
||||
string, even if <code><i>regex</i></code> doesn't match.
|
||||
<p>
|
||||
Examples:
|
||||
@ -860,7 +1052,7 @@ Examples:
|
||||
|
||||
|
||||
<p><br>[Function]
|
||||
<br><a class=none name="regex-replace-all"><b>regex-replace-all</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case</i> => <i>list</i></a>
|
||||
<br><a class=none name="regex-replace-all"><b>regex-replace-all</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case simple-calls</i> => <i>list</i></a>
|
||||
|
||||
<blockquote><br>
|
||||
Like <a href="#regex-replace"><code>REGEX-REPLACE</code></a> but replaces all matches.
|
||||
@ -912,6 +1104,34 @@ HOW-MANY
|
||||
"foo{...}bar{.....}{..}baz{....}frob"
|
||||
(list "[" 'how-many " dots]"))
|
||||
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"
|
||||
|
||||
* (let ((qp-regex (cl-ppcre:create-scanner "[\\x80-\\xff]")))
|
||||
(defun encode-quoted-printable (string)
|
||||
"Convert 8-bit string to quoted-printable representation.
|
||||
Version using SIMPLE-CALLS keyword argument."
|
||||
<font color=orange>;; ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there</font>
|
||||
(flet ((convert (match)
|
||||
(format nil "=~2,'0x" (char-code (char match 0)))))
|
||||
(cl-ppcre:regex-replace-all qp-regex string #'convert
|
||||
:simple-calls t))))
|
||||
|
||||
Converted ENCODE-QUOTED-PRINTABLE.
|
||||
ENCODE-QUOTED-PRINTABLE
|
||||
|
||||
* (encode-quoted-printable "Fête Sørensen naïve Hühner Straße")
|
||||
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe"
|
||||
|
||||
* (defun how-many (match first-register)
|
||||
(declare (ignore match))
|
||||
(format nil "~A" (length first-register)))
|
||||
HOW-MANY
|
||||
|
||||
* (cl-ppcre:regex-replace-all "{(.+?)}"
|
||||
"foo{...}bar{.....}{..}baz{....}frob"
|
||||
(list "[" 'how-many " dots]")
|
||||
:simple-calls t)
|
||||
|
||||
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"
|
||||
</pre></blockquote>
|
||||
|
||||
<p><br>[Function]
|
||||
@ -919,7 +1139,7 @@ HOW-MANY
|
||||
|
||||
<blockquote><br>
|
||||
Like <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_apropo.htm"><code>APROPOS</code></a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_apropo.htm"><code>APROPOS</code></a>
|
||||
but searches for interned symbols which match the regular expression
|
||||
<code><i>regex</i></code>. The output is implementation-dependent. If
|
||||
<code><i>case-insensitive</i></code> is true (which is the default)
|
||||
@ -983,7 +1203,7 @@ FOOBOO [variable] value: 43
|
||||
|
||||
<blockquote><br>
|
||||
Like <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_apropo.htm"><code>APROPOS-LIST</code></a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_apropo.htm"><code>APROPOS-LIST</code></a>
|
||||
but searches for interned symbols which match the regular expression
|
||||
<code><i>regex</i></code>. If <code><i>case-insensitive</i></code> is
|
||||
true (which is the default) and <code><i>regex</i></code> isn't
|
||||
@ -1001,18 +1221,18 @@ Example (continued from above):
|
||||
|
||||
<blockquote><br>This variable controls whether scanners take into
|
||||
account all characters of your CL implementation or only those the <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_char_c.htm#char-code"><code>CHAR-CODE</code></a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_char_c.htm#char-code"><code>CHAR-CODE</code></a>
|
||||
of which is not larger than its value. It is only relevant if the
|
||||
regular expression contains certain character classes. The default is
|
||||
<a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/v_char_c.htm"><code>CHAR-CODE-LIMIT</code></a>,
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/v_char_c.htm"><code>CHAR-CODE-LIMIT</code></a>,
|
||||
and you might see significant speed and space improvements during
|
||||
scanner <em>creation</em> if, say, your target strings only contain <a
|
||||
href="http://wwwwbs.cs.tu-berlin.de/user/czyborra/charsets/">ISO-8859-1</a>
|
||||
characters and you're using an implementation like AllegroCL,
|
||||
LispWorks, or CLISP where <code>CHAR-CODE-LIMIT</code> has a value
|
||||
much higher than 255. The <a href="#test">test suite</a> will
|
||||
automatically set <code>*REGEX-CHAR-CODE-LIMIT*</code> to 255 while
|
||||
CLISP, LispWorks, or SBCL where <code>CHAR-CODE-LIMIT</code> has a value
|
||||
much higher than 256. The <a href="#test">test suite</a> will
|
||||
automatically set <code>*REGEX-CHAR-CODE-LIMIT*</code> to 256 while
|
||||
you're running the default test.
|
||||
<p>
|
||||
Here's an example with LispWorks:
|
||||
@ -1028,8 +1248,8 @@ Allocation = 546600 bytes standard / 2162611 bytes fixlen
|
||||
0 Page faults
|
||||
#<closure 20654AF2>
|
||||
|
||||
CL-USER 24 > (time (let ((cl-ppcre:*regex-char-code-limit* 255)) (cl-ppcre:create-scanner "[3\\D]")))
|
||||
Timing the evaluation of (LET ((CL-PPCRE:*REGEX-CHAR-CODE-LIMIT* 255)) (CL-PPCRE:CREATE-SCANNER "[3\\D]"))
|
||||
CL-USER 24 > (time (let ((cl-ppcre:*regex-char-code-limit* 256)) (cl-ppcre:create-scanner "[3\\D]")))
|
||||
Timing the evaluation of (LET ((CL-PPCRE:*REGEX-CHAR-CODE-LIMIT* 256)) (CL-PPCRE:CREATE-SCANNER "[3\\D]"))
|
||||
|
||||
user time = 0.000
|
||||
system time = 0.000
|
||||
@ -1042,7 +1262,7 @@ Allocation = 3336 bytes standard / 8338 bytes fixlen
|
||||
Note: Due to the nature of <code>LOAD-TIME-VALUE</code> and the <a
|
||||
href="#compiler-macro">compiler macro for <code>SCAN</code></a> some
|
||||
scanners might be created in a <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
||||
lexical environment</a> at load time or at compile time so be careful
|
||||
to which value <code>*REGEX-CHAR-CODE-LIMIT*</code> is bound at that
|
||||
time. The default value should always yield correct results unless you
|
||||
@ -1052,14 +1272,14 @@ play dirty tricks with implementation-dependent behaviour, though.</blockquote>
|
||||
<br><a class=none name="use-bmh-matchers"><b>*use-bmh-matchers*</b></a>
|
||||
|
||||
<blockquote><br>Usually, the scanners created by <a
|
||||
href="#create-scanner1"><code>CREATE-SCANNER</code></a> (or
|
||||
href="#create-scanner"><code>CREATE-SCANNER</code></a> (or
|
||||
implicitely by other functions and macros) will use fast <a
|
||||
href="http://www-igm.univ-mlv.fr/~lecroq/string/node18.html">Boyer-Moore-Horspool
|
||||
matchers</a> to check for constant strings at the start or end of the
|
||||
regular expression. If <code>*USE-BMH-MATCHERS*</code> is
|
||||
<code>NIL</code> (the default is <code>T</code>), the standard
|
||||
function <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_search.htm"><code>SEARCH</code></a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_search.htm"><code>SEARCH</code></a>
|
||||
will be used instead. This will usually be a bit slower but can save
|
||||
lots of space if you're storing many scanners. The <a
|
||||
href="#test">test suite</a> will automatically set
|
||||
@ -1069,7 +1289,7 @@ the default test.
|
||||
Note: Due to the nature of <code>LOAD-TIME-VALUE</code> and the <a
|
||||
href="#compiler-macro">compiler macro for <code>SCAN</code></a> some
|
||||
scanners might be created in a <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
||||
lexical environment</a> at load time or at compile time so be careful
|
||||
to which value <code>*USE-BMH-MATCHERS*</code> is bound at that
|
||||
time.</blockquote>
|
||||
@ -1134,7 +1354,7 @@ href="#*allow-quoting*"><code>*ALLOW-QUOTING*</code></a> is
|
||||
non-word characters (everything except ASCII characters, digits and
|
||||
underline) of <code>STRING</code> are quoted by prepending a
|
||||
backslash similar to Perl's <code>quotemeta</code> function. It always returns a <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
||||
string.
|
||||
<pre>
|
||||
* (cl-ppcre:quote-meta-chars "[a-z]*")
|
||||
@ -1147,7 +1367,7 @@ string.
|
||||
<blockquote><br>
|
||||
Every error signaled by CL-PPCRE is of type
|
||||
<code>PPCRE-ERROR</code>. This is a direct subtype of <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/e_smp_er.htm"><code>SIMPLE-ERROR</code></a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/e_smp_er.htm"><code>SIMPLE-ERROR</code></a>
|
||||
without any additional slots or options.
|
||||
</blockquote>
|
||||
|
||||
@ -1210,7 +1430,7 @@ encountered (or <code>NIL</code> if the error happened while trying to
|
||||
convert a parse tree). This might be particularly useful when <a
|
||||
href="#*allow-quoting*"><code>*ALLOW-QUOTING*</code></a> is
|
||||
<em>true</em> because in this case the offending string might not be the one you gave to the <a
|
||||
href="#create-scanner1"><code>CREATE-SCANNER</code></a> function.
|
||||
href="#create-scanner"><code>CREATE-SCANNER</code></a> function.
|
||||
</blockquote>
|
||||
|
||||
<p><br>[Function]
|
||||
@ -1225,69 +1445,185 @@ convert a parse tree).
|
||||
</blockquote>
|
||||
|
||||
|
||||
<br> <br><h3><a name="install" class=none>Download and installation</a></h3>
|
||||
<br> <br><h3><a name="filters" class=none>Filters</a></h3>
|
||||
|
||||
CL-PPCRE together with this documentation can be downloaded from <a
|
||||
href="http://weitz.de/files/cl-ppcre.tgz">http://weitz.de/files/cl-ppcre.tgz</a>. The
|
||||
current version is 0.7.4 - older versions are
|
||||
available for download through URLs like
|
||||
<code>http://weitz.de/files/cl-ppcre-<version>.tgz</code>. A <a
|
||||
href="CHANGELOG">CHANGELOG</a> is available.
|
||||
Because several users have asked for it, CL-PPCRE now offers
|
||||
"filters" (see <a href="#filterdef">above</a> for syntax)
|
||||
which are basically arbitrary, user-defined functions that can act as
|
||||
regex building blocks. Filters can only be used within <a
|
||||
href="#create-scanner2">parse trees</a>, not within Perl regex
|
||||
strings.
|
||||
<p>
|
||||
If you're on <a href="http://www.debian.org/">Debian</a> you should
|
||||
probably use the <a
|
||||
href="http://packages.debian.org/cgi-bin/search_packages.pl?keywords=cl-ppcre&searchon=names&version=all&release=all">cl-ppcre
|
||||
Debian package</a> which is available thanks to <a href="http://b9.com/">Kevin
|
||||
Rosenberg</a>. There's also a port
|
||||
for <a href="http://www.cliki.net/gentoo">Gentoo Linux</a> thanks to Matthew Kennedy and a <a href="http://www.freebsd.org/cgi/url.cgi?ports/textproc/cl-ppcre/pkg-descr">FreeBSD port</a> thanks to Henrik Motakef.
|
||||
Installation via <a
|
||||
href="http://www.cliki.net/asdf-install">asdf-install</a> should as well
|
||||
be possible.
|
||||
Note that filters are currently considered an experimental feature and
|
||||
their API might change in the future.
|
||||
<p>
|
||||
CL-PPCRE comes with simple system definitions for <a
|
||||
href="http://www.cliki.net/mk-defsystem">MK:DEFSYSTEM</a> and <a
|
||||
href="http://www.cliki.net/asdf">asdf</a> so you can either adapt it
|
||||
to your needs or just unpack the archive and from within the CL-PPCRE
|
||||
directory start your Lisp image and evaluate the form
|
||||
<code>(mk:compile-system "cl-ppcre")</code> (or the
|
||||
equivalent one for asdf) which should compile and load the whole
|
||||
system.
|
||||
A filter is defined by its <em>filter function</em> which must be a
|
||||
function of one argument. During the parsing process this function
|
||||
might be called once or several times or it might not be called at
|
||||
all. If it's called its argument is an integer <code><i>pos</i></code>
|
||||
which is the current position within the target string. The filter can
|
||||
either return <code>NIL</code> (which means that the subexpression
|
||||
represented by this filter didn't match) or an integer not smaller
|
||||
than <code><i>pos</i></code> for success. A zero-length assertion
|
||||
should return <code><i>pos</i></code> itself while a filter which
|
||||
wants to consume <code>N</code> characters should return
|
||||
<code>(+ POS N)</code>.
|
||||
<p>
|
||||
If for some reason you don't want to use MK:DEFSYSTEM or asdf you
|
||||
can just <code>LOAD</code> the file <code>load.lisp</code> or you
|
||||
can also get away with something like this:
|
||||
If you supply the optional value <code><i>length</i></code> and it is
|
||||
not <code>NIL</code> then this is a promise to the regex engine that
|
||||
your filter will <em>always</em> consume <em>exactly</em>
|
||||
<code><i>length</i></code> characters. The regex engine might use this
|
||||
information for optimization purposes but it is otherwise irrelevant
|
||||
to the outcome of the matching process.
|
||||
<p>
|
||||
The filter function can access the following special variables from
|
||||
its code body:
|
||||
<ul>
|
||||
|
||||
<li><code>CL-PPCRE::*STRING*</code>: The target (a string) of the
|
||||
current matching process.
|
||||
|
||||
<li><code>CL-PPCRE::*START-POS*</code> and
|
||||
<code>CL-PPCRE::*END-POS*</code>: The start and end (integers) indices
|
||||
of the current matching process. These correspond to the
|
||||
<code>START</code> and <code>END</code> keyword parameters of <a
|
||||
href="#scan"><code>SCAN</code></a>.
|
||||
|
||||
<li><code>CL-PPCRE::*REAL-START-POS*</code>: The initial starting
|
||||
position. This is only relevant for repeated scans (as in <a
|
||||
href="#do-scans"><code>DO-SCANS</code></a>) where
|
||||
<code>CL-PPCRE::*START-POS*</code> will be moved forward while
|
||||
<code>CL-PPCRE::*REAL-START-POS*</code> won't. For normal scans the
|
||||
value of this variable is <code>NIL</code>.
|
||||
|
||||
<li><CODE>CL-PPCRE::*REG-STARTS*</CODE> and
|
||||
<CODE>CL-PPCRE::*REG-ENDS*</CODE>: Two simple vectors which denote the
|
||||
start and end indices of registers within the regular expression. The
|
||||
first register is indexed by 0. If a register hasn't matched yet
|
||||
then its corresponding entry in <CODE>CL-PPCRE::*REG-STARTS*</CODE> is
|
||||
<code>NIL</code>.
|
||||
|
||||
</ul>
|
||||
|
||||
These variables should be considered read-only. Do <em>not</em> change
|
||||
these values unless you really know what you're doing!
|
||||
<p>
|
||||
Note that the names of the variables are not exported from the
|
||||
<code>CL-PPCRE</code> package because there's currently no guarantee
|
||||
that they will be available in future releases.
|
||||
<p>
|
||||
Here are some filter examples:
|
||||
<pre>
|
||||
(loop for name in '("packages" "specials" "util" "errors" "lexer"
|
||||
"parser" "regex-class" "convert" "optimize"
|
||||
"closures" "repetition-closures" "scanner" "api")
|
||||
do (compile-file (make-pathname :name name
|
||||
:type "lisp"))
|
||||
(load name))
|
||||
* (defun my-info-filter (pos)
|
||||
"Show some info about the matching process."
|
||||
(format t "Called at position ~A~%" pos)
|
||||
(loop with dim = (array-dimension cl-ppcre::*reg-starts* 0)
|
||||
for i below dim
|
||||
for reg-start = (aref cl-ppcre::*reg-starts* i)
|
||||
for reg-end = (aref cl-ppcre::*reg-ends* i)
|
||||
do (format t "Register ~A is currently " (1+ i))
|
||||
when reg-start
|
||||
(write-string cl-ppcre::*string* nil
|
||||
do (write-char #\')
|
||||
(write-string cl-ppcre::*string* nil
|
||||
:start reg-start :end reg-end)
|
||||
(write-char #\')
|
||||
else
|
||||
do (write-string "unbound")
|
||||
do (terpri))
|
||||
(terpri)
|
||||
pos)
|
||||
MY-INFO-FILTER
|
||||
|
||||
* (scan '(:sequence
|
||||
(:register
|
||||
(:greedy-repetition 0 nil
|
||||
(:char-class (:range #\a #\z))))
|
||||
(:filter my-info-filter 0) "X")
|
||||
"bYcdeX")
|
||||
Called at position 1
|
||||
Register 1 is currently 'b'
|
||||
|
||||
Called at position 0
|
||||
Register 1 is currently ''
|
||||
|
||||
Called at position 1
|
||||
Register 1 is currently ''
|
||||
|
||||
Called at position 5
|
||||
Register 1 is currently 'cde'
|
||||
|
||||
2
|
||||
6
|
||||
#(2)
|
||||
#(5)
|
||||
|
||||
* (scan '(:sequence
|
||||
(:register
|
||||
(:greedy-repetition 0 nil
|
||||
(:char-class (:range #\a #\z))))
|
||||
(:filter my-info-filter 0) "X")
|
||||
"bYcdeZ")
|
||||
NIL
|
||||
|
||||
* (defun my-weird-filter (pos)
|
||||
"Only match at this point if either pos is odd and the character
|
||||
we're looking at is lowerrcase or if pos is even and the next two
|
||||
characters we're looking at are uppercase. Consume these characters if
|
||||
there's a match."
|
||||
(format t "Trying at position ~A~%" pos)
|
||||
(cond ((and (oddp pos)
|
||||
(< pos cl-ppcre::*end-pos*)
|
||||
(lower-case-p (char cl-ppcre::*string* pos)))
|
||||
(1+ pos))
|
||||
((and (evenp pos)
|
||||
(< (1+ pos) cl-ppcre::*end-pos*)
|
||||
(upper-case-p (char cl-ppcre::*string* pos))
|
||||
(upper-case-p (char cl-ppcre::*string* (1+ pos))))
|
||||
(+ pos 2))
|
||||
(t nil)))
|
||||
MY-WEIRD-FILTER
|
||||
|
||||
* (defparameter *weird-regex*
|
||||
`(:sequence "+" (:filter ,#'my-weird-filter) "+"))
|
||||
*WEIRD-REGEX*
|
||||
|
||||
* (scan *weird-regex* "+A++a+AA+")
|
||||
Trying at position 1
|
||||
Trying at position 3
|
||||
Trying at position 4
|
||||
Trying at position 6
|
||||
5
|
||||
9
|
||||
#()
|
||||
#()
|
||||
|
||||
* (fmakunbound 'my-weird-filter)
|
||||
MY-WEIRD-FILTER
|
||||
|
||||
* (scan *weird-regex* "+A++a+AA+")
|
||||
Trying at position 1
|
||||
Trying at position 3
|
||||
Trying at position 4
|
||||
Trying at position 6
|
||||
5
|
||||
9
|
||||
#()
|
||||
#()
|
||||
</pre>
|
||||
|
||||
Note that on CL implementations which use the Python compiler
|
||||
(i.e. CMUCL, SBCL, SCL) you can concatenate the compiled object files
|
||||
to create one single object file which you can load afterwards:
|
||||
Note that in the second call to <code>SCAN</code> our filter wasn't
|
||||
invoked at all - it was optimized away by the regex engine because it
|
||||
knew that it couldn't match. Also note that <code>*WEIRD-REGEX*</code>
|
||||
still worked after we removed the global function definition of
|
||||
<code>MY-WEIRD-FILTER</code> because the regular expression had
|
||||
captured the original definition.
|
||||
|
||||
<pre>
|
||||
cat {packages,specials,util,errors,lexer,parser,regex-class,convert,optimize,closures,repetition-closures,scanner,api}.x86f > cl-ppcre.x86f
|
||||
</pre>
|
||||
<p>
|
||||
|
||||
(Replace ".<code>x86f</code>" with the correct suffix for
|
||||
your platform.)
|
||||
|
||||
|
||||
<br> <br><h3><a name="mail" class=none>Support and mailing lists</a></h3>
|
||||
|
||||
For questions, bug reports, feature requests, improvements, or patches
|
||||
please use the <a
|
||||
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-devel">cl-ppcre-devel
|
||||
mailing list</a>. If you want to be notified about future releases
|
||||
subscribe to the <a
|
||||
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-announce">cl-ppcre-announce
|
||||
mailing list</a>. These mailing lists were made available thanks to
|
||||
the services of <a href="http://common-lisp.net/">common-lisp.net</a>.
|
||||
For more ideas about what you can do with filters see <a
|
||||
href="http://common-lisp.net/pipermail/cl-ppcre-devel/2004-October/000069.html">this
|
||||
thread</a> on the <a href="#mail">mailing list</a>.
|
||||
|
||||
<br> <br><h3><a name="test" class=none>Testing CL-PPCRE</a></h3>
|
||||
|
||||
@ -1317,7 +1653,7 @@ NIL
|
||||
* (cl-ppcre-test:test)
|
||||
|
||||
<font color=orange>;; ....
|
||||
;; (a list of <a href="#perl">incompatibilities with Perl</a>)</font color=orange>
|
||||
;; (a list of <a class=noborder href="#perl">incompatibilities with Perl</a>)</font color=orange>
|
||||
</pre>
|
||||
|
||||
(If you're not using MK:DEFSYSTEM or asdf it suffices to build
|
||||
@ -1398,7 +1734,7 @@ translates <code>"\r"</code> to <code>(CODE-CHAR
|
||||
<h4><a name="alpha" class=none>What about <code>"\w"</code>?</a></h4>
|
||||
|
||||
CL-PPCRE uses <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_alphan.htm"><code>ALPHANUMERICP</code></a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_alphan.htm"><code>ALPHANUMERICP</code></a>
|
||||
to decide whether a character matches Perl's
|
||||
<code>"\w"</code>, so depending on your CL implementation
|
||||
you might encounter differences between Perl and CL-PPCRE when
|
||||
@ -1410,7 +1746,7 @@ matching non-ASCII characters.
|
||||
|
||||
The <a href="">CL-PPCRE test suite</a> can also be used for
|
||||
benchmarking purposes: If you call <code>perltest.pl</code> with a
|
||||
command line argument it will be interpreted as the number of seconds
|
||||
command line argument it will be interpreted as the minimum number of seconds
|
||||
each test should run. Perl will time its tests accordingly and create
|
||||
output which, when fed to <code>CL-PPCRE-TEST:TEST</code>, will result
|
||||
in a benchmark. Here's an example:
|
||||
@ -1554,13 +1890,13 @@ for you automatically.
|
||||
<p>
|
||||
However, beginning with version 0.5.2, CL-PPCRE uses a <a
|
||||
name="compiler-macro"
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_c.htm#compiler_macro">compiler
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_c.htm#compiler_macro">compiler
|
||||
macro</a> and <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/s_ld_tim.htm"><code>LOAD-TIME-VALUE</code></a>
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/s_ld_tim.htm"><code>LOAD-TIME-VALUE</code></a>
|
||||
to make sure that the scanner is only built once if the first argument
|
||||
to <a href="#scan"><code>SCAN</code></a>, <a href="#scan-to-strings"><code>SCAN-TO-STRINGS</code></a>, <a href="#split"><code>SPLIT</code></a>, or
|
||||
<a href="#regex-replace"><code>REGEX-REPLACE</code></a> is a <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_c.htm#constant_form">constant
|
||||
to <a href="#scan"><code>SCAN</code></a>, <a href="#scan-to-strings"><code>SCAN-TO-STRINGS</code></a>, <a href="#split"><code>SPLIT</code></a>,
|
||||
<a href="#regex-replace"><code>REGEX-REPLACE</code></a>, or <a href="#regex-replace-all"><code>REGEX-REPLACE-ALL</code></a> is a <a
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_c.htm#constant_form">constant
|
||||
form</a>. (But see the notes for <a
|
||||
href="#regex-char-code-limit"><code>*REGEX-CHAR-CODE-LIMIT*</code></a> and
|
||||
<a href="#use-bmh-matchers"><code>*USE-BMH-MATCHERS*</code></a>.)
|
||||
@ -1674,7 +2010,7 @@ target strings.
|
||||
<p>
|
||||
Another thing to consider is that, for performance reasons, CL-PPCRE
|
||||
assumes that most of the target strings you're trying to match are <a
|
||||
href="http://www.lispworks.com/reference/HyperSpec/Body/t_smp_st.htm">simple
|
||||
href="http://www.lispworks.com/documentation/HyperSpec/Body/t_smp_st.htm">simple
|
||||
strings</a> and coerces non-simple strings to simple strings before
|
||||
scanning them. If you plan on working with non-simple strings mostly
|
||||
you might consider modifying the CL-PPCRE source code. This is easy:
|
||||
@ -1746,6 +2082,8 @@ TARGET
|
||||
With CMUCL the situation is better and worse at the same time. It will
|
||||
take a lot longer until CMUCL gives up but if it gives up the whole
|
||||
Lisp image will silently die (at least on my machine):
|
||||
<p>
|
||||
[Note: This was true for CMUCL 18e - CMUCL 19a behaves in a much nicer way and gives you a chance to recover.]
|
||||
|
||||
<pre>
|
||||
* (defun target (n) (concatenate 'string (make-string n :initial-element #\a) "b"))
|
||||
@ -1900,6 +2238,50 @@ IBM Thinkpad T23 laptop (Pentium III 1.2 GHz,
|
||||
768 MB RAM) running <a href="http://www.gentoo.org/">Gentoo
|
||||
Linux</a> 1.1a.
|
||||
|
||||
<br> <br><h3><a class=none name="allegro">AllegroCL compatibility mode</a></h3>
|
||||
|
||||
Since autumn 2004 <a
|
||||
href="http://www.franz.com/products/allegrocl/">AllegroCL</a> offers
|
||||
<a
|
||||
href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm">a
|
||||
new regular expression API</a> with a syntax very similar to
|
||||
CL-PPCRE. Although CL-PPCRE is quite fast already, AllegroCL's engine will
|
||||
most likely be even faster (but only on AllegroCL, of course). However, you might want to
|
||||
stick to CL-PPCRE because you have a "legacy" application or because
|
||||
you want your code to be portable to other Lisp implementations.
|
||||
Therefore, beginning from version 1.2.0, CL-PPCRE offers a
|
||||
"compatibility mode" where you can continue using the CL-PPCRE API as
|
||||
described <a href="#dict">above</a> but deploy the AllegroCL regex
|
||||
engine under the hood. (The details are: Calls to <a
|
||||
href="#create-scanner"><code>CREATE-SCANNER</code></a> and <a
|
||||
href="#scan"><code>SCAN</code></a> are dispatched to their AllegroCL
|
||||
counterparts <a
|
||||
href="http://www.franz.com/support/documentation/7.0/doc/operators/excl/compile-re.htm"><code>EXCL:COMPILE-RE</code></a>
|
||||
and <a
|
||||
href="http://www.franz.com/support/documentation/7.0/doc/operators/excl/match-re.htm"><code>EXCL:MATCH-RE</code></a>
|
||||
while everything else is left as is.)
|
||||
<p>
|
||||
The advantage of this mode is that you'll get a much smaller image and
|
||||
most likely faster code. (But note that CL-PPCRE needs to do a small amount of work to massage AllegroCL's output into the format expected by CL-PPCRE.) The downside is that your code won't be
|
||||
fully compatible with CL-PPCRE anymore. Here are some of the
|
||||
differences (most of which probably don't matter very often):
|
||||
<ul>
|
||||
<li>The AllegroCL engine doesn't offer <a
|
||||
href="#parse-tree-synonym">parse tree synonyms</a> and <a href="#filters">filters</a>.
|
||||
<li>The AllegroCL engine <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-compatibility-2">will choke on some regular expressions involving curly braces</a> that are accepted by Perl and CL-PPCRE's native engine.
|
||||
<li>The AllegroCL engine's case-folding mode switch (which is used instead of CL-PPCRE's <a href="#create-scanner"><code>:CASE-INSENSITIVE</code> keyword parameter</a>) <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-matching-2">is currently only effective for ASCII characters</a>.
|
||||
<li>CL-PPCRE's engine doesn't understand the <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-capturing-2">named register groups</a> provided by AllegroCL.
|
||||
<li>The AllegroCL engine <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-compatibility-2">doesn't support</a> <a href="#*allow-quoting*">quoting of metacharacters</a>.
|
||||
<li>In AllegroCL compatibility mode compiled regular expressions (as returned by <a href="#create-scanner"><code>CREATE-SCANNER</code></a>) aren't functions but structures.
|
||||
</ul>
|
||||
For more details about the AllegroCL engine and possible deviations from CL-PPCRE see the <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm">documentation</a> at the <a href="http://www.franz.com/">Franz Inc. website</a>.
|
||||
<p>
|
||||
To use the AllegroCL compatibility mode you have to
|
||||
<pre>
|
||||
(push :use-acl-regexp2-engine *features*)
|
||||
</pre>
|
||||
<em>before</em> you compile CL-PPCRE.
|
||||
|
||||
<br> <br><h3><a class=none name="ack">Acknowledgements</a></h3>
|
||||
|
||||
Although I didn't use their code I was heavily inspired by looking at
|
||||
@ -1927,7 +2309,7 @@ where I wrote most of the code and thanks to my wife for lending me
|
||||
her PowerBook to test CL-PPCRE with MCL and OpenMCL.
|
||||
|
||||
<p>
|
||||
$Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/doc/index.html,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
$Header: /usr/local/cvsrep/cl-ppcre/doc/index.html,v 1.131 2005/11/01 09:51:02 edi Exp $
|
||||
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
|
||||
|
||||
</body>
|
||||
|
||||
20
errors.lisp
20
errors.lisp
@ -1,7 +1,7 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-LISP; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/errors.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/errors.lisp,v 1.14 2005/04/01 21:29:09 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -50,7 +50,19 @@ this type."))
|
||||
(simple-condition-format-control condition)
|
||||
(simple-condition-format-arguments condition)
|
||||
(ppcre-syntax-error-pos condition)
|
||||
(ppcre-syntax-error-string condition)))))
|
||||
(ppcre-syntax-error-string condition))))
|
||||
(:documentation "Signaled if CL-PPCRE's parser encounters an error
|
||||
when trying to parse a regex string or to convert a parse tree into
|
||||
its internal representation."))
|
||||
|
||||
(setf (documentation 'ppcre-syntax-error-string 'function)
|
||||
"Returns the string the parser was parsing when the error was
|
||||
encountered \(or NIL if the error happened while trying to convert a
|
||||
parse tree).")
|
||||
|
||||
(setf (documentation 'ppcre-syntax-error-pos 'function)
|
||||
"Returns the position within the string where the error occured
|
||||
\(or NIL if the error happened while trying to convert a parse tree")
|
||||
|
||||
(define-condition ppcre-invocation-error (ppcre-error)
|
||||
()
|
||||
|
||||
127
lexer.lisp
127
lexer.lisp
@ -1,5 +1,5 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/lexer.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/lexer.lisp,v 1.24 2005/04/01 21:29:09 edi Exp $
|
||||
|
||||
;;; The lexer's responsibility is to convert the regex string into a
|
||||
;;; sequence of tokens which are in turn consumed by the parser.
|
||||
@ -9,7 +9,7 @@
|
||||
;;; has opened so far. (The latter is necessary for interpreting
|
||||
;;; strings like "\\10" correctly.)
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -39,12 +39,7 @@
|
||||
|
||||
(declaim (inline map-char-to-special-class))
|
||||
(defun map-char-to-special-char-class (chr)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Maps escaped characters like \"\\d\" to the tokens which represent
|
||||
their associated character classes."
|
||||
(case chr
|
||||
@ -62,12 +57,7 @@ their associated character classes."
|
||||
:non-whitespace-char-class)))
|
||||
|
||||
(locally
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(defstruct (lexer (:constructor make-lexer-internal))
|
||||
"LEXER structures are used to hold the regex string which is
|
||||
currently lexed and to keep track of the lexer's state."
|
||||
@ -86,30 +76,20 @@ currently lexed and to keep track of the lexer's state."
|
||||
|
||||
(defun make-lexer (string)
|
||||
(declare (inline make-lexer-internal)
|
||||
(type string string))
|
||||
#-genera (type string string))
|
||||
(make-lexer-internal :str (maybe-coerce-to-simple-string string)
|
||||
:len (length string)))
|
||||
|
||||
(declaim (inline end-of-string-p))
|
||||
(defun end-of-string-p (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Tests whether we're at the end of the regex string."
|
||||
(<= (lexer-len lexer)
|
||||
(lexer-pos lexer)))
|
||||
|
||||
(declaim (inline looking-at-p))
|
||||
(defun looking-at-p (lexer chr)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Tests whether the next character the lexer would see is CHR.
|
||||
Does not respect extended mode."
|
||||
(and (not (end-of-string-p lexer))
|
||||
@ -118,12 +98,7 @@ Does not respect extended mode."
|
||||
|
||||
(declaim (inline next-char-non-extended))
|
||||
(defun next-char-non-extended (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns the next character which is to be examined and updates the
|
||||
POS slot. Does not respect extended mode."
|
||||
(cond ((end-of-string-p lexer)
|
||||
@ -134,12 +109,7 @@ POS slot. Does not respect extended mode."
|
||||
(incf (lexer-pos lexer))))))
|
||||
|
||||
(defun next-char (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns the next character which is to be examined and updates the
|
||||
POS slot. Respects extended mode, i.e. whitespace, comments, and also
|
||||
nested comments are skipped if applicable."
|
||||
@ -203,12 +173,7 @@ nested comments are skipped if applicable."
|
||||
|
||||
(declaim (inline fail))
|
||||
(defun fail (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Moves (LEXER-POS LEXER) back to the last position stored in
|
||||
\(LEXER-LAST-POS LEXER) and pops the LAST-POS stack."
|
||||
(unless (lexer-last-pos lexer)
|
||||
@ -217,12 +182,7 @@ nested comments are skipped if applicable."
|
||||
nil)
|
||||
|
||||
(defun get-number (lexer &key (radix 10) max-length no-whitespace-p)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Read and consume the number the lexer is currently looking at and
|
||||
return it. Returns NIL if no number could be identified.
|
||||
RADIX is used as in PARSE-INTEGER. If MAX-LENGTH is not NIL we'll read
|
||||
@ -252,12 +212,7 @@ we don't tolerate whitespace in front of the number."
|
||||
|
||||
(declaim (inline try-number))
|
||||
(defun try-number (lexer &key (radix 10) max-length no-whitespace-p)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Like GET-NUMBER but won't consume anything if no number is seen."
|
||||
;; remember current position
|
||||
(push (lexer-pos lexer) (lexer-last-pos lexer))
|
||||
@ -269,16 +224,11 @@ we don't tolerate whitespace in front of the number."
|
||||
|
||||
(declaim (inline make-char-from-code))
|
||||
(defun make-char-from-code (number error-pos)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Create character from char-code NUMBER. NUMBER can be NIL
|
||||
which is interpreted as 0. ERROR-POS is the position where
|
||||
the corresponding number started within the regex string."
|
||||
;; Only look at rightmost eight bits in compliance with Perl
|
||||
;; only look at rightmost eight bits in compliance with Perl
|
||||
(let ((code (logand #o377 (the fixnum (or number 0)))))
|
||||
(or (and (< code char-code-limit)
|
||||
(code-char code))
|
||||
@ -288,12 +238,7 @@ the corresponding number started within the regex string."
|
||||
number))))
|
||||
|
||||
(defun unescape-char (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Convert the characters(s) following a backslash into a token
|
||||
which is returned. This function is to be called when the backslash
|
||||
has already been consumed. Special character classes like \\W are
|
||||
@ -351,12 +296,7 @@ handled elsewhere."
|
||||
chr))))
|
||||
|
||||
(defun collect-char-class (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Reads and consumes characters from regex string until a right
|
||||
bracket is seen. Assembles them into a list \(which is returned) of
|
||||
characters, character ranges, like \(:RANGE #\\A #\\E) for a-e, and
|
||||
@ -437,12 +377,7 @@ we're inside a range or not."
|
||||
"Missing right bracket to close character class"))))
|
||||
|
||||
(defun maybe-parse-flags (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Reads a sequence of modifiers \(including #\\- to reverse their
|
||||
meaning) and returns a corresponding list of \"flag\" tokens. The
|
||||
\"x\" modifier is treated specially in that it dynamically modifies
|
||||
@ -478,12 +413,7 @@ the behaviour of the lexer itself via the special variable
|
||||
(decf (lexer-pos lexer))))
|
||||
|
||||
(defun get-quantifier (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns a list of two values (min max) if what the lexer is looking
|
||||
at can be interpreted as a quantifier. Otherwise returns NIL and
|
||||
resets the lexer to its old position."
|
||||
@ -533,12 +463,7 @@ resets the lexer to its old position."
|
||||
(fail lexer)))))
|
||||
|
||||
(defun get-token (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns and consumes the next token from the regex string (or NIL)."
|
||||
;; remember starting position for UNGET-TOKEN function
|
||||
(push (lexer-pos lexer)
|
||||
@ -737,12 +662,7 @@ resets the lexer to its old position."
|
||||
|
||||
(declaim (inline unget-token))
|
||||
(defun unget-token (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Moves the lexer back to the last position stored in the LAST-POS stack."
|
||||
(if (lexer-last-pos lexer)
|
||||
(setf (lexer-pos lexer)
|
||||
@ -751,12 +671,7 @@ resets the lexer to its old position."
|
||||
|
||||
(declaim (inline start-of-subexpr-p))
|
||||
(defun start-of-subexpr-p (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Tests whether the next token can start a valid sub-expression, i.e.
|
||||
a stand-alone regex."
|
||||
(let* ((pos (lexer-pos lexer))
|
||||
|
||||
57
lispworks-defsystem.lisp
Normal file
57
lispworks-defsystem.lisp
Normal file
@ -0,0 +1,57 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/lispworks-defsystem.lisp,v 1.1 2005/04/30 20:00:50 edi Exp $
|
||||
|
||||
;;; This system definition for LispWorks was kindly provided by Wade Humeniuk
|
||||
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
|
||||
;;; * Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
|
||||
;;; * Redistributions in binary form must reproduce the above
|
||||
;;; copyright notice, this list of conditions and the following
|
||||
;;; disclaimer in the documentation and/or other materials
|
||||
;;; provided with the distribution.
|
||||
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
||||
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package #:cl-user)
|
||||
|
||||
(defparameter *cl-ppcre-base-directory*
|
||||
(make-pathname :name nil :type nil :version nil
|
||||
:defaults (parse-namestring *load-truename*)))
|
||||
|
||||
(defsystem cl-ppcre
|
||||
(:default-pathname *cl-ppcre-base-directory*
|
||||
:default-type :lisp-file)
|
||||
:members ("packages"
|
||||
"specials"
|
||||
"util"
|
||||
"errors"
|
||||
"lexer"
|
||||
"parser"
|
||||
"regex-class"
|
||||
"convert"
|
||||
"optimize"
|
||||
"closures"
|
||||
"repetition-closures"
|
||||
"scanner"
|
||||
"api")
|
||||
:rules ((:in-order-to :compile :all
|
||||
(:requires (:load :previous)))
|
||||
(:in-order-to :load :all
|
||||
(:requires (:load :previous)))))
|
||||
47
load.lisp
47
load.lisp
@ -1,7 +1,7 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/load.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/load.lisp,v 1.13 2005/04/01 21:29:09 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -27,36 +27,41 @@
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package #:cl-user)
|
||||
(in-package :cl-user)
|
||||
|
||||
(defparameter *cl-ppcre-base-directory*
|
||||
(let ((cl-ppcre-base-directory
|
||||
(make-pathname :name nil :type nil :version nil
|
||||
:defaults (parse-namestring *load-truename*)))
|
||||
|
||||
(loop for file in '("packages"
|
||||
must-compile)
|
||||
(with-compilation-unit ()
|
||||
(dolist (file '("packages"
|
||||
"specials"
|
||||
"util"
|
||||
"errors"
|
||||
"lexer"
|
||||
"parser"
|
||||
"regex-class"
|
||||
"convert"
|
||||
"optimize"
|
||||
"closures"
|
||||
"repetition-closures"
|
||||
"scanner"
|
||||
#-:use-acl-regexp2-engine "lexer"
|
||||
#-:use-acl-regexp2-engine "parser"
|
||||
#-:use-acl-regexp2-engine "regex-class"
|
||||
#-:use-acl-regexp2-engine "convert"
|
||||
#-:use-acl-regexp2-engine "optimize"
|
||||
#-:use-acl-regexp2-engine "closures"
|
||||
#-:use-acl-regexp2-engine "repetition-closures"
|
||||
#-:use-acl-regexp2-engine "scanner"
|
||||
"api"
|
||||
"ppcre-tests")
|
||||
do (let ((pathname (make-pathname :name file :type "lisp" :version nil
|
||||
:defaults *cl-ppcre-base-directory*)))
|
||||
"ppcre-tests"))
|
||||
(let ((pathname (make-pathname :name file :type "lisp" :version nil
|
||||
:defaults cl-ppcre-base-directory)))
|
||||
;; don't use COMPILE-FILE in Corman Lisp, it's broken - LOAD
|
||||
;; will yield compiled functions anyway
|
||||
#-:cormanlisp
|
||||
(let ((compiled-pathname (compile-file-pathname pathname)))
|
||||
(unless (probe-file compiled-pathname)
|
||||
(unless (and (not must-compile)
|
||||
(probe-file compiled-pathname)
|
||||
(< (file-write-date pathname)
|
||||
(file-write-date compiled-pathname)))
|
||||
(setq must-compile t)
|
||||
(compile-file pathname))
|
||||
(setq pathname compiled-pathname))
|
||||
(load pathname)))
|
||||
|
||||
|
||||
(load pathname)))))
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/optimize.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/optimize.lisp,v 1.26 2005/04/13 15:35:57 edi Exp $
|
||||
|
||||
;;; This file contains optimizations which can be applied to converted
|
||||
;;; parse trees.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -32,37 +32,8 @@
|
||||
|
||||
(in-package #:cl-ppcre)
|
||||
|
||||
(defun string-list-to-simple-string (string-list)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Concatenates a list of strings to one simple-string."
|
||||
;; this function provided by JP Massar; note that we can't use APPLY
|
||||
;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
|
||||
(let ((total-size 0))
|
||||
(declare (type fixnum total-size))
|
||||
(dolist (string string-list)
|
||||
(declare (type string string))
|
||||
(incf total-size (length string)))
|
||||
(let ((result-string (make-sequence 'simple-string total-size))
|
||||
(curr-pos 0))
|
||||
(declare (type fixnum curr-pos))
|
||||
(dolist (string string-list)
|
||||
(declare (type string string))
|
||||
(replace result-string string :start1 curr-pos)
|
||||
(incf curr-pos (length string)))
|
||||
result-string)))
|
||||
|
||||
(defgeneric flatten (regex)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Merges adjacent sequences and alternations, i.e. it
|
||||
transforms #<SEQ #<STR \"a\"> #<SEQ #<STR \"b\"> #<STR \"c\">>> to
|
||||
#<SEQ #<STR \"a\"> #<STR \"b\"> #<STR \"c\">>. This is a destructive
|
||||
@ -148,17 +119,12 @@ operation on REGEX."))
|
||||
regex)
|
||||
(t
|
||||
;; otherwise (ANCHOR, BACK-REFERENCE, CHAR-CLASS, EVERYTHING,
|
||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, and WORD-BOUNDARY) do
|
||||
;; nothing
|
||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY)
|
||||
;; do nothing
|
||||
regex)))
|
||||
|
||||
(defgeneric gather-strings (regex)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Collects adjacent strings or characters into one
|
||||
string provided they have the same case mode. This is a destructive
|
||||
operation on REGEX."))
|
||||
@ -310,19 +276,14 @@ operation on REGEX."))
|
||||
regex)
|
||||
(t
|
||||
;; otherwise (ANCHOR, BACK-REFERENCE, CHAR-CLASS, EVERYTHING,
|
||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, and WORD-BOUNDARY) do
|
||||
;; nothing
|
||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY)
|
||||
;; do nothing
|
||||
regex)))
|
||||
|
||||
;; Note that START-ANCHORED-P will be called after FLATTEN and GATHER-STRINGS.
|
||||
|
||||
(defgeneric start-anchored-p (regex &optional in-seq-p)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns T if REGEX starts with a \"real\" start
|
||||
anchor, i.e. one that's not in multi-line mode, NIL otherwise. If
|
||||
IN-SEQ-P is true the function will return :ZERO-LENGTH if REGEX is a
|
||||
@ -378,6 +339,12 @@ zero-length assertion."))
|
||||
(if in-seq-p
|
||||
:zero-length
|
||||
nil))
|
||||
(filter
|
||||
(if (and in-seq-p
|
||||
(len regex)
|
||||
(zerop (len regex)))
|
||||
:zero-length
|
||||
nil))
|
||||
(t
|
||||
;; BACK-REFERENCE, CHAR-CLASS, EVERYTHING, and STR
|
||||
nil)))
|
||||
@ -385,12 +352,7 @@ zero-length assertion."))
|
||||
;; Note that END-STRING-AUX will be called after FLATTEN and GATHER-STRINGS.
|
||||
|
||||
(defgeneric end-string-aux (regex &optional old-case-insensitive-p)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns the constant string (if it exists) REGEX
|
||||
ends with wrapped into a STR object, otherwise NIL.
|
||||
OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR
|
||||
@ -509,19 +471,17 @@ function called by END-STRIN.)"))
|
||||
:case-insensitive-p :void))
|
||||
(t
|
||||
;; (ALTERNATION, BACK-REFERENCE, BRANCH, CHAR-CLASS, EVERYTHING,
|
||||
;; REPETITION)
|
||||
;; REPETITION, FILTER)
|
||||
nil)))
|
||||
|
||||
(defgeneric end-string (regex)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns the constant string (if it exists) REGEX ends with wrapped
|
||||
into a STR object, otherwise NIL."))
|
||||
|
||||
(defmethod end-string ((regex regex))
|
||||
(declare (special end-string-offset))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Returns the constant string (if it exists) REGEX ends with wrapped
|
||||
into a STR object, otherwise NIL."
|
||||
(declare #.*standard-optimize-settings*)
|
||||
;; LAST-STR points to the last STR object (seen from the end) that's
|
||||
;; part of END-STRING; CONTINUEP is set to T if we stop collecting
|
||||
;; in the middle of a SEQ
|
||||
@ -539,12 +499,7 @@ into a STR object, otherwise NIL."
|
||||
end-string-offset (offset last-str))))))
|
||||
|
||||
(defgeneric compute-min-rest (regex current-min-rest)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns the minimal length of REGEX plus
|
||||
CURRENT-MIN-REST. This is similar to REGEX-MIN-LENGTH except that it
|
||||
recurses down into REGEX and sets the MIN-REST slots of REPETITION
|
||||
@ -567,6 +522,9 @@ objects."))
|
||||
(defmethod compute-min-rest ((str str) current-min-rest)
|
||||
(+ current-min-rest (len str)))
|
||||
|
||||
(defmethod compute-min-rest ((filter filter) current-min-rest)
|
||||
(+ current-min-rest (or (len filter) 0)))
|
||||
|
||||
(defmethod compute-min-rest ((repetition repetition) current-min-rest)
|
||||
(setf (min-rest repetition) current-min-rest)
|
||||
(compute-min-rest (regex repetition) current-min-rest)
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/packages.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/packages.lisp,v 1.19 2005/04/01 21:29:10 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -27,13 +27,16 @@
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package #:cl-user)
|
||||
(in-package :cl-user)
|
||||
|
||||
#-:cormanlisp
|
||||
(defpackage #:cl-ppcre
|
||||
(:nicknames #:ppcre)
|
||||
(:use #:cl)
|
||||
#+genera (:shadowing-import-from #:common-lisp #:lambda #:simple-string #:string)
|
||||
(:use #-genera #:cl #+genera #:future-common-lisp)
|
||||
(:export #:create-scanner
|
||||
#:parse-tree-synonym
|
||||
#:define-parse-tree-synonym
|
||||
#:scan
|
||||
#:scan-to-strings
|
||||
#:do-scans
|
||||
@ -56,13 +59,17 @@
|
||||
#:ppcre-syntax-error-string
|
||||
#:ppcre-syntax-error-pos
|
||||
#:register-groups-bind
|
||||
#:do-register-groups))
|
||||
#:do-register-groups
|
||||
#:*standard-optimize-settings*
|
||||
#:*special-optimize-settings*))
|
||||
|
||||
#+:cormanlisp
|
||||
(defpackage "CL-PPCRE"
|
||||
(:nicknames "PPCRE")
|
||||
(:use "CL")
|
||||
(:export "CREATE-SCANNER"
|
||||
"PARSE-TREE-SYNONYM"
|
||||
"DEFINE-PARSE-TREE-SYNONYM"
|
||||
"SCAN"
|
||||
"SCAN-TO-STRINGS"
|
||||
"DO-SCANS"
|
||||
@ -85,4 +92,17 @@
|
||||
"PPCRE-SYNTAX-ERROR-STRING"
|
||||
"PPCRE-SYNTAX-ERROR-POS"
|
||||
"REGISTER-GROUPS-BIND"
|
||||
"DO-REGISTER-GROUPS"))
|
||||
"DO-REGISTER-GROUPS"
|
||||
"*STANDARD-OPTIMIZE-SETTINGS*"
|
||||
"*SPECIAL-OPTIMIZE-SETTINGS*"))
|
||||
|
||||
#-:cormanlisp
|
||||
(defpackage #:cl-ppcre-test
|
||||
#+genera (:shadowing-import-from #:common-lisp #:lambda)
|
||||
(:use #-genera #:cl #+genera #:future-common-lisp #:cl-ppcre)
|
||||
(:export #:test))
|
||||
|
||||
#+:cormanlisp
|
||||
(defpackage "CL-PPCRE-TEST"
|
||||
(:use "CL" "CL-PPCRE")
|
||||
(:export "TEST"))
|
||||
|
||||
55
parser.lisp
55
parser.lisp
@ -1,5 +1,5 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/parser.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/parser.lisp,v 1.21 2005/08/03 21:11:27 edi Exp $
|
||||
|
||||
;;; The parser will - with the help of the lexer - parse a regex
|
||||
;;; string and convert it into a "parse tree" (see docs for details
|
||||
@ -7,7 +7,7 @@
|
||||
;;; illegal parse trees. It is assumed that the conversion process
|
||||
;;; later on will track them down.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -36,16 +36,11 @@
|
||||
(in-package #:cl-ppcre)
|
||||
|
||||
(defun group (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Parses and consumes a <group>.
|
||||
The productions are: <group> -> \"(\"<regex>\")\"
|
||||
\"(?:\"<regex>\")\"
|
||||
\"(?<\"<regex>\")\"
|
||||
\"(?>\"<regex>\")\"
|
||||
\"(?<flags>:\"<regex>\")\"
|
||||
\"(?=\"<regex>\")\"
|
||||
\"(?!\"<regex>\")\"
|
||||
@ -154,12 +149,7 @@ Will return <parse-tree> or (<grouping-type> <parse-tree>) where
|
||||
open-token))))
|
||||
|
||||
(defun greedy-quant (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Parses and consumes a <greedy-quant>.
|
||||
The productions are: <greedy-quant> -> <group> | <group><quantifier>
|
||||
where <quantifier> is parsed by the lexer function GET-QUANTIFIER.
|
||||
@ -173,12 +163,7 @@ Will return <parse-tree> or (:GREEDY-REPETITION <min> <max> <parse-tree>)."
|
||||
group)))
|
||||
|
||||
(defun quant (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Parses and consumes a <quant>.
|
||||
The productions are: <quant> -> <greedy-quant> | <greedy-quant>\"?\".
|
||||
Will return the <parse-tree> returned by GREEDY-QUANT and optionally
|
||||
@ -193,12 +178,7 @@ change :GREEDY-REPETITION to :NON-GREEDY-REPETITION."
|
||||
greedy-quant))
|
||||
|
||||
(defun seq (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Parses and consumes a <seq>.
|
||||
The productions are: <seq> -> <quant> | <quant><seq>.
|
||||
Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
|
||||
@ -263,12 +243,7 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
|
||||
:void)))
|
||||
|
||||
(defun reg-expr (lexer)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Parses and consumes a <regex>, a complete regular expression.
|
||||
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
|
||||
Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
||||
@ -313,12 +288,7 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
||||
seq)))))))
|
||||
|
||||
(defun reverse-strings (parse-tree)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(cond ((stringp parse-tree)
|
||||
(nreverse parse-tree))
|
||||
((consp parse-tree)
|
||||
@ -330,12 +300,7 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
||||
(t parse-tree)))
|
||||
|
||||
(defun parse-string (string)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Translate the regex string STRING into a parse tree."
|
||||
(let* ((lexer (make-lexer string))
|
||||
(parse-tree (reverse-strings (reg-expr lexer))))
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/ppcre-tests.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/ppcre-tests.lisp,v 1.31 2005/08/23 12:23:13 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -27,18 +27,6 @@
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package #:cl-user)
|
||||
|
||||
#-:cormanlisp
|
||||
(defpackage #:cl-ppcre-test
|
||||
(:use #:cl #:cl-ppcre)
|
||||
(:export #:test))
|
||||
|
||||
#+:cormanlisp
|
||||
(defpackage "CL-PPCRE-TEST"
|
||||
(:use "CL" "CL-PPCRE")
|
||||
(:export "TEST"))
|
||||
|
||||
(in-package #:cl-ppcre-test)
|
||||
|
||||
(defparameter *cl-ppcre-test-base-directory*
|
||||
@ -64,12 +52,7 @@
|
||||
multi-line-mode
|
||||
single-line-mode
|
||||
extended-mode)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Auxiliary function used by TEST to benchmark a regex scanner
|
||||
against Perl timings."
|
||||
(declare (type string string))
|
||||
@ -90,12 +73,7 @@ against Perl timings."
|
||||
lispworks
|
||||
(and sbcl sb-thread))
|
||||
(defun threaded-scan (scanner target-string &key (threads 10) (repetitions 5000))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Auxiliary function used by TEST to check whether SCANNER is thread-safe."
|
||||
(full-gc)
|
||||
(let ((collector (make-array threads))
|
||||
@ -155,32 +133,26 @@ against Perl timings."
|
||||
:defaults *cl-ppcre-test-base-directory*)
|
||||
file-name-provided-p)
|
||||
threaded)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (ignorable threaded))
|
||||
"Loop through all test cases in FILE-NAME and print report. Only in
|
||||
LispWorks and SCL: If THREADED is true, also test whether the scanners
|
||||
work multi-threaded."
|
||||
(with-open-file (stream file-name
|
||||
#+(or :allegro :clisp :scl)
|
||||
#+(or :allegro :clisp :scl :sbcl)
|
||||
:external-format
|
||||
#+(or :allegro :clisp :scl)
|
||||
#+(or :allegro :clisp :scl :sbcl)
|
||||
(if file-name-provided-p
|
||||
:default
|
||||
#+:allegro :iso-8859-1
|
||||
#+:clisp charset:iso-8859-1
|
||||
#+:scl :iso-8859-1))
|
||||
#+(or :allegro :scl :sbcl) :iso-8859-1
|
||||
#+:clisp charset:iso-8859-1))
|
||||
(loop with testcount of-type fixnum = 0
|
||||
with *regex-char-code-limit* = (if file-name-provided-p
|
||||
*regex-char-code-limit*
|
||||
;; the standard test suite
|
||||
;; doesn't need full
|
||||
;; Unicode support
|
||||
255)
|
||||
;; doesn't need Unicode
|
||||
;; support
|
||||
256)
|
||||
with *allow-quoting* = (if file-name-provided-p
|
||||
*allow-quoting*
|
||||
t)
|
||||
|
||||
187
regex-class.lisp
187
regex-class.lisp
@ -1,11 +1,11 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/regex-class.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/regex-class.lisp,v 1.26 2005/06/10 10:23:42 edi Exp $
|
||||
|
||||
;;; This file defines the REGEX class and some utility methods for
|
||||
;;; this class. REGEX objects are used to represent the (transformed)
|
||||
;;; parse trees internally
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -33,16 +33,15 @@
|
||||
|
||||
(in-package #:cl-ppcre)
|
||||
|
||||
;; Genera need the eval-when, here, or the types created by the class
|
||||
;; definitions aren't seen by the typep calls later in the file.
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(locally
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(defclass regex ()
|
||||
()
|
||||
(:documentation "The REGEX base class. All other classes inherit from this one."))
|
||||
(:documentation "The REGEX base class. All other classes inherit
|
||||
from this one."))
|
||||
|
||||
|
||||
(defclass seq (regex)
|
||||
@ -103,7 +102,8 @@ Can be NIL for unbounded.")
|
||||
:documentation "The minimal length of the enclosed regex.")
|
||||
(len :initarg :len
|
||||
:reader len
|
||||
:documentation "The length of the enclosed regex. NIL if unknown.")
|
||||
:documentation "The length of the enclosed regex. NIL
|
||||
if unknown.")
|
||||
(min-rest :initform 0
|
||||
:accessor min-rest
|
||||
:type fixnum
|
||||
@ -135,24 +135,28 @@ This is the index into *REGS-START* and *REGS-END*."))
|
||||
((num :initarg :num
|
||||
:accessor num
|
||||
:type fixnum
|
||||
:documentation "The number of the register this reference refers to.")
|
||||
:documentation "The number of the register this
|
||||
reference refers to.")
|
||||
(case-insensitive-p :initarg :case-insensitive-p
|
||||
:reader case-insensitive-p
|
||||
:documentation "Whether we check case-insensitively."))
|
||||
:documentation "Whether we check
|
||||
case-insensitively."))
|
||||
(:documentation "BACK-REFERENCE objects represent backreferences."))
|
||||
|
||||
(defclass char-class (regex)
|
||||
((hash :initarg :hash
|
||||
:reader hash
|
||||
:type (or hash-table null)
|
||||
:documentation "A hash table the keys of which are the characters;
|
||||
the values are always T.")
|
||||
:documentation "A hash table the keys of which are the
|
||||
characters; the values are always T.")
|
||||
(case-insensitive-p :initarg :case-insensitive-p
|
||||
:reader case-insensitive-p
|
||||
:documentation "If the char class case-insensitive.")
|
||||
:documentation "If the char class
|
||||
case-insensitive.")
|
||||
(invertedp :initarg :invertedp
|
||||
:reader invertedp
|
||||
:documentation "Whether we mean the inverse of the char class.")
|
||||
:documentation "Whether we mean the inverse of
|
||||
the char class.")
|
||||
(word-char-class-p :initarg :word-char-class-p
|
||||
:reader word-char-class-p
|
||||
:documentation "Whether this CHAR CLASS
|
||||
@ -173,18 +177,18 @@ represents the special class WORD-CHAR-CLASS."))
|
||||
:documentation "If we match case-insensitively.")
|
||||
(offset :initform nil
|
||||
:accessor offset
|
||||
:documentation "Offset from the left of the whole parse tree.
|
||||
The first regex has offset 0.
|
||||
NIL if unknown, i.e. behind a variable-length regex.")
|
||||
:documentation "Offset from the left of the whole
|
||||
parse tree. The first regex has offset 0. NIL if unknown, i.e. behind
|
||||
a variable-length regex.")
|
||||
(skip :initform nil
|
||||
:initarg :skip
|
||||
:accessor skip
|
||||
:documentation "If we can avoid testing for this string
|
||||
because the SCAN function has done this already.")
|
||||
:documentation "If we can avoid testing for this
|
||||
string because the SCAN function has done this already.")
|
||||
(start-of-end-string-p :initform nil
|
||||
:accessor start-of-end-string-p
|
||||
:documentation "If this is the unique STR which
|
||||
starts END-STRING (a slot of MATCHER)."))
|
||||
:documentation "If this is the unique
|
||||
STR which starts END-STRING (a slot of MATCHER)."))
|
||||
(:documentation "STR objects represent string."))
|
||||
|
||||
(defclass anchor (regex)
|
||||
@ -218,8 +222,8 @@ i.e. no word-boundary."))
|
||||
(defclass branch (regex)
|
||||
((test :initarg :test
|
||||
:accessor test
|
||||
:documentation "The test of this branch, one of LOOKAHEAD,
|
||||
LOOKBEHIND, or a number.")
|
||||
:documentation "The test of this branch, one of
|
||||
LOOKAHEAD, LOOKBEHIND, or a number.")
|
||||
(then-regex :initarg :then-regex
|
||||
:accessor then-regex
|
||||
:documentation "The regex that's to be matched if the
|
||||
@ -232,22 +236,40 @@ test fails."))
|
||||
(:documentation "BRANCH objects represent Perl's conditional regular
|
||||
expressions."))
|
||||
|
||||
(defclass filter (regex)
|
||||
((fn :initarg :fn
|
||||
:accessor fn
|
||||
:type (or function symbol)
|
||||
:documentation "The user-defined function.")
|
||||
(len :initarg :len
|
||||
:reader len
|
||||
:documentation "The fixed length of this filter or NIL."))
|
||||
(:documentation "FILTER objects represent arbitrary functions
|
||||
defined by the user."))
|
||||
|
||||
(defclass void (regex)
|
||||
()
|
||||
(:documentation "VOID objects represent empty regular expressions.")))
|
||||
(:documentation "VOID objects represent empty regular expressions."))))
|
||||
|
||||
(declaim (ftype (function (t) simple-string) str))
|
||||
(defmethod initialize-instance :after ((char-class char-class) &rest init-args)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Make large hash tables smaller, if possible."
|
||||
(let ((hash (getf init-args :hash)))
|
||||
(when (and hash
|
||||
(> *regex-char-code-limit* 256)
|
||||
(> (hash-table-count hash)
|
||||
(/ *regex-char-code-limit* 2)))
|
||||
(setf (slot-value char-class 'hash)
|
||||
(merge-inverted-hash (make-hash-table)
|
||||
hash)
|
||||
(slot-value char-class 'invertedp)
|
||||
(not (slot-value char-class 'invertedp))))))
|
||||
|
||||
;;; The following four methods allow a VOID object to behave like a
|
||||
;;; zero-length STR object (only readers needed)
|
||||
|
||||
(defmethod initialize-instance :after ((str str) &rest init-args)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (ignore init-args))
|
||||
"Automatically computes the length of a STR after initialization."
|
||||
(let ((str-slot (slot-value str 'str)))
|
||||
@ -256,48 +278,23 @@ expressions."))
|
||||
(setf (len str) (length (str str))))
|
||||
|
||||
(defmethod len ((void void))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
0)
|
||||
|
||||
(defmethod str ((void void))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"")
|
||||
|
||||
(defmethod skip ((void void))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
nil)
|
||||
|
||||
(defmethod start-of-end-string-p ((void void))
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
nil)
|
||||
|
||||
(defgeneric case-mode (regex old-case-mode)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Utility function used by the optimizer (see GATHER-STRINGS).
|
||||
Returns a keyword denoting the case-(in)sensitivity of a STR or its
|
||||
second argument if the STR has length 0. Returns NIL for REGEX objects
|
||||
@ -316,12 +313,7 @@ which are not of type STR."))
|
||||
nil)
|
||||
|
||||
(defgeneric copy-regex (regex)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Implements a deep copy of a REGEX object."))
|
||||
|
||||
(defmethod copy-regex ((anchor anchor))
|
||||
@ -406,6 +398,11 @@ which are not of type STR."))
|
||||
:str (str str)
|
||||
:case-insensitive-p (case-insensitive-p str)))
|
||||
|
||||
(defmethod copy-regex ((filter filter))
|
||||
(make-instance 'filter
|
||||
:fn (fn filter)
|
||||
:len (len filter)))
|
||||
|
||||
;;; Note that COPY-REGEX and REMOVE-REGISTERS could have easily been
|
||||
;;; wrapped into one function. Maybe in the next release...
|
||||
|
||||
@ -417,12 +414,7 @@ which are not of type STR."))
|
||||
;;; and therefore we stop REGISTER removal once we see an ALTERNATION.
|
||||
|
||||
(defgeneric remove-registers (regex)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns a deep copy of a REGEX (see COPY-REGEX) and
|
||||
optionally removes embedded REGISTER objects if possible and if the
|
||||
special variable REMOVE-REGISTERS-P is true."))
|
||||
@ -491,12 +483,7 @@ special variable REMOVE-REGISTERS-P is true."))
|
||||
:elements (mapcar #'remove-registers (elements seq))))
|
||||
|
||||
(defgeneric everythingp (regex)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns an EVERYTHING object if REGEX is equivalent
|
||||
to this object, otherwise NIL. So, \"(.){1}\" would return true
|
||||
(i.e. the object corresponding to \".\", for example."))
|
||||
@ -539,16 +526,11 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
||||
|
||||
(defmethod everythingp ((regex regex))
|
||||
;; the general case for ANCHOR, BACK-REFERENCE, BRANCH, CHAR-CLASS,
|
||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, and WORD-BOUNDARY
|
||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY
|
||||
nil)
|
||||
|
||||
(defgeneric regex-length (regex)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Return the length of REGEX if it is fixed, NIL otherwise."))
|
||||
|
||||
(defmethod regex-length ((seq seq))
|
||||
@ -586,7 +568,7 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
||||
(maximum maximum))
|
||||
repetition
|
||||
(if (and len
|
||||
(eq minimum maximum))
|
||||
(eql minimum maximum))
|
||||
(* minimum len)
|
||||
nil)))
|
||||
|
||||
@ -610,18 +592,16 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
||||
(defmethod regex-length ((str str))
|
||||
(len str))
|
||||
|
||||
(defmethod regex-length ((filter filter))
|
||||
(len filter))
|
||||
|
||||
(defmethod regex-length ((regex regex))
|
||||
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
||||
;; WORD-BOUNDARY (which all have zero-length)
|
||||
0)
|
||||
|
||||
(defgeneric regex-min-length (regex)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns the minimal length of REGEX."))
|
||||
|
||||
(defmethod regex-min-length ((seq seq))
|
||||
@ -662,18 +642,17 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
||||
(defmethod regex-min-length ((str str))
|
||||
(len str))
|
||||
|
||||
(defmethod regex-min-length ((filter filter))
|
||||
(or (len filter)
|
||||
0))
|
||||
|
||||
(defmethod regex-min-length ((regex regex))
|
||||
;; the general case for ANCHOR, BACK-REFERENCE, LOOKAHEAD,
|
||||
;; LOOKBEHIND, VOID, and WORD-BOUNDARY
|
||||
0)
|
||||
|
||||
(defgeneric compute-offsets (regex start-pos)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Returns the offset the following regex would have
|
||||
relative to START-POS or NIL if we can't compute it. Sets the OFFSET
|
||||
slot of REGEX to START-POS if REGEX is a STR. May also affect OFFSET
|
||||
@ -746,6 +725,12 @@ slots of STR objects further down the tree."))
|
||||
(declare (ignore start-pos))
|
||||
nil)
|
||||
|
||||
(defmethod compute-offsets ((filter filter) start-pos)
|
||||
(let ((len (len filter)))
|
||||
(if len
|
||||
(+ start-pos len)
|
||||
nil)))
|
||||
|
||||
(defmethod compute-offsets ((regex regex) start-pos)
|
||||
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
||||
;; WORD-BOUNDARY (which all have zero-length)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/repetition-closures.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/repetition-closures.lisp,v 1.24 2005/04/13 15:35:58 edi Exp $
|
||||
|
||||
;;; This is actually a part of closures.lisp which we put into a
|
||||
;;; separate file because it is rather complex. We only deal with
|
||||
@ -7,7 +7,7 @@
|
||||
;;; rather crazy micro-optimizations which were introduced to be as
|
||||
;;; competitive with Perl as possible in tight loops.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -117,12 +117,7 @@ repetition matches at CURR-POS."
|
||||
(go backward-loop)))))))
|
||||
|
||||
(defun create-greedy-everything-matcher (maximum min-rest next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum min-rest)
|
||||
(type function next-fn))
|
||||
"Creates a closure which just matches as far ahead as possible,
|
||||
@ -149,18 +144,16 @@ i.e. a closure for a dot in single-line mode."
|
||||
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
|
||||
thereis (funcall next-fn curr-pos))))))
|
||||
|
||||
(defmethod create-greedy-constant-length-matcher ((repetition repetition)
|
||||
next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
(defgeneric create-greedy-constant-length-matcher (repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION is greedy and the minimal number of repetitions is
|
||||
zero. It is furthermore assumed that the inner regex of REPETITION is
|
||||
of fixed length and doesn't contain registers."
|
||||
of fixed length and doesn't contain registers."))
|
||||
|
||||
(defmethod create-greedy-constant-length-matcher ((repetition repetition)
|
||||
next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((len (len repetition))
|
||||
(maximum (maximum repetition))
|
||||
(regex (regex repetition))
|
||||
@ -213,18 +206,16 @@ of fixed length and doesn't contain registers."
|
||||
(greedy-constant-length-closure
|
||||
(funcall inner-matcher curr-pos)))))))))
|
||||
|
||||
(defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
(defgeneric create-greedy-no-zero-matcher (repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION is greedy and the minimal number of repetitions is
|
||||
zero. It is furthermore assumed that the inner regex of REPETITION can
|
||||
never match a zero-length string (or instead the maximal number of
|
||||
repetitions is 1)."
|
||||
repetitions is 1)."))
|
||||
|
||||
(defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((maximum (maximum repetition))
|
||||
;; REPEAT-MATCHER is part of the closure's environment but it
|
||||
;; can only be defined after GREEDY-AUX is defined
|
||||
@ -283,16 +274,14 @@ repetitions is 1)."
|
||||
(create-matcher-aux (regex repetition) #'greedy-aux))
|
||||
#'greedy-aux)))))
|
||||
|
||||
(defmethod create-greedy-matcher ((repetition repetition) next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
(defgeneric create-greedy-matcher (repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION is greedy and the minimal number of repetitions is
|
||||
zero."
|
||||
zero."))
|
||||
|
||||
(defmethod create-greedy-matcher ((repetition repetition) next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((maximum (maximum repetition))
|
||||
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
||||
;; we have to watch out for endless loops as the inner regex might
|
||||
@ -409,17 +398,15 @@ repetition matches at CURR-POS."
|
||||
while ,check-curr-pos
|
||||
finally (return (funcall next-fn curr-pos)))))))
|
||||
|
||||
(defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
(defgeneric create-non-greedy-constant-length-matcher (repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION is non-greedy and the minimal number of repetitions is
|
||||
zero. It is furthermore assumed that the inner regex of REPETITION is
|
||||
of fixed length and doesn't contain registers."
|
||||
of fixed length and doesn't contain registers."))
|
||||
|
||||
(defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((len (len repetition))
|
||||
(maximum (maximum repetition))
|
||||
(regex (regex repetition))
|
||||
@ -475,18 +462,16 @@ of fixed length and doesn't contain registers."
|
||||
(non-greedy-constant-length-closure
|
||||
(funcall inner-matcher curr-pos)))))))))
|
||||
|
||||
(defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
(defgeneric create-non-greedy-no-zero-matcher (repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION is non-greedy and the minimal number of repetitions is
|
||||
zero. It is furthermore assumed that the inner regex of REPETITION can
|
||||
never match a zero-length string (or instead the maximal number of
|
||||
repetitions is 1)."
|
||||
repetitions is 1)."))
|
||||
|
||||
(defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((maximum (maximum repetition))
|
||||
;; REPEAT-MATCHER is part of the closure's environment but it
|
||||
;; can only be defined after NON-GREEDY-AUX is defined
|
||||
@ -543,16 +528,14 @@ repetitions is 1)."
|
||||
(create-matcher-aux (regex repetition) #'non-greedy-aux))
|
||||
#'non-greedy-aux)))))
|
||||
|
||||
(defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
(defgeneric create-non-greedy-matcher (repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION is non-greedy and the minimal number of repetitions is
|
||||
zero."
|
||||
zero."))
|
||||
|
||||
(defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
||||
;; we have to watch out for endless loops as the inner regex might
|
||||
;; match zero-length strings
|
||||
@ -656,18 +639,17 @@ of the repetition matches at CURR-POS."
|
||||
;; finally call NEXT-FN if we made it that far
|
||||
(funcall next-fn target-end-pos)))))
|
||||
|
||||
(defmethod create-constant-repetition-constant-length-matcher
|
||||
((repetition repetition) next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
(defgeneric create-constant-repetition-constant-length-matcher
|
||||
(repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION has a constant number of repetitions. It is
|
||||
furthermore assumed that the inner regex of REPETITION is of fixed
|
||||
length and doesn't contain registers."
|
||||
length and doesn't contain registers."))
|
||||
|
||||
(defmethod create-constant-repetition-constant-length-matcher
|
||||
((repetition repetition) next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((len (len repetition))
|
||||
(repetitions (minimum repetition))
|
||||
(regex (regex repetition)))
|
||||
@ -721,7 +703,7 @@ length and doesn't contain registers."
|
||||
(declare (type fixnum start-pos))
|
||||
(let ((next-pos (+ start-pos repetitions)))
|
||||
(declare (type fixnum next-pos))
|
||||
(or (<= next-pos *end-pos*)
|
||||
(and (<= next-pos *end-pos*)
|
||||
(funcall next-fn next-pos))))
|
||||
;; a dot which is not in single-line-mode - make sure we
|
||||
;; don't match #\Newline
|
||||
@ -736,15 +718,13 @@ length and doesn't contain registers."
|
||||
(constant-repetition-constant-length-closure
|
||||
(funcall inner-matcher curr-pos))))))))
|
||||
|
||||
(defgeneric create-constant-repetition-matcher (repetition next-fn)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION has a constant number of repetitions."))
|
||||
|
||||
(defmethod create-constant-repetition-matcher ((repetition repetition) next-fn)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
"Creates a closure which tries to match REPETITION. It is assumed
|
||||
that REPETITION has a constant number of repetitions."
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(let ((repetitions (minimum repetition))
|
||||
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
||||
;; because we need to keep track of the number of repetitions
|
||||
|
||||
50
scanner.lisp
50
scanner.lisp
@ -1,10 +1,10 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/scanner.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/scanner.lisp,v 1.26 2005/07/19 23:18:15 edi Exp $
|
||||
|
||||
;;; Here the scanner for the actual regex as well as utility scanners
|
||||
;;; for the constant start and end strings are created.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -37,7 +37,8 @@
|
||||
(let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
|
||||
`(lambda (start-pos)
|
||||
(declare (type fixnum start-pos))
|
||||
(if (> (the fixnum (+ start-pos m)) *end-pos*)
|
||||
(if (or (minusp start-pos)
|
||||
(> (the fixnum (+ start-pos m)) *end-pos*))
|
||||
nil
|
||||
(loop named bmh-matcher
|
||||
for k of-type fixnum = (+ start-pos m -1)
|
||||
@ -52,12 +53,7 @@
|
||||
(return-from bmh-matcher (1+ i)))))))))
|
||||
|
||||
(defun create-bmh-matcher (pattern case-insensitive-p)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns a Boyer-Moore-Horspool matcher which searches the (special)
|
||||
simple-string *STRING* for the first occurence of the substring
|
||||
PATTERN. The search starts at the position START-POS within *STRING*
|
||||
@ -72,11 +68,12 @@ instead. (BMH matchers are faster but need much more space.)"
|
||||
(return-from create-bmh-matcher
|
||||
(lambda (start-pos)
|
||||
(declare (type fixnum start-pos))
|
||||
(and (not (minusp start-pos))
|
||||
(search pattern
|
||||
*string*
|
||||
:start2 start-pos
|
||||
:end2 *end-pos*
|
||||
:test test)))))
|
||||
:test test))))))
|
||||
(let* ((m (length pattern))
|
||||
(skip (make-array *regex-char-code-limit*
|
||||
:element-type 'fixnum
|
||||
@ -97,16 +94,12 @@ instead. (BMH matchers are faster but need much more space.)"
|
||||
(let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
|
||||
`(lambda (start-pos)
|
||||
(declare (type fixnum start-pos))
|
||||
(and (not (minusp start-pos))
|
||||
(loop for i of-type fixnum from start-pos below *end-pos*
|
||||
thereis (and (,char-compare (schar *string* i) chr) i)))))
|
||||
thereis (and (,char-compare (schar *string* i) chr) i))))))
|
||||
|
||||
(defun create-char-searcher (chr case-insensitive-p)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns a function which searches the (special) simple-string
|
||||
*STRING* for the first occurence of the character CHR. The search
|
||||
starts at the position START-POS within *STRING* and stops before
|
||||
@ -119,17 +112,16 @@ case-insensitive or not."
|
||||
(declaim (inline newline-skipper))
|
||||
|
||||
(defun newline-skipper (start-pos)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum start-pos))
|
||||
"Find the next occurence of a character in *STRING* which is behind
|
||||
a #\Newline."
|
||||
(loop for i of-type fixnum from start-pos below *end-pos*
|
||||
thereis (and (char= (schar *string* i) #\Newline)
|
||||
;; we can start with (1- START-POS) without testing for (PLUSP
|
||||
;; START-POS) because we know we'll never call NEWLINE-SKIPPER on
|
||||
;; the first iteration
|
||||
(loop for i of-type fixnum from (1- start-pos) below *end-pos*
|
||||
thereis (and (char= (schar *string* i)
|
||||
#\Newline)
|
||||
(1+ i))))
|
||||
|
||||
(defmacro insert-advance-fn (advance-fn)
|
||||
@ -198,6 +190,7 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
|
||||
(unless (setq *end-string-pos* (funcall end-string-test
|
||||
end-test-pos))
|
||||
(when (and (= 1 (the fixnum end-anchored-p))
|
||||
(> *end-pos* scan-start-pos)
|
||||
(char= #\Newline (schar *string* (1- *end-pos*))))
|
||||
;; if we didn't find an end string candidate from
|
||||
;; END-TEST-POS and if a #\Newline at the end is
|
||||
@ -328,12 +321,7 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
|
||||
rep-num
|
||||
zero-length-num
|
||||
reg-num)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(declare (type fixnum min-len zero-length-num rep-num reg-num))
|
||||
"Auxiliary function to create and return a scanner \(which is
|
||||
actually a closure). Used by CREATE-SCANNER."
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/specials.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/specials.lisp,v 1.21 2005/04/01 21:29:10 edi Exp $
|
||||
|
||||
;;; globally declared special variables
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -31,6 +31,22 @@
|
||||
|
||||
(in-package #:cl-ppcre)
|
||||
|
||||
;;; special variables used to effect declarations
|
||||
|
||||
(defvar *standard-optimize-settings*
|
||||
'(optimize
|
||||
speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 1)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0))
|
||||
"The standard optimize settings used by most declaration expressions.")
|
||||
|
||||
(defvar *special-optimize-settings*
|
||||
'(optimize speed space)
|
||||
"Special optimize settings used only be a few declaration expressions.")
|
||||
|
||||
;;; special variables used by the lexer/parser combo
|
||||
|
||||
(defvar *extended-mode-p* nil
|
||||
@ -105,3 +121,22 @@ but large) Boyer-Moore-Horspool matchers.")
|
||||
"Whether the parser should support Perl's \\Q and \\E.")
|
||||
|
||||
(pushnew :cl-ppcre *features*)
|
||||
|
||||
;; stuff for Nikodemus Siivola's HYPERDOC
|
||||
;; see <http://common-lisp.net/project/hyperdoc/>
|
||||
;; and <http://www.cliki.net/hyperdoc>
|
||||
|
||||
(defvar *hyperdoc-base-uri* "http://weitz.de/cl-ppcre/")
|
||||
|
||||
(let ((exported-symbols-alist
|
||||
(loop for symbol being the external-symbols of :cl-ppcre
|
||||
collect (cons symbol
|
||||
(concatenate 'string
|
||||
"#"
|
||||
(string-downcase symbol))))))
|
||||
(defun hyperdoc-lookup (symbol type)
|
||||
(declare (ignore type))
|
||||
(cdr (assoc symbol
|
||||
exported-symbols-alist
|
||||
:test #'eq))))
|
||||
|
||||
|
||||
88
util.lisp
88
util.lisp
@ -1,5 +1,5 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/util.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/util.lisp,v 1.32 2005/08/23 10:32:30 edi Exp $
|
||||
|
||||
;;; Utility functions and constants dealing with the hash-tables
|
||||
;;; we use to encode character classes
|
||||
@ -7,7 +7,7 @@
|
||||
;;; Hash-tables are treated like sets, i.e. a character C is a member of the
|
||||
;;; hash-table H iff (GETHASH C H) is true.
|
||||
|
||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
||||
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
@ -35,6 +35,10 @@
|
||||
|
||||
(in-package #:cl-ppcre)
|
||||
|
||||
#+:lispworks
|
||||
(import 'lw:with-unique-names)
|
||||
|
||||
#-:lispworks
|
||||
(defmacro with-unique-names ((&rest bindings) &body body)
|
||||
"Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form*
|
||||
|
||||
@ -65,8 +69,14 @@ are discarded \(that is, the body is an implicit PROGN)."
|
||||
bindings)
|
||||
,@body))
|
||||
|
||||
(defmacro rebinding (bindings &body body)
|
||||
"REBINDING ( { var | (var prefix) }* ) form*
|
||||
#+:lispworks
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(setf (macro-function 'with-rebinding)
|
||||
(macro-function 'lw:rebinding)))
|
||||
|
||||
#-:lispworks
|
||||
(defmacro with-rebinding (bindings &body body)
|
||||
"WITH-REBINDING ( { var | (var prefix) }* ) form*
|
||||
|
||||
Evaluates a series of forms in the lexical environment that is
|
||||
formed by adding the binding of each VAR to a fresh, uninterned
|
||||
@ -94,14 +104,14 @@ are discarded \(that is, the body is an implicit PROGN)."
|
||||
|
||||
(eval-when (:compile-toplevel :execute :load-toplevel)
|
||||
(defvar *regex-char-code-limit* char-code-limit
|
||||
"The upper exclusive bound on the char-codes of characters
|
||||
which can occur in character classes.
|
||||
Change this value BEFORE creating scanners if you don't need
|
||||
the full Unicode support of LW, ACL, or CLISP.")
|
||||
"The upper exclusive bound on the char-codes of characters which
|
||||
can occur in character classes. Change this value BEFORE creating
|
||||
scanners if you don't need the Unicode support of implementations like
|
||||
AllegroCL, CLISP, LispWorks, or SBCL.")
|
||||
(declaim (type fixnum *regex-char-code-limit*))
|
||||
|
||||
(defun make-char-hash (test)
|
||||
(declare (optimize speed space))
|
||||
(declare #.*special-optimize-settings*)
|
||||
"Returns a hash-table of all characters satisfying test."
|
||||
(loop with hash = (make-hash-table)
|
||||
for c of-type fixnum from 0 below char-code-limit
|
||||
@ -113,12 +123,7 @@ the full Unicode support of LW, ACL, or CLISP.")
|
||||
(declaim (inline word-char-p))
|
||||
|
||||
(defun word-char-p (chr)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Tests whether a character is a \"word\" character.
|
||||
In the ASCII charset this is equivalent to a-z, A-Z, 0-9, or _,
|
||||
i.e. the same as Perl's [\\w]."
|
||||
@ -134,7 +139,7 @@ i.e. the same as Perl's [\\w]."
|
||||
Same as Perl's [\\s]."))
|
||||
|
||||
(defun whitespacep (chr)
|
||||
(declare (optimize speed space))
|
||||
(declare #.*special-optimize-settings*)
|
||||
"Tests whether a character is whitespace,
|
||||
i.e. whether it would match [\\s] in Perl."
|
||||
(find chr +whitespace-char-string+ :test #'char=)))
|
||||
@ -158,12 +163,7 @@ i.e. whether it would match [\\s] in Perl."
|
||||
"Hash-table containing all whitespace characters."))
|
||||
|
||||
(defun merge-hash (hash1 hash2)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns the \"sum\" of two hashes. This is a destructive operation
|
||||
on HASH1."
|
||||
(cond ((> (hash-table-count hash2)
|
||||
@ -180,12 +180,7 @@ on HASH1."
|
||||
hash1)
|
||||
|
||||
(defun merge-inverted-hash (hash1 hash2)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Returns the \"sum\" of HASH1 and the \"inverse\" of HASH2. This is
|
||||
a destructive operation on HASH1."
|
||||
(loop for c of-type fixnum from 0 below *regex-char-code-limit*
|
||||
@ -195,12 +190,7 @@ a destructive operation on HASH1."
|
||||
hash1)
|
||||
|
||||
(defun create-ranges-from-hash (hash &key downcasep)
|
||||
(declare (optimize speed
|
||||
(safety 0)
|
||||
(space 0)
|
||||
(debug 0)
|
||||
(compilation-speed 0)
|
||||
#+:lispworks (hcl:fixnum-safety 0)))
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Tries to identify up to three intervals (with respect to CHAR<)
|
||||
which together comprise HASH. Returns NIL if this is not possible.
|
||||
If DOWNCASEP is true it will treat the hash-table as if it represents
|
||||
@ -276,3 +266,33 @@ will only return the respective lower-case intervals."
|
||||
:element-type (array-element-type sequence)
|
||||
:displaced-to sequence
|
||||
:displaced-index-offset start))
|
||||
|
||||
(defun normalize-var-list (var-list)
|
||||
"Utility function for REGISTER-GROUPS-BIND and
|
||||
DO-REGISTER-GROUPS. Creates the long form \(a list of \(FUNCTION VAR)
|
||||
entries) out of the short form of VAR-LIST."
|
||||
(loop for element in var-list
|
||||
if (consp element)
|
||||
nconc (loop for var in (rest element)
|
||||
collect (list (first element) var))
|
||||
else
|
||||
collect (list '(function identity) element)))
|
||||
|
||||
(defun string-list-to-simple-string (string-list)
|
||||
(declare #.*standard-optimize-settings*)
|
||||
"Concatenates a list of strings to one simple-string."
|
||||
;; this function provided by JP Massar; note that we can't use APPLY
|
||||
;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
|
||||
(let ((total-size 0))
|
||||
(declare (type fixnum total-size))
|
||||
(dolist (string string-list)
|
||||
#-genera (declare (type string string))
|
||||
(incf total-size (length string)))
|
||||
(let ((result-string (make-sequence 'simple-string total-size))
|
||||
(curr-pos 0))
|
||||
(declare (type fixnum curr-pos))
|
||||
(dolist (string string-list)
|
||||
#-genera (declare (type string string))
|
||||
(replace result-string string :start1 curr-pos)
|
||||
(incf curr-pos (length string)))
|
||||
result-string)))
|
||||
|
||||
Reference in New Issue
Block a user