Update to version 1.2.12 from weitz.de
git-svn-id: svn://bknr.net/svn/trunk/thirdparty/cl-ppcre@1779 4281704c-cde7-0310-8518-8e2dc76b1ff0
This commit is contained in:
123
CHANGELOG
123
CHANGELOG
@ -1,3 +1,124 @@
|
|||||||
|
Version 1.2.12
|
||||||
|
2005-11-01
|
||||||
|
REGEX-APROPOS-AUX now also uses :INHERITED
|
||||||
|
Fixed typo in parser.lisp (thanks to Derek Peschel)
|
||||||
|
Fixed value of *REGEX-CHAR-CODE-LIMIT* in docs and test (thanks to Christophe Rhodes)
|
||||||
|
|
||||||
|
Version 1.2.11
|
||||||
|
2005-08-01
|
||||||
|
Added external format for SBCL in ppcre-tests.lisp (thanks to Christophe Rhodes)
|
||||||
|
|
||||||
|
Version 1.2.10
|
||||||
|
2005-07-20
|
||||||
|
Fixed bug in CHAR-SEARCHER-AUX (caught by Peter Schuller)
|
||||||
|
Don't redefine what's already there (for LispWorks)
|
||||||
|
|
||||||
|
Version 1.2.9
|
||||||
|
2005-06-27
|
||||||
|
Hide compiler macros from CCL (thanks to Karsten Poeck)
|
||||||
|
|
||||||
|
Version 1.2.8
|
||||||
|
2005-06-10
|
||||||
|
Change EQ to EQL in REGEX-LENGTH for ANSI conformance and ABCL compatibility (thanks to Peter Graves)
|
||||||
|
|
||||||
|
Version 1.2.7
|
||||||
|
2005-05-16
|
||||||
|
Added lispworks-defsystem.lisp (thanks to Wade Humeniuk)
|
||||||
|
Fixed bug in WORD-BOUNDARY-P
|
||||||
|
|
||||||
|
Version 1.2.6
|
||||||
|
2005-04-13
|
||||||
|
Added some DEFGENERICs to appease SBCL (thanks to Alan Shields)
|
||||||
|
Removed wrong FTYPE declaration for STR (thanks to Alan Shields)
|
||||||
|
|
||||||
|
Version 1.2.5
|
||||||
|
2005-03-09
|
||||||
|
Customizable optimize qualities (thanks to Damien Kick)
|
||||||
|
|
||||||
|
Version 1.2.4
|
||||||
|
2005-03-07
|
||||||
|
Changed DEBUG optimize quality from 0 to 1
|
||||||
|
|
||||||
|
Version 1.2.3
|
||||||
|
2005-02-02
|
||||||
|
Wrapped WITH-COMPILATION-UNIT around loop in load.lisp
|
||||||
|
|
||||||
|
Version 1.2.2
|
||||||
|
2005-02-02
|
||||||
|
Fixed bug in hash table optimization (introduced in 1.1.0)
|
||||||
|
|
||||||
|
Version 1.2.1
|
||||||
|
2005-01-25
|
||||||
|
There was a wrong read-time conditional in api.lisp, sorry
|
||||||
|
|
||||||
|
Version 1.2.0
|
||||||
|
2005-01-24
|
||||||
|
AllegroCL compatibility mode
|
||||||
|
Fixed broken load.lisp file (caught by Jim Prewett and Zach Beane)
|
||||||
|
|
||||||
|
Version 1.1.0
|
||||||
|
2005-01-23
|
||||||
|
Cleaned up load.lisp and cl-ppcre.asd
|
||||||
|
Make large hash tables smaller, if possible
|
||||||
|
Correct treatment of constant regular expressions in DO-SCANS
|
||||||
|
|
||||||
|
Version 1.0.0
|
||||||
|
2004-12-22
|
||||||
|
Special anniversary release... :)
|
||||||
|
|
||||||
|
Version 0.9.4
|
||||||
|
2004-12-18
|
||||||
|
Fixed bug in NORMALIZE-VAR-LIST (caught by Dave Roberts)
|
||||||
|
|
||||||
|
Version 0.9.3
|
||||||
|
2004-12-09
|
||||||
|
Fixed bug in CREATE-SCANNER-AUX (caught by Allan Ruttenberg and Gary Byers)
|
||||||
|
|
||||||
|
Version 0.9.2
|
||||||
|
2004-12-06
|
||||||
|
More compiler macros (thanks to Allan Ruttenberg)
|
||||||
|
|
||||||
|
Version 0.9.1
|
||||||
|
2004-11-29
|
||||||
|
Shortcuts for REGISTER-GROUPS-BIND and DO-REGISTER-GROUPS (suggested by Alexander Kjeldaas)
|
||||||
|
|
||||||
|
Version 0.9.0
|
||||||
|
2004-10-14
|
||||||
|
Experimental support for "filters"
|
||||||
|
Bugfix for standalone regular expressions (ACCUMULATE-START-P wasn't set to NIL)
|
||||||
|
|
||||||
|
Version 0.8.1
|
||||||
|
2004-09-30
|
||||||
|
Patches for Genera 8.5 (thanks to Patrick O'Donnell)
|
||||||
|
|
||||||
|
Version 0.8.0
|
||||||
|
2004-09-16
|
||||||
|
Added parse tree synonyms (thanks to Patrick O'Donnell)
|
||||||
|
|
||||||
|
Version 0.7.9
|
||||||
|
2004-07-13
|
||||||
|
Fixed bug in DO-SCANS (caught by Jan Rychter)
|
||||||
|
|
||||||
|
Version 0.7.8
|
||||||
|
2004-07-13
|
||||||
|
New SIMPLE-CALLS keyword argument for REGEX-REPLACE(-ALL)
|
||||||
|
Added environment parameter to compiler macros (thanks to c.l.l article <aczhx5hj.fsf@ccs.neu.edu> by Joe Marshall)
|
||||||
|
Added compiler macros for SCAN-TO-STRINGS and REGEX-REPLACE(-ALL) (they somehow got lost)
|
||||||
|
|
||||||
|
Version 0.7.7
|
||||||
|
2004-05-19
|
||||||
|
Fixed bug in NEWLINE-SKIPPER (caught by RegexCoach user Thomas-Paz Hartman)
|
||||||
|
Added doc strings for PPCRE-SYNTAX-ERROR and friends (after playing with slime-apropos-package)
|
||||||
|
Added hyperdoc support
|
||||||
|
|
||||||
|
Version 0.7.6
|
||||||
|
2004-04-20
|
||||||
|
The closures created by CREATE-BMH-MATCHER now cleanly cope with negative arguments (bug caught by Damien Kick)
|
||||||
|
|
||||||
|
Version 0.7.5
|
||||||
|
2004-04-19
|
||||||
|
Fixed a bug with constant-length repetitions of . (dot) in single-line mode (caught by RegexCoach user Lee Gold)
|
||||||
|
|
||||||
Version 0.7.4
|
Version 0.7.4
|
||||||
2004-02-16
|
2004-02-16
|
||||||
Fixed wrong call to SIGNAL-PPCRE-SIGNAL-ERROR in lexer.lisp (caught by Peter Graves)
|
Fixed wrong call to SIGNAL-PPCRE-SIGNAL-ERROR in lexer.lisp (caught by Peter Graves)
|
||||||
@ -6,7 +127,7 @@ Compiler macro for SPLIT
|
|||||||
|
|
||||||
Version 0.7.3
|
Version 0.7.3
|
||||||
2004-01-28
|
2004-01-28
|
||||||
Fixed bug in CURRENT-MIN-REST for lookaheads (reported by Thomas-Paz Hartman)
|
Fixed bug in CURRENT-MIN-REST for lookaheads (reported by RegexCoach user Thomas-Paz Hartman)
|
||||||
Added tests for this bug
|
Added tests for this bug
|
||||||
|
|
||||||
Version 0.7.2
|
Version 0.7.2
|
||||||
|
|||||||
13
README
13
README
@ -1,6 +1,10 @@
|
|||||||
Complete documentation for CL-PPCRE can be found in the 'doc'
|
Complete documentation for CL-PPCRE can be found in the 'doc'
|
||||||
directory.
|
directory.
|
||||||
|
|
||||||
|
CL-PPCRE also supports Nikodemus Siivola's HYPERDOC, see
|
||||||
|
<http://common-lisp.net/project/hyperdoc/> and
|
||||||
|
<http://www.cliki.net/hyperdoc>.
|
||||||
|
|
||||||
1. Installation
|
1. Installation
|
||||||
|
|
||||||
1.1. Probably the easiest way is
|
1.1. Probably the easiest way is
|
||||||
@ -24,6 +28,9 @@ directory.
|
|||||||
1.3. You can also use ASDF instead of MK:DEFSYSTEM in a similar way
|
1.3. You can also use ASDF instead of MK:DEFSYSTEM in a similar way
|
||||||
(use the .asd files instead of the .system files).
|
(use the .asd files instead of the .system files).
|
||||||
|
|
||||||
|
1.4. For LispWorks there's a file 'lispworks-defsystem.lisp' which includes
|
||||||
|
a system definition for LispWork's Common Defsystem.
|
||||||
|
|
||||||
2. Test
|
2. Test
|
||||||
|
|
||||||
CL-PPCRE comes with a test suite that can be used to check its
|
CL-PPCRE comes with a test suite that can be used to check its
|
||||||
@ -48,4 +55,8 @@ visual feedback.) It should exactly report three 'errors' (662, 790,
|
|||||||
and 1439) which are explained in the documentation.
|
and 1439) which are explained in the documentation.
|
||||||
|
|
||||||
MCL might report an error for the ninth test case which is also
|
MCL might report an error for the ninth test case which is also
|
||||||
explained in the docs.
|
explained in the docs.
|
||||||
|
|
||||||
|
Genera notes (thanks to Patrick O'Donnell): Some more tests will fail
|
||||||
|
because characters like #\Return, #\Linefeed, or #\Tab have encodings
|
||||||
|
which differ from Perl's (and thus CL-PPCRE's) expectations.
|
||||||
|
|||||||
454
api.lisp
454
api.lisp
@ -1,9 +1,9 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/api.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/api.lisp,v 1.60 2005/11/01 09:51:01 edi Exp $
|
||||||
|
|
||||||
;;; The external API for creating and using scanners.
|
;;; The external API for creating and using scanners.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -43,17 +43,13 @@ are equivalent to the imsx modifiers in Perl. If DESTRUCTIVE is not
|
|||||||
NIL the function is allowed to destructively modify its first argument
|
NIL the function is allowed to destructively modify its first argument
|
||||||
\(but only if it's a parse tree)."))
|
\(but only if it's a parse tree)."))
|
||||||
|
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(defmethod create-scanner ((regex-string string) &key case-insensitive-mode
|
(defmethod create-scanner ((regex-string string) &key case-insensitive-mode
|
||||||
multi-line-mode
|
multi-line-mode
|
||||||
single-line-mode
|
single-line-mode
|
||||||
extended-mode
|
extended-mode
|
||||||
destructive)
|
destructive)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (ignore destructive))
|
(declare (ignore destructive))
|
||||||
;; parse the string into a parse-tree and then call CREATE-SCANNER
|
;; parse the string into a parse-tree and then call CREATE-SCANNER
|
||||||
;; again
|
;; again
|
||||||
@ -70,34 +66,26 @@ NIL the function is allowed to destructively modify its first argument
|
|||||||
:single-line-mode single-line-mode
|
:single-line-mode single-line-mode
|
||||||
:destructive t)))
|
:destructive t)))
|
||||||
|
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(defmethod create-scanner ((scanner function) &key case-insensitive-mode
|
(defmethod create-scanner ((scanner function) &key case-insensitive-mode
|
||||||
multi-line-mode
|
multi-line-mode
|
||||||
single-line-mode
|
single-line-mode
|
||||||
extended-mode
|
extended-mode
|
||||||
destructive)
|
destructive)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (ignore destructive))
|
(declare (ignore destructive))
|
||||||
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
|
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
|
||||||
(signal-ppcre-invocation-error
|
(signal-ppcre-invocation-error
|
||||||
"You can't use the keyword arguments to modify an existing scanner."))
|
"You can't use the keyword arguments to modify an existing scanner."))
|
||||||
scanner)
|
scanner)
|
||||||
|
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
|
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
|
||||||
multi-line-mode
|
multi-line-mode
|
||||||
single-line-mode
|
single-line-mode
|
||||||
extended-mode
|
extended-mode
|
||||||
destructive)
|
destructive)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(when extended-mode
|
(when extended-mode
|
||||||
(signal-ppcre-invocation-error
|
(signal-ppcre-invocation-error
|
||||||
"Extended mode doesn't make sense in parse trees."))
|
"Extended mode doesn't make sense in parse trees."))
|
||||||
@ -188,6 +176,35 @@ NIL the function is allowed to destructively modify its first argument
|
|||||||
*zero-length-num*
|
*zero-length-num*
|
||||||
reg-num))))))
|
reg-num))))))
|
||||||
|
|
||||||
|
#+:use-acl-regexp2-engine
|
||||||
|
(declaim (inline create-scanner))
|
||||||
|
|
||||||
|
#+:use-acl-regexp2-engine
|
||||||
|
(defmethod create-scanner ((scanner regexp::regular-expression) &key case-insensitive-mode
|
||||||
|
multi-line-mode
|
||||||
|
single-line-mode
|
||||||
|
extended-mode
|
||||||
|
destructive)
|
||||||
|
(declare (ignore destructive))
|
||||||
|
(when (or case-insensitive-mode multi-line-mode single-line-mode extended-mode)
|
||||||
|
(signal-ppcre-invocation-error
|
||||||
|
"You can't use the keyword arguments to modify an existing scanner."))
|
||||||
|
scanner)
|
||||||
|
|
||||||
|
#+:use-acl-regexp2-engine
|
||||||
|
(defmethod create-scanner ((parse-tree t) &key case-insensitive-mode
|
||||||
|
multi-line-mode
|
||||||
|
single-line-mode
|
||||||
|
extended-mode
|
||||||
|
destructive)
|
||||||
|
(declare (ignore destructive))
|
||||||
|
(excl:compile-re parse-tree
|
||||||
|
:case-fold case-insensitive-mode
|
||||||
|
:ignore-whitespace extended-mode
|
||||||
|
:multiple-lines multi-line-mode
|
||||||
|
:single-line single-line-mode
|
||||||
|
:return :index))
|
||||||
|
|
||||||
(defgeneric scan (regex target-string &key start end)
|
(defgeneric scan (regex target-string &key start end)
|
||||||
(:documentation "Searches TARGET-STRING from START to END and tries
|
(:documentation "Searches TARGET-STRING from START to END and tries
|
||||||
to match REGEX. On success returns four values - the start of the
|
to match REGEX. On success returns four values - the start of the
|
||||||
@ -197,50 +214,66 @@ string which will be parsed according to Perl syntax, a parse tree, or
|
|||||||
a pre-compiled scanner created by CREATE-SCANNER. TARGET-STRING will
|
a pre-compiled scanner created by CREATE-SCANNER. TARGET-STRING will
|
||||||
be coerced to a simple string if it isn't one already."))
|
be coerced to a simple string if it isn't one already."))
|
||||||
|
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(defmethod scan ((regex-string string) target-string
|
(defmethod scan ((regex-string string) target-string
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string)))
|
(end (length target-string)))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
;; note that the scanners are optimized for simple strings so we
|
;; note that the scanners are optimized for simple strings so we
|
||||||
;; have to coerce TARGET-STRING into one if it isn't already
|
;; have to coerce TARGET-STRING into one if it isn't already
|
||||||
(funcall (create-scanner regex-string)
|
(funcall (create-scanner regex-string)
|
||||||
(maybe-coerce-to-simple-string target-string)
|
(maybe-coerce-to-simple-string target-string)
|
||||||
start end))
|
start end))
|
||||||
|
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(defmethod scan ((scanner function) target-string
|
(defmethod scan ((scanner function) target-string
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string)))
|
(end (length target-string)))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(funcall scanner
|
(funcall scanner
|
||||||
(maybe-coerce-to-simple-string target-string)
|
(maybe-coerce-to-simple-string target-string)
|
||||||
start end))
|
start end))
|
||||||
|
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(defmethod scan ((parse-tree t) target-string
|
(defmethod scan ((parse-tree t) target-string
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string)))
|
(end (length target-string)))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(funcall (create-scanner parse-tree)
|
(funcall (create-scanner parse-tree)
|
||||||
(maybe-coerce-to-simple-string target-string)
|
(maybe-coerce-to-simple-string target-string)
|
||||||
start end))
|
start end))
|
||||||
|
|
||||||
(define-compiler-macro scan (&whole form regex target-string &rest rest)
|
#+:use-acl-regexp2-engine
|
||||||
|
(declaim (inline scan))
|
||||||
|
|
||||||
|
#+:use-acl-regexp2-engine
|
||||||
|
(defmethod scan ((parse-tree t) target-string
|
||||||
|
&key (start 0)
|
||||||
|
(end (length target-string)))
|
||||||
|
(when (< end start)
|
||||||
|
(return-from scan nil))
|
||||||
|
(let ((results (multiple-value-list (excl:match-re parse-tree target-string
|
||||||
|
:start start
|
||||||
|
:end end
|
||||||
|
:return :index))))
|
||||||
|
(declare (dynamic-extent results))
|
||||||
|
(cond ((null (first results)) nil)
|
||||||
|
(t (let* ((no-of-regs (- (length results) 2))
|
||||||
|
(reg-starts (make-array no-of-regs
|
||||||
|
:element-type '(or null fixnum)))
|
||||||
|
(reg-ends (make-array no-of-regs
|
||||||
|
:element-type '(or null fixnum)))
|
||||||
|
(match (second results)))
|
||||||
|
(loop for (start . end) in (cddr results)
|
||||||
|
for i from 0
|
||||||
|
do (setf (aref reg-starts i) start
|
||||||
|
(aref reg-ends i) end))
|
||||||
|
(values (car match) (cdr match) reg-starts reg-ends))))))
|
||||||
|
|
||||||
|
#-:cormanlisp
|
||||||
|
(define-compiler-macro scan (&whole form &environment env regex target-string &rest rest)
|
||||||
"Make sure that constant forms are compiled into scanners at compile time."
|
"Make sure that constant forms are compiled into scanners at compile time."
|
||||||
(cond ((constantp regex)
|
(cond ((constantp regex env)
|
||||||
`(scan (load-time-value
|
`(scan (load-time-value
|
||||||
(create-scanner ,regex))
|
(create-scanner ,regex))
|
||||||
,target-string ,@rest))
|
,target-string ,@rest))
|
||||||
@ -249,12 +282,7 @@ be coerced to a simple string if it isn't one already."))
|
|||||||
(defun scan-to-strings (regex target-string &key (start 0)
|
(defun scan-to-strings (regex target-string &key (start 0)
|
||||||
(end (length target-string))
|
(end (length target-string))
|
||||||
sharedp)
|
sharedp)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Like SCAN but returns substrings of TARGET-STRING instead of
|
"Like SCAN but returns substrings of TARGET-STRING instead of
|
||||||
positions, i.e. this function returns two values on success: the whole
|
positions, i.e. this function returns two values on success: the whole
|
||||||
match as a string plus an array of substrings (or NILs) corresponding
|
match as a string plus an array of substrings (or NILs) corresponding
|
||||||
@ -276,6 +304,16 @@ structure with TARGET-STRING."
|
|||||||
reg-starts
|
reg-starts
|
||||||
reg-ends)))))
|
reg-ends)))))
|
||||||
|
|
||||||
|
#-:cormanlisp
|
||||||
|
(define-compiler-macro scan-to-strings
|
||||||
|
(&whole form &environment env regex target-string &rest rest)
|
||||||
|
"Make sure that constant forms are compiled into scanners at compile time."
|
||||||
|
(cond ((constantp regex env)
|
||||||
|
`(scan-to-strings (load-time-value
|
||||||
|
(create-scanner ,regex))
|
||||||
|
,target-string ,@rest))
|
||||||
|
(t form)))
|
||||||
|
|
||||||
(defmacro register-groups-bind (var-list (regex target-string
|
(defmacro register-groups-bind (var-list (regex target-string
|
||||||
&key start end sharedp)
|
&key start end sharedp)
|
||||||
&body body)
|
&body body)
|
||||||
@ -287,7 +325,7 @@ VAR-LIST which is NIL there's no binding to the corresponding register
|
|||||||
group. The number of variables in VAR-LIST must not be greater than
|
group. The number of variables in VAR-LIST must not be greater than
|
||||||
the number of register groups. If SHAREDP is true, the substrings may
|
the number of register groups. If SHAREDP is true, the substrings may
|
||||||
share structure with TARGET-STRING."
|
share structure with TARGET-STRING."
|
||||||
(rebinding (target-string)
|
(with-rebinding (target-string)
|
||||||
(with-unique-names (match-start match-end reg-starts reg-ends
|
(with-unique-names (match-start match-end reg-starts reg-ends
|
||||||
start-index substr-fn)
|
start-index substr-fn)
|
||||||
`(multiple-value-bind (,match-start ,match-end ,reg-starts ,reg-ends)
|
`(multiple-value-bind (,match-start ,match-end ,reg-starts ,reg-ends)
|
||||||
@ -299,24 +337,26 @@ share structure with TARGET-STRING."
|
|||||||
`(,substr-fn (if ,sharedp
|
`(,substr-fn (if ,sharedp
|
||||||
#'nsubseq
|
#'nsubseq
|
||||||
#'subseq))
|
#'subseq))
|
||||||
(loop for var in var-list
|
(loop for (function var) in (normalize-var-list var-list)
|
||||||
for counter from 0
|
for counter from 0
|
||||||
when var
|
when var
|
||||||
collect `(,var (let ((,start-index
|
collect `(,var (let ((,start-index
|
||||||
(aref ,reg-starts ,counter)))
|
(aref ,reg-starts ,counter)))
|
||||||
(if ,start-index
|
(if ,start-index
|
||||||
(funcall ,substr-fn
|
(funcall ,function
|
||||||
,target-string
|
(funcall ,substr-fn
|
||||||
,start-index
|
,target-string
|
||||||
(aref ,reg-ends ,counter))
|
,start-index
|
||||||
nil)))))
|
(aref ,reg-ends ,counter)))
|
||||||
|
nil)))))
|
||||||
,@body))))))
|
,@body))))))
|
||||||
|
|
||||||
(defmacro do-scans ((match-start match-end reg-starts reg-ends regex
|
(defmacro do-scans ((match-start match-end reg-starts reg-ends regex
|
||||||
target-string
|
target-string
|
||||||
&optional result-form
|
&optional result-form
|
||||||
&key start end)
|
&key start end)
|
||||||
&body body)
|
&body body
|
||||||
|
&environment env)
|
||||||
"Iterates over TARGET-STRING and tries to match REGEX as often as
|
"Iterates over TARGET-STRING and tries to match REGEX as often as
|
||||||
possible evaluating BODY with MATCH-START, MATCH-END, REG-STARTS, and
|
possible evaluating BODY with MATCH-START, MATCH-END, REG-STARTS, and
|
||||||
REG-ENDS bound to the four return values of each match in turn. After
|
REG-ENDS bound to the four return values of each match in turn. After
|
||||||
@ -325,19 +365,24 @@ implicit block named NIL surrounds DO-SCANS; RETURN may be used to
|
|||||||
terminate the loop immediately. If REGEX matches an empty string the
|
terminate the loop immediately. If REGEX matches an empty string the
|
||||||
scan is continued one position behind this match. BODY may start with
|
scan is continued one position behind this match. BODY may start with
|
||||||
declarations."
|
declarations."
|
||||||
(rebinding (target-string regex)
|
(with-rebinding (target-string)
|
||||||
(with-unique-names (%start %end scanner loop-tag block-name)
|
(with-unique-names (%start %end %regex scanner loop-tag block-name)
|
||||||
|
(declare (ignorable %regex scanner))
|
||||||
;; the NIL BLOCK to enable exits via (RETURN ...)
|
;; the NIL BLOCK to enable exits via (RETURN ...)
|
||||||
`(block nil
|
`(block nil
|
||||||
(let* ((,%start (or ,start 0))
|
(let* ((,%start (or ,start 0))
|
||||||
(*real-start-pos* ,%start)
|
(*real-start-pos* ,%start)
|
||||||
(,%end (or ,end (length ,target-string)))
|
(,%end (or ,end (length ,target-string)))
|
||||||
;; create a scanner unless the regex is already a
|
,@(unless (constantp regex env)
|
||||||
;; function (otherwise SCAN will do this on each
|
;; leave constant regular expressions as they are -
|
||||||
;; iteration)
|
;; SCAN's compiler macro will take care of them;
|
||||||
(,scanner (typecase ,regex
|
;; otherwise create a scanner unless the regex is
|
||||||
(function ,regex)
|
;; already a function (otherwise SCAN will do this
|
||||||
(t (create-scanner ,regex)))))
|
;; on each iteration)
|
||||||
|
`((,%regex ,regex)
|
||||||
|
(,scanner (typecase ,%regex
|
||||||
|
(function ,%regex)
|
||||||
|
(t (create-scanner ,%regex)))))))
|
||||||
;; coerce TARGET-STRING to a simple string unless it is one
|
;; coerce TARGET-STRING to a simple string unless it is one
|
||||||
;; already (otherwise SCAN will do this on each iteration)
|
;; already (otherwise SCAN will do this on each iteration)
|
||||||
(setq ,target-string
|
(setq ,target-string
|
||||||
@ -350,7 +395,9 @@ declarations."
|
|||||||
;; provided variables
|
;; provided variables
|
||||||
(multiple-value-bind
|
(multiple-value-bind
|
||||||
(,match-start ,match-end ,reg-starts ,reg-ends)
|
(,match-start ,match-end ,reg-starts ,reg-ends)
|
||||||
(scan ,scanner ,target-string :start ,%start :end ,%end)
|
(scan ,(cond ((constantp regex env) regex)
|
||||||
|
(t scanner))
|
||||||
|
,target-string :start ,%start :end ,%end)
|
||||||
;; declare the variables to be IGNORABLE to prevent the
|
;; declare the variables to be IGNORABLE to prevent the
|
||||||
;; compiler from issuing warnings
|
;; compiler from issuing warnings
|
||||||
(declare
|
(declare
|
||||||
@ -363,7 +410,7 @@ declarations."
|
|||||||
(locally
|
(locally
|
||||||
,@body)
|
,@body)
|
||||||
;; advance by one position if we had a zero-length match
|
;; advance by one position if we had a zero-length match
|
||||||
(setq ,%start (if (= ,%start ,match-end)
|
(setq ,%start (if (= ,match-start ,match-end)
|
||||||
(1+ ,match-end)
|
(1+ ,match-end)
|
||||||
,match-end)))
|
,match-end)))
|
||||||
(go ,loop-tag))))))))
|
(go ,loop-tag))))))))
|
||||||
@ -405,7 +452,7 @@ terminate the loop immediately. If REGEX matches an empty string the
|
|||||||
scan is continued one position behind this match. If SHAREDP is true,
|
scan is continued one position behind this match. If SHAREDP is true,
|
||||||
the substrings may share structure with TARGET-STRING. BODY may start
|
the substrings may share structure with TARGET-STRING. BODY may start
|
||||||
with declarations."
|
with declarations."
|
||||||
(rebinding (target-string)
|
(with-rebinding (target-string)
|
||||||
(with-unique-names (match-start match-end substr-fn)
|
(with-unique-names (match-start match-end substr-fn)
|
||||||
`(let ((,substr-fn (if ,sharedp #'nsubseq #'subseq)))
|
`(let ((,substr-fn (if ,sharedp #'nsubseq #'subseq)))
|
||||||
;; simple use DO-MATCHES to extract the substrings
|
;; simple use DO-MATCHES to extract the substrings
|
||||||
@ -432,7 +479,7 @@ surrounds DO-REGISTER-GROUPS; RETURN may be used to terminate the loop
|
|||||||
immediately. If REGEX matches an empty string the scan is continued
|
immediately. If REGEX matches an empty string the scan is continued
|
||||||
one position behind this match. If SHAREDP is true, the substrings may
|
one position behind this match. If SHAREDP is true, the substrings may
|
||||||
share structure with TARGET-STRING. BODY may start with declarations."
|
share structure with TARGET-STRING. BODY may start with declarations."
|
||||||
(rebinding (target-string)
|
(with-rebinding (target-string)
|
||||||
(with-unique-names (substr-fn match-start match-end
|
(with-unique-names (substr-fn match-start match-end
|
||||||
reg-starts reg-ends start-index)
|
reg-starts reg-ends start-index)
|
||||||
`(let ((,substr-fn (if ,sharedp
|
`(let ((,substr-fn (if ,sharedp
|
||||||
@ -441,27 +488,24 @@ share structure with TARGET-STRING. BODY may start with declarations."
|
|||||||
(do-scans (,match-start ,match-end ,reg-starts ,reg-ends
|
(do-scans (,match-start ,match-end ,reg-starts ,reg-ends
|
||||||
,regex ,target-string
|
,regex ,target-string
|
||||||
,result-form :start ,start :end ,end)
|
,result-form :start ,start :end ,end)
|
||||||
(let ,(loop for var in var-list
|
(let ,(loop for (function var) in (normalize-var-list var-list)
|
||||||
for counter from 0
|
for counter from 0
|
||||||
collect `(,var (let ((,start-index
|
when var
|
||||||
(aref ,reg-starts ,counter)))
|
collect `(,var (let ((,start-index
|
||||||
(if ,start-index
|
(aref ,reg-starts ,counter)))
|
||||||
(funcall ,substr-fn
|
(if ,start-index
|
||||||
,target-string
|
(funcall ,function
|
||||||
,start-index
|
(funcall ,substr-fn
|
||||||
(aref ,reg-ends ,counter))
|
,target-string
|
||||||
nil))))
|
,start-index
|
||||||
|
(aref ,reg-ends ,counter)))
|
||||||
|
nil))))
|
||||||
,@body))))))
|
,@body))))))
|
||||||
|
|
||||||
(defun all-matches (regex target-string
|
(defun all-matches (regex target-string
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string)))
|
(end (length target-string)))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns a list containing the start and end positions of all
|
"Returns a list containing the start and end positions of all
|
||||||
matches of REGEX against TARGET-STRING, i.e. if there are N matches
|
matches of REGEX against TARGET-STRING, i.e. if there are N matches
|
||||||
the list contains (* 2 N) elements. If REGEX matches an empty string
|
the list contains (* 2 N) elements. If REGEX matches an empty string
|
||||||
@ -474,16 +518,21 @@ the scan is continued one position behind this match."
|
|||||||
(push match-start result-list)
|
(push match-start result-list)
|
||||||
(push match-end result-list))))
|
(push match-end result-list))))
|
||||||
|
|
||||||
|
#-:cormanlisp
|
||||||
|
(define-compiler-macro all-matches (&whole form &environment env regex &rest rest)
|
||||||
|
"Make sure that constant forms are compiled into scanners at
|
||||||
|
compile time."
|
||||||
|
(cond ((constantp regex env)
|
||||||
|
`(all-matches (load-time-value
|
||||||
|
(create-scanner ,regex))
|
||||||
|
,@rest))
|
||||||
|
(t form)))
|
||||||
|
|
||||||
(defun all-matches-as-strings (regex target-string
|
(defun all-matches-as-strings (regex target-string
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string))
|
(end (length target-string))
|
||||||
sharedp)
|
sharedp)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns a list containing all substrings of TARGET-STRING which
|
"Returns a list containing all substrings of TARGET-STRING which
|
||||||
match REGEX. If REGEX matches an empty string the scan is continued
|
match REGEX. If REGEX matches an empty string the scan is continued
|
||||||
one position behind this match. If SHAREDP is true, the substrings may
|
one position behind this match. If SHAREDP is true, the substrings may
|
||||||
@ -493,6 +542,17 @@ share structure with TARGET-STRING."
|
|||||||
:start start :end end :sharedp sharedp)
|
:start start :end end :sharedp sharedp)
|
||||||
(push match result-list))))
|
(push match result-list))))
|
||||||
|
|
||||||
|
#-:cormanlisp
|
||||||
|
(define-compiler-macro all-matches-as-strings (&whole form &environment env regex &rest rest)
|
||||||
|
"Make sure that constant forms are compiled into scanners at
|
||||||
|
compile time."
|
||||||
|
(cond ((constantp regex env)
|
||||||
|
`(all-matches-as-strings
|
||||||
|
(load-time-value
|
||||||
|
(create-scanner ,regex))
|
||||||
|
,@rest))
|
||||||
|
(t form)))
|
||||||
|
|
||||||
(defun split (regex target-string
|
(defun split (regex target-string
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string))
|
(end (length target-string))
|
||||||
@ -500,12 +560,7 @@ share structure with TARGET-STRING."
|
|||||||
with-registers-p
|
with-registers-p
|
||||||
omit-unmatched-p
|
omit-unmatched-p
|
||||||
sharedp)
|
sharedp)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Matches REGEX against TARGET-STRING as often as possible and
|
"Matches REGEX against TARGET-STRING as often as possible and
|
||||||
returns a list of the substrings between the matches. If
|
returns a list of the substrings between the matches. If
|
||||||
WITH-REGISTERS-P is true, substrings corresponding to matched
|
WITH-REGISTERS-P is true, substrings corresponding to matched
|
||||||
@ -569,21 +624,17 @@ TARGET-STRING."
|
|||||||
target-string this-start this-end)
|
target-string this-start this-end)
|
||||||
nil)))))
|
nil)))))
|
||||||
|
|
||||||
(define-compiler-macro split (&whole form regex target-string &rest rest)
|
#-:cormanlisp
|
||||||
|
(define-compiler-macro split (&whole form &environment env regex target-string &rest rest)
|
||||||
"Make sure that constant forms are compiled into scanners at compile time."
|
"Make sure that constant forms are compiled into scanners at compile time."
|
||||||
(cond ((constantp regex)
|
(cond ((constantp regex env)
|
||||||
`(split (load-time-value
|
`(split (load-time-value
|
||||||
(create-scanner ,regex))
|
(create-scanner ,regex))
|
||||||
,target-string ,@rest))
|
,target-string ,@rest))
|
||||||
(t form)))
|
(t form)))
|
||||||
|
|
||||||
(defun string-case-modifier (str from to start end)
|
(defun string-case-modifier (str from to start end)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum from to start end))
|
(declare (type fixnum from to start end))
|
||||||
"Checks whether all words in STR between FROM and TO are upcased,
|
"Checks whether all words in STR between FROM and TO are upcased,
|
||||||
downcased or capitalized and returns a function which applies a
|
downcased or capitalized and returns a function which applies a
|
||||||
@ -648,19 +699,18 @@ that (<= START FROM TO END)."
|
|||||||
|
|
||||||
;; first create a scanner to identify the special parts of the
|
;; first create a scanner to identify the special parts of the
|
||||||
;; replacement string (eat your own dog food...)
|
;; replacement string (eat your own dog food...)
|
||||||
|
|
||||||
|
(defgeneric build-replacement-template (replacement-string)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
|
(:documentation "Converts a replacement string for REGEX-REPLACE or
|
||||||
|
REGEX-REPLACE-ALL into a replacement template which is an
|
||||||
|
S-expression."))
|
||||||
|
|
||||||
#-:cormanlisp
|
#-:cormanlisp
|
||||||
(let* ((*use-bmh-matchers* nil)
|
(let* ((*use-bmh-matchers* nil)
|
||||||
(reg-scanner (create-scanner "\\\\(?:\\\\|{\\d+}|\\d+|&|`|')")))
|
(reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
|
||||||
(defmethod build-replacement-template ((replacement-string string))
|
(defmethod build-replacement-template ((replacement-string string))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Converts a replacement string for REGEX-REPLACE or
|
|
||||||
REGEX-REPLACE-ALL into a replacement template which is an
|
|
||||||
S-expression."
|
|
||||||
(let ((from 0)
|
(let ((from 0)
|
||||||
;; COLLECTOR will hold the (reversed) template
|
;; COLLECTOR will hold the (reversed) template
|
||||||
(collector '()))
|
(collector '()))
|
||||||
@ -714,14 +764,9 @@ S-expression."
|
|||||||
;;; Corman Lisp's methods can't be closures... :(
|
;;; Corman Lisp's methods can't be closures... :(
|
||||||
#+:cormanlisp
|
#+:cormanlisp
|
||||||
(let* ((*use-bmh-matchers* nil)
|
(let* ((*use-bmh-matchers* nil)
|
||||||
(reg-scanner (create-scanner "\\\\(?:\\\\|{\\d+}|\\d+|&|`|')")))
|
(reg-scanner (create-scanner "\\\\(?:\\\\|\\{\\d+\\}|\\d+|&|`|')")))
|
||||||
(defun build-replacement-template (replacement)
|
(defun build-replacement-template (replacement)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(typecase replacement
|
(typecase replacement
|
||||||
(string
|
(string
|
||||||
(let ((from 0)
|
(let ((from 0)
|
||||||
@ -770,13 +815,9 @@ S-expression."
|
|||||||
target-string
|
target-string
|
||||||
start end
|
start end
|
||||||
match-start match-end
|
match-start match-end
|
||||||
reg-starts reg-ends)
|
reg-starts reg-ends
|
||||||
(declare (optimize speed
|
simple-calls)
|
||||||
(safety 0)
|
(declare #.*standard-optimize-settings*)
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Accepts a replacement template and the current values from the
|
"Accepts a replacement template and the current values from the
|
||||||
matching process in REGEX-REPLACE or REGEX-REPLACE-ALL and returns the
|
matching process in REGEX-REPLACE or REGEX-REPLACE-ALL and returns the
|
||||||
corresponding template."
|
corresponding template."
|
||||||
@ -806,12 +847,22 @@ corresponding template."
|
|||||||
:start (svref reg-starts token)
|
:start (svref reg-starts token)
|
||||||
:end (svref reg-ends token))))
|
:end (svref reg-ends token))))
|
||||||
(function
|
(function
|
||||||
(write-string (funcall token
|
(write-string
|
||||||
target-string
|
(cond (simple-calls
|
||||||
start end
|
(apply token
|
||||||
match-start match-end
|
(nsubseq target-string match-start match-end)
|
||||||
reg-starts reg-ends)
|
(map 'list
|
||||||
s))
|
(lambda (reg-start reg-end)
|
||||||
|
(and reg-start
|
||||||
|
(nsubseq target-string reg-start reg-end)))
|
||||||
|
reg-starts reg-ends)))
|
||||||
|
(t
|
||||||
|
(funcall token
|
||||||
|
target-string
|
||||||
|
start end
|
||||||
|
match-start match-end
|
||||||
|
reg-starts reg-ends)))
|
||||||
|
s))
|
||||||
(symbol
|
(symbol
|
||||||
(case token
|
(case token
|
||||||
((:backslash)
|
((:backslash)
|
||||||
@ -833,20 +884,26 @@ corresponding template."
|
|||||||
:start match-end
|
:start match-end
|
||||||
:end end))
|
:end end))
|
||||||
(otherwise
|
(otherwise
|
||||||
(write-string (funcall token
|
(write-string
|
||||||
target-string
|
(cond (simple-calls
|
||||||
start end
|
(apply token
|
||||||
match-start match-end
|
(nsubseq target-string match-start match-end)
|
||||||
reg-starts reg-ends)
|
(map 'list
|
||||||
s)))))))))
|
(lambda (reg-start reg-end)
|
||||||
|
(and reg-start
|
||||||
|
(nsubseq target-string reg-start reg-end)))
|
||||||
|
reg-starts reg-ends)))
|
||||||
|
(t
|
||||||
|
(funcall token
|
||||||
|
target-string
|
||||||
|
start end
|
||||||
|
match-start match-end
|
||||||
|
reg-starts reg-ends)))
|
||||||
|
s)))))))))
|
||||||
|
|
||||||
(defun replace-aux (target-string replacement pos-list reg-list start end preserve-case)
|
(defun replace-aux (target-string replacement pos-list reg-list
|
||||||
(declare (optimize speed
|
start end preserve-case simple-calls)
|
||||||
(safety 0)
|
(declare #.*standard-optimize-settings*)
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Auxiliary function used by REGEX-REPLACE and
|
"Auxiliary function used by REGEX-REPLACE and
|
||||||
REGEX-REPLACE-ALL. POS-LIST contains a list with the start and end
|
REGEX-REPLACE-ALL. POS-LIST contains a list with the start and end
|
||||||
positions of all matches while REG-LIST contains a list of arrays
|
positions of all matches while REG-LIST contains a list of arrays
|
||||||
@ -867,7 +924,8 @@ representing the corresponding register start and end positions."
|
|||||||
target-string
|
target-string
|
||||||
start end
|
start end
|
||||||
from to
|
from to
|
||||||
reg-starts reg-ends)
|
reg-starts reg-ends
|
||||||
|
simple-calls)
|
||||||
nil)
|
nil)
|
||||||
while to
|
while to
|
||||||
if replace
|
if replace
|
||||||
@ -887,13 +945,9 @@ representing the corresponding register start and end positions."
|
|||||||
(defun regex-replace (regex target-string replacement
|
(defun regex-replace (regex target-string replacement
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string))
|
(end (length target-string))
|
||||||
preserve-case)
|
preserve-case
|
||||||
(declare (optimize speed
|
simple-calls)
|
||||||
(safety 0)
|
(declare #.*standard-optimize-settings*)
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Try to match TARGET-STRING between START and END against REGEX and
|
"Try to match TARGET-STRING between START and END against REGEX and
|
||||||
replace the first match with REPLACEMENT.
|
replace the first match with REPLACEMENT.
|
||||||
|
|
||||||
@ -926,19 +980,25 @@ match."
|
|||||||
(replace-aux target-string replacement
|
(replace-aux target-string replacement
|
||||||
(list match-start match-end)
|
(list match-start match-end)
|
||||||
(list reg-starts reg-ends)
|
(list reg-starts reg-ends)
|
||||||
start end preserve-case)
|
start end preserve-case simple-calls)
|
||||||
(subseq target-string start end))))
|
(subseq target-string start end))))
|
||||||
|
|
||||||
|
#-:cormanlisp
|
||||||
|
(define-compiler-macro regex-replace
|
||||||
|
(&whole form &environment env regex target-string replacement &rest rest)
|
||||||
|
"Make sure that constant forms are compiled into scanners at compile time."
|
||||||
|
(cond ((constantp regex env)
|
||||||
|
`(regex-replace (load-time-value
|
||||||
|
(create-scanner ,regex))
|
||||||
|
,target-string ,replacement ,@rest))
|
||||||
|
(t form)))
|
||||||
|
|
||||||
(defun regex-replace-all (regex target-string replacement
|
(defun regex-replace-all (regex target-string replacement
|
||||||
&key (start 0)
|
&key (start 0)
|
||||||
(end (length target-string))
|
(end (length target-string))
|
||||||
preserve-case)
|
preserve-case
|
||||||
(declare (optimize speed
|
simple-calls)
|
||||||
(safety 0)
|
(declare #.*standard-optimize-settings*)
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Try to match TARGET-STRING between START and END against REGEX and
|
"Try to match TARGET-STRING between START and END against REGEX and
|
||||||
replace all matches with REPLACEMENT.
|
replace all matches with REPLACEMENT.
|
||||||
|
|
||||||
@ -978,9 +1038,19 @@ match."
|
|||||||
(replace-aux target-string replacement
|
(replace-aux target-string replacement
|
||||||
(nreverse pos-list)
|
(nreverse pos-list)
|
||||||
(nreverse reg-list)
|
(nreverse reg-list)
|
||||||
start end preserve-case)
|
start end preserve-case simple-calls)
|
||||||
(subseq target-string start end))))
|
(subseq target-string start end))))
|
||||||
|
|
||||||
|
#-:cormanlisp
|
||||||
|
(define-compiler-macro regex-replace-all
|
||||||
|
(&whole form &environment env regex target-string replacement &rest rest)
|
||||||
|
"Make sure that constant forms are compiled into scanners at compile time."
|
||||||
|
(cond ((constantp regex env)
|
||||||
|
`(regex-replace-all (load-time-value
|
||||||
|
(create-scanner ,regex))
|
||||||
|
,target-string ,replacement ,@rest))
|
||||||
|
(t form)))
|
||||||
|
|
||||||
#-:cormanlisp
|
#-:cormanlisp
|
||||||
(defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
|
(defmacro regex-apropos-aux ((regex packages case-insensitive &optional return-form)
|
||||||
&body body)
|
&body body)
|
||||||
@ -989,7 +1059,7 @@ through PACKAGES and executes BODY with SYMBOL bound to each symbol
|
|||||||
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
|
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
|
||||||
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
|
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
|
||||||
scanner, a case-insensitive scanner is used."
|
scanner, a case-insensitive scanner is used."
|
||||||
(rebinding (regex)
|
(with-rebinding (regex)
|
||||||
(with-unique-names (scanner %packages next morep)
|
(with-unique-names (scanner %packages next morep)
|
||||||
`(let* ((,scanner (create-scanner ,regex
|
`(let* ((,scanner (create-scanner ,regex
|
||||||
:case-insensitive-mode
|
:case-insensitive-mode
|
||||||
@ -997,14 +1067,14 @@ scanner, a case-insensitive scanner is used."
|
|||||||
(not (functionp ,regex)))))
|
(not (functionp ,regex)))))
|
||||||
(,%packages (or ,packages
|
(,%packages (or ,packages
|
||||||
(list-all-packages))))
|
(list-all-packages))))
|
||||||
(with-package-iterator (,next ,%packages :external :internal)
|
(with-package-iterator (,next ,%packages :external :internal :inherited)
|
||||||
(loop
|
(loop
|
||||||
(multiple-value-bind (,morep symbol)
|
(multiple-value-bind (,morep symbol)
|
||||||
(,next)
|
(,next)
|
||||||
(unless ,morep
|
(unless ,morep
|
||||||
(return ,return-form))
|
(return ,return-form))
|
||||||
(when (scan ,scanner (symbol-name symbol))
|
(when (scan ,scanner (symbol-name symbol))
|
||||||
,@body))))))))
|
,@body))))))))
|
||||||
|
|
||||||
;;; The following two functions were provided by Karsten Poeck
|
;;; The following two functions were provided by Karsten Poeck
|
||||||
|
|
||||||
@ -1026,7 +1096,7 @@ through PACKAGES and executes BODY with SYMBOL bound to each symbol
|
|||||||
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
|
which matches REGEX. Optionally evaluates and returns RETURN-FORM at
|
||||||
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
|
the end. If CASE-INSENSITIVE is true and REGEX isn't already a
|
||||||
scanner, a case-insensitive scanner is used."
|
scanner, a case-insensitive scanner is used."
|
||||||
(rebinding (regex)
|
(with-rebinding (regex)
|
||||||
(with-unique-names (scanner %packages)
|
(with-unique-names (scanner %packages)
|
||||||
`(let* ((,scanner (create-scanner ,regex
|
`(let* ((,scanner (create-scanner ,regex
|
||||||
:case-insensitive-mode
|
:case-insensitive-mode
|
||||||
@ -1040,12 +1110,7 @@ scanner, a case-insensitive scanner is used."
|
|||||||
,return-form))))
|
,return-form))))
|
||||||
|
|
||||||
(defun regex-apropos-list (regex &optional packages &key (case-insensitive t))
|
(defun regex-apropos-list (regex &optional packages &key (case-insensitive t))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Similar to the standard function APROPOS-LIST but returns a list of
|
"Similar to the standard function APROPOS-LIST but returns a list of
|
||||||
all symbols which match the regular expression REGEX. If
|
all symbols which match the regular expression REGEX. If
|
||||||
CASE-INSENSITIVE is true and REGEX isn't already a scanner, a
|
CASE-INSENSITIVE is true and REGEX isn't already a scanner, a
|
||||||
@ -1057,12 +1122,7 @@ case-insensitive scanner is used."
|
|||||||
(defun print-symbol-info (symbol)
|
(defun print-symbol-info (symbol)
|
||||||
"Auxiliary function used by REGEX-APROPOS. Tries to print some
|
"Auxiliary function used by REGEX-APROPOS. Tries to print some
|
||||||
meaningful information about a symbol."
|
meaningful information about a symbol."
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(handler-case
|
(handler-case
|
||||||
(let ((output-list '()))
|
(let ((output-list '()))
|
||||||
(cond ((special-operator-p symbol)
|
(cond ((special-operator-p symbol)
|
||||||
@ -1107,12 +1167,7 @@ meaningful information about a symbol."
|
|||||||
symbols which match the regular expression REGEX. If CASE-INSENSITIVE
|
symbols which match the regular expression REGEX. If CASE-INSENSITIVE
|
||||||
is true and REGEX isn't already a scanner, a case-insensitive scanner
|
is true and REGEX isn't already a scanner, a case-insensitive scanner
|
||||||
is used."
|
is used."
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(regex-apropos-aux (regex packages case-insensitive)
|
(regex-apropos-aux (regex packages case-insensitive)
|
||||||
(print-symbol-info symbol))
|
(print-symbol-info symbol))
|
||||||
(values))
|
(values))
|
||||||
@ -1169,3 +1224,18 @@ end-of-line comments, i.e. those starting with #\\# and ending with
|
|||||||
comment-scanner)
|
comment-scanner)
|
||||||
string
|
string
|
||||||
#'remove-tokens))))
|
#'remove-tokens))))
|
||||||
|
|
||||||
|
(defun parse-tree-synonym (symbol)
|
||||||
|
"Returns the parse tree the SYMBOL symbol is a synonym for. Returns
|
||||||
|
NIL is SYMBOL wasn't yet defined to be a synonym."
|
||||||
|
(get symbol 'parse-tree-synonym))
|
||||||
|
|
||||||
|
(defun (setf parse-tree-synonym) (new-parse-tree symbol)
|
||||||
|
"Defines SYMBOL to be a synonm for the parse tree NEW-PARSE-TREE."
|
||||||
|
(setf (get symbol 'parse-tree-synonym) new-parse-tree))
|
||||||
|
|
||||||
|
(defmacro define-parse-tree-synonym (name parse-tree)
|
||||||
|
"Defines the symbol NAME to be a synonym for the parse tree
|
||||||
|
PARSE-TREE. Both arguments are quoted."
|
||||||
|
`(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||||
|
(setf (parse-tree-synonym ',name) ',parse-tree)))
|
||||||
|
|||||||
@ -1,9 +1,9 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre-test.asd,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-test.asd,v 1.8 2005/11/01 09:51:01 edi Exp $
|
||||||
|
|
||||||
;;; This ASDF system definition was kindly provided by Marco Baringer.
|
;;; This ASDF system definition was kindly provided by Marco Baringer.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -36,5 +36,6 @@
|
|||||||
(in-package #:cl-ppcre-test.system)
|
(in-package #:cl-ppcre-test.system)
|
||||||
|
|
||||||
(defsystem #:cl-ppcre-test
|
(defsystem #:cl-ppcre-test
|
||||||
:depends-on (#:cl-ppcre)
|
:version "1.2.12"
|
||||||
:components ((:file "ppcre-tests")))
|
:depends-on (#:cl-ppcre)
|
||||||
|
:components ((:file "ppcre-tests")))
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre-test.system,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre-test.system,v 1.9 2005/04/01 21:29:09 edi Exp $
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
|
|||||||
40
cl-ppcre.asd
40
cl-ppcre.asd
@ -1,9 +1,9 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre.asd,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre.asd,v 1.12 2005/11/01 09:51:01 edi Exp $
|
||||||
|
|
||||||
;;; This ASDF system definition was kindly provided by Marco Baringer.
|
;;; This ASDF system definition was kindly provided by Marco Baringer.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -36,16 +36,26 @@
|
|||||||
(in-package #:cl-ppcre.system)
|
(in-package #:cl-ppcre.system)
|
||||||
|
|
||||||
(defsystem #:cl-ppcre
|
(defsystem #:cl-ppcre
|
||||||
:components ((:file "packages")
|
:version "1.2.12"
|
||||||
(:file "specials" :depends-on ("packages"))
|
:serial t
|
||||||
(:file "util" :depends-on ("packages"))
|
:components ((:file "packages")
|
||||||
(:file "errors" :depends-on ("util"))
|
(:file "specials")
|
||||||
(:file "lexer" :depends-on ("errors" "specials"))
|
(:file "util")
|
||||||
(:file "parser" :depends-on ("lexer"))
|
(:file "errors")
|
||||||
(:file "regex-class" :depends-on ("parser"))
|
#-:use-acl-regexp2-engine
|
||||||
(:file "convert" :depends-on ("regex-class"))
|
(:file "lexer")
|
||||||
(:file "optimize" :depends-on ("convert"))
|
#-:use-acl-regexp2-engine
|
||||||
(:file "closures" :depends-on ("optimize" "specials"))
|
(:file "parser")
|
||||||
(:file "repetition-closures" :depends-on ("closures"))
|
#-:use-acl-regexp2-engine
|
||||||
(:file "scanner" :depends-on ("repetition-closures"))
|
(:file "regex-class")
|
||||||
(:file "api" :depends-on ("scanner"))))
|
#-:use-acl-regexp2-engine
|
||||||
|
(:file "convert")
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
|
(:file "optimize")
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
|
(:file "closures")
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
|
(:file "repetition-closures")
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
|
(:file "scanner")
|
||||||
|
(:file "api")))
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/cl-ppcre.system,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/cl-ppcre.system,v 1.11 2005/04/01 21:29:09 edi Exp $
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -40,12 +40,20 @@
|
|||||||
(:file "specials" :depends-on ("packages"))
|
(:file "specials" :depends-on ("packages"))
|
||||||
(:file "util" :depends-on ("packages"))
|
(:file "util" :depends-on ("packages"))
|
||||||
(:file "errors" :depends-on ("util"))
|
(:file "errors" :depends-on ("util"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "lexer" :depends-on ("errors" "specials"))
|
(:file "lexer" :depends-on ("errors" "specials"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "parser" :depends-on ("lexer"))
|
(:file "parser" :depends-on ("lexer"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "regex-class" :depends-on ("parser"))
|
(:file "regex-class" :depends-on ("parser"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "convert" :depends-on ("regex-class"))
|
(:file "convert" :depends-on ("regex-class"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "optimize" :depends-on ("convert"))
|
(:file "optimize" :depends-on ("convert"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "closures" :depends-on ("optimize" "specials"))
|
(:file "closures" :depends-on ("optimize" "specials"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "repetition-closures" :depends-on ("closures"))
|
(:file "repetition-closures" :depends-on ("closures"))
|
||||||
|
#-:use-acl-regexp2-engine
|
||||||
(:file "scanner" :depends-on ("repetition-closures"))
|
(:file "scanner" :depends-on ("repetition-closures"))
|
||||||
(:file "api" :depends-on ("scanner"))))
|
(:file "api" :depends-on ("scanner"))))
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/closures.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/closures.lisp,v 1.29 2005/05/16 16:29:23 edi Exp $
|
||||||
|
|
||||||
;;; Here we create the closures which together build the final
|
;;; Here we create the closures which together build the final
|
||||||
;;; scanner.
|
;;; scanner.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -38,12 +38,7 @@
|
|||||||
"Like STRING=, i.e. compares the special string *STRING* from START1
|
"Like STRING=, i.e. compares the special string *STRING* from START1
|
||||||
to END1 with STRING2 from START2 to END2. Note that there's no
|
to END1 with STRING2 from START2 to END2. Note that there's no
|
||||||
boundary check - this has to be implemented by the caller."
|
boundary check - this has to be implemented by the caller."
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum start1 end1 start2 end2))
|
(declare (type fixnum start1 end1 start2 end2))
|
||||||
(loop for string1-idx of-type fixnum from start1 below end1
|
(loop for string1-idx of-type fixnum from start1 below end1
|
||||||
for string2-idx of-type fixnum from start2 below end2
|
for string2-idx of-type fixnum from start2 below end2
|
||||||
@ -54,12 +49,7 @@ boundary check - this has to be implemented by the caller."
|
|||||||
"Like STRING-EQUAL, i.e. compares the special string *STRING* from
|
"Like STRING-EQUAL, i.e. compares the special string *STRING* from
|
||||||
START1 to END1 with STRING2 from START2 to END2. Note that there's no
|
START1 to END1 with STRING2 from START2 to END2. Note that there's no
|
||||||
boundary check - this has to be implemented by the caller."
|
boundary check - this has to be implemented by the caller."
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum start1 end1 start2 end2))
|
(declare (type fixnum start1 end1 start2 end2))
|
||||||
(loop for string1-idx of-type fixnum from start1 below end1
|
(loop for string1-idx of-type fixnum from start1 below end1
|
||||||
for string2-idx of-type fixnum from start2 below end2
|
for string2-idx of-type fixnum from start2 below end2
|
||||||
@ -67,12 +57,7 @@ boundary check - this has to be implemented by the caller."
|
|||||||
(schar string2 string2-idx))))
|
(schar string2 string2-idx))))
|
||||||
|
|
||||||
(defgeneric create-matcher-aux (regex next-fn)
|
(defgeneric create-matcher-aux (regex next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Creates a closure which takes one parameter,
|
(:documentation "Creates a closure which takes one parameter,
|
||||||
START-POS, and tests whether REGEX can match *STRING* at START-POS
|
START-POS, and tests whether REGEX can match *STRING* at START-POS
|
||||||
such that the call to NEXT-FN after the match would succeed."))
|
such that the call to NEXT-FN after the match would succeed."))
|
||||||
@ -399,14 +384,10 @@ against CHR-EXPR."
|
|||||||
|
|
||||||
(defun word-boundary-p (start-pos)
|
(defun word-boundary-p (start-pos)
|
||||||
"Check whether START-POS is a word-boundary within *STRING*."
|
"Check whether START-POS is a word-boundary within *STRING*."
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum start-pos))
|
(declare (type fixnum start-pos))
|
||||||
(let ((1-start-pos (1- start-pos)))
|
(let ((1-start-pos (1- start-pos))
|
||||||
|
(*start-pos* (or *real-start-pos* *start-pos*)))
|
||||||
;; either the character before START-POS is a word-constituent and
|
;; either the character before START-POS is a word-constituent and
|
||||||
;; the character at START-POS isn't...
|
;; the character at START-POS isn't...
|
||||||
(or (and (or (= start-pos *end-pos*)
|
(or (and (or (= start-pos *end-pos*)
|
||||||
@ -571,6 +552,13 @@ against CHR-EXPR."
|
|||||||
(and next-pos
|
(and next-pos
|
||||||
(funcall next-fn next-pos))))))
|
(funcall next-fn next-pos))))))
|
||||||
|
|
||||||
|
(defmethod create-matcher-aux ((filter filter) next-fn)
|
||||||
|
(let ((fn (fn filter)))
|
||||||
|
(lambda (start-pos)
|
||||||
|
(let ((next-pos (funcall fn start-pos)))
|
||||||
|
(and next-pos
|
||||||
|
(funcall next-fn next-pos))))))
|
||||||
|
|
||||||
(defmethod create-matcher-aux ((void void) next-fn)
|
(defmethod create-matcher-aux ((void void) next-fn)
|
||||||
;; optimize away VOIDs: don't create a closure, just return NEXT-FN
|
;; optimize away VOIDs: don't create a closure, just return NEXT-FN
|
||||||
next-fn)
|
next-fn)
|
||||||
|
|||||||
74
convert.lisp
74
convert.lisp
@ -1,11 +1,11 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/convert.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/convert.lisp,v 1.22 2005/04/01 21:29:09 edi Exp $
|
||||||
|
|
||||||
;;; Here the parse tree is converted into its internal representation
|
;;; Here the parse tree is converted into its internal representation
|
||||||
;;; using REGEX objects. At the same time some optimizations are
|
;;; using REGEX objects. At the same time some optimizations are
|
||||||
;;; already applied.
|
;;; already applied.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -50,12 +50,7 @@
|
|||||||
`(third ,flags))
|
`(third ,flags))
|
||||||
|
|
||||||
(defun set-flag (token)
|
(defun set-flag (token)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (special flags))
|
(declare (special flags))
|
||||||
"Reads a flag token and sets or unsets the corresponding entry in
|
"Reads a flag token and sets or unsets the corresponding entry in
|
||||||
the special FLAGS list."
|
the special FLAGS list."
|
||||||
@ -76,12 +71,7 @@ the special FLAGS list."
|
|||||||
(signal-ppcre-syntax-error "Unknown flag token ~A" token))))
|
(signal-ppcre-syntax-error "Unknown flag token ~A" token))))
|
||||||
|
|
||||||
(defun add-range-to-hash (hash from to)
|
(defun add-range-to-hash (hash from to)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (special flags))
|
(declare (special flags))
|
||||||
"Adds all characters from character FROM to character TO (inclusive)
|
"Adds all characters from character FROM to character TO (inclusive)
|
||||||
to the char class hash HASH. Does the right thing with respect to
|
to the char class hash HASH. Does the right thing with respect to
|
||||||
@ -102,12 +92,7 @@ case-(in)sensitivity as specified by the special variable FLAGS."
|
|||||||
hash))
|
hash))
|
||||||
|
|
||||||
(defun convert-char-class-to-hash (list)
|
(defun convert-char-class-to-hash (list)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Combines all items in LIST into one char class hash and returns it.
|
"Combines all items in LIST into one char class hash and returns it.
|
||||||
Items can be single characters, character ranges like \(:RANGE #\\A
|
Items can be single characters, character ranges like \(:RANGE #\\A
|
||||||
#\\E), or special character classes like :DIGIT-CLASS. Does the right
|
#\\E), or special character classes like :DIGIT-CLASS. Does the right
|
||||||
@ -115,7 +100,7 @@ thing with respect to case-\(in)sensitivity as specified by the
|
|||||||
special variable FLAGS."
|
special variable FLAGS."
|
||||||
(loop with hash = (make-hash-table :size (ceiling (expt *regex-char-code-limit* (/ 1 4)))
|
(loop with hash = (make-hash-table :size (ceiling (expt *regex-char-code-limit* (/ 1 4)))
|
||||||
:rehash-size (float (expt *regex-char-code-limit* (/ 1 4)))
|
:rehash-size (float (expt *regex-char-code-limit* (/ 1 4)))
|
||||||
:rehash-threshold 1.0)
|
:rehash-threshold #-genera 1.0 #+genera 0.99)
|
||||||
for item in list
|
for item in list
|
||||||
if (characterp item)
|
if (characterp item)
|
||||||
;; treat a single character C like a range (:RANGE C C)
|
;; treat a single character C like a range (:RANGE C C)
|
||||||
@ -157,12 +142,7 @@ special variable FLAGS."
|
|||||||
min-len
|
min-len
|
||||||
length
|
length
|
||||||
reg-seen)
|
reg-seen)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum minimum)
|
(declare (type fixnum minimum)
|
||||||
(type (or fixnum null) maximum))
|
(type (or fixnum null) maximum))
|
||||||
"Splits a REPETITION object into a constant and a varying part if
|
"Splits a REPETITION object into a constant and a varying part if
|
||||||
@ -230,13 +210,8 @@ the same name."
|
|||||||
;; case if the regex starts with ".*" which implicitely anchors the
|
;; case if the regex starts with ".*" which implicitely anchors the
|
||||||
;; regex at the start (perhaps modulo #\Newline).
|
;; regex at the start (perhaps modulo #\Newline).
|
||||||
|
|
||||||
(defmethod maybe-accumulate ((str str))
|
(defun maybe-accumulate (str)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (special accumulate-start-p starts-with))
|
(declare (special accumulate-start-p starts-with))
|
||||||
(declare (ftype (function (t) fixnum) len))
|
(declare (ftype (function (t) fixnum) len))
|
||||||
"Accumulate STR into the special variable STARTS-WITH if
|
"Accumulate STR into the special variable STARTS-WITH if
|
||||||
@ -291,12 +266,7 @@ NIL or a STR object of the same case mode. Always returns NIL."
|
|||||||
nil)
|
nil)
|
||||||
|
|
||||||
(defun convert-aux (parse-tree)
|
(defun convert-aux (parse-tree)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (special flags reg-num accumulate-start-p starts-with max-back-ref))
|
(declare (special flags reg-num accumulate-start-p starts-with max-back-ref))
|
||||||
"Converts the parse tree PARSE-TREE into a REGEX object and returns it.
|
"Converts the parse tree PARSE-TREE into a REGEX object and returns it.
|
||||||
|
|
||||||
@ -538,8 +508,17 @@ Will also
|
|||||||
(make-instance 'register
|
(make-instance 'register
|
||||||
:regex (convert-aux (second parse-tree))
|
:regex (convert-aux (second parse-tree))
|
||||||
:num stored-reg-num)))
|
:num stored-reg-num)))
|
||||||
|
;; (:FILTER <function> &optional <length>)
|
||||||
|
((:filter)
|
||||||
|
;; stop accumulating into STARTS-WITH
|
||||||
|
(setq accumulate-start-p nil)
|
||||||
|
(make-instance 'filter
|
||||||
|
:fn (second parse-tree)
|
||||||
|
:len (third parse-tree)))
|
||||||
;; (:STANDALONE <regex>)
|
;; (:STANDALONE <regex>)
|
||||||
((:standalone)
|
((:standalone)
|
||||||
|
;; stop accumulating into STARTS-WITH
|
||||||
|
(setq accumulate-start-p nil)
|
||||||
;; keep the effect of modifiers local to the enclosed
|
;; keep the effect of modifiers local to the enclosed
|
||||||
;; regex
|
;; regex
|
||||||
(let ((flags (copy-list flags)))
|
(let ((flags (copy-list flags)))
|
||||||
@ -739,16 +718,15 @@ Will also
|
|||||||
(set-flag parse-tree)
|
(set-flag parse-tree)
|
||||||
(make-instance 'void))
|
(make-instance 'void))
|
||||||
(otherwise
|
(otherwise
|
||||||
(signal-ppcre-syntax-error "Unknown token ~A in parse-tree"
|
(let ((translation (and (symbolp parse-tree)
|
||||||
parse-tree))))))
|
(parse-tree-synonym parse-tree))))
|
||||||
|
(if translation
|
||||||
|
(convert-aux (copy-tree translation))
|
||||||
|
(signal-ppcre-syntax-error "Unknown token ~A in parse-tree"
|
||||||
|
parse-tree))))))))
|
||||||
|
|
||||||
(defun convert (parse-tree)
|
(defun convert (parse-tree)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Converts the parse tree PARSE-TREE into an equivalent REGEX object
|
"Converts the parse tree PARSE-TREE into an equivalent REGEX object
|
||||||
and returns three values: the REGEX object, the number of registers
|
and returns three values: the REGEX object, the number of registers
|
||||||
seen and an object the regex starts with which is either a STR object
|
seen and an object the regex starts with which is either a STR object
|
||||||
|
|||||||
620
doc/index.html
620
doc/index.html
@ -6,14 +6,12 @@
|
|||||||
<title>CL-PPCRE - portable Perl-compatible regular expressions for Common Lisp</title>
|
<title>CL-PPCRE - portable Perl-compatible regular expressions for Common Lisp</title>
|
||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
pre { padding:5px; background-color:#e0e0e0 }
|
pre { padding:5px; background-color:#e0e0e0 }
|
||||||
a.none { text-decoration: none; color:black }
|
|
||||||
a.none:visited { text-decoration: none; color:black }
|
|
||||||
a.none:active { text-decoration: none; color:black }
|
|
||||||
a.none:hover { text-decoration: none; color:black }
|
|
||||||
a { text-decoration: none; }
|
a { text-decoration: none; }
|
||||||
a:visited { text-decoration: none; }
|
a.none:hover { border:1px solid white; }
|
||||||
a:active { text-decoration: underline; }
|
a { border:1px solid white; }
|
||||||
a:hover { text-decoration: underline; }
|
a:hover { border: 1px solid black; }
|
||||||
|
a.noborder { border:0px }
|
||||||
|
a.noborder:hover { border:0px }
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
@ -47,7 +45,7 @@ to CLISP's own regex implementation which is also written in
|
|||||||
C.
|
C.
|
||||||
|
|
||||||
<li>It is <b>portable</b>, i.e. the code aims to be strictly <a
|
<li>It is <b>portable</b>, i.e. the code aims to be strictly <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Front/index.htm">ANSI-compliant</a>. If
|
href="http://www.lispworks.com/documentation/HyperSpec/Front/index.htm">ANSI-compliant</a>. If
|
||||||
you encounter any deviations this is an error and should be
|
you encounter any deviations this is an error and should be
|
||||||
reported to <a
|
reported to <a
|
||||||
href="#mail">the mailing list</a>. CL-PPCRE has been
|
href="#mail">the mailing list</a>. CL-PPCRE has been
|
||||||
@ -55,16 +53,18 @@ successfully tested with the following Common Lisp implementations:
|
|||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
|
|
||||||
<li><a href="http://www.franz.com/products/allegrocl/">Allegro Common Lisp</a> (6.2 trial on Gentoo Linux 1.1a)
|
<li><a href="http://www.franz.com/products/allegrocl/">Allegro Common Lisp</a>
|
||||||
<li><a href="http://clisp.sourceforge.net/">CLISP</a> (2.30 on Gentoo Linux 1.1a and 2.29 on Windows XP pro)
|
<li><a href="http://armedbear.org/abcl.html">Armed Bear Common Lisp</a>
|
||||||
<li><a href="http://www.cons.org/cmucl/">CMUCL</a> (18e on Gentoo Linux 1.1a)
|
<li><a href="http://clisp.sourceforge.net/">CLISP</a>
|
||||||
<li><a href="http://www.cormanlisp.com/">Corman Lisp</a> (2.5 on Windows XP pro)
|
<li><a href="http://www.cons.org/cmucl/">CMUCL</a>
|
||||||
<li><a href="http://ecls.sourceforge.net/">ECL</a> (0.9c on Gentoo Linux 1.1a)
|
<li><a href="http://www.cormanlisp.com/">Corman Lisp</a>
|
||||||
<li><a href="http://www.digitool.com/">Macintosh Common Lisp</a> (4.3 demo on MacOS 9.1 - only tested with CL-PPCRE 0.1.x)
|
<li><a href="http://ecls.sourceforge.net/">ECL</a>
|
||||||
<li><a href="http://openmcl.clozure.com/">OpenMCL</a> (0.13.4 on MacOS X 10.2.2 - only tested with CL-PPCRE 0.1.x)
|
<li><a href="http://www.symbolics.com/">Genera</a>
|
||||||
<li><a href="http://sbcl.sourceforge.net/">SBCL</a> (0.8.4 on Gentoo Linux 1.1a)
|
<li><a href="http://www.digitool.com/">Macintosh Common Lisp</a>
|
||||||
<li><a href="http://www.scieneer.com/scl/">Scieneer Common Lisp</a> (1.1.1 evaluation on Gentoo Linux 1.1a - only tested with CL-PPCRE 0.1.x)
|
<li><a href="http://openmcl.clozure.com/">OpenMCL</a>
|
||||||
<li><a href="http://www.lispworks.com/">Xanalys LispWorks</a> (4.2.7 professional on Gentoo Linux 1.1a and 4.3.6 professional on Windows XP pro)
|
<li><a href="http://sbcl.sourceforge.net/">SBCL</a>
|
||||||
|
<li><a href="http://www.scieneer.com/scl/">Scieneer Common Lisp</a>
|
||||||
|
<li><a href="http://www.lispworks.com/">LispWorks</a>
|
||||||
|
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
@ -116,14 +116,26 @@ license</b></a> so you can basically do with it whatever you want.
|
|||||||
|
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
|
CL-PPCRE has been used successfully in various applications like <a
|
||||||
|
href="http://nostoc.stanford.edu/Docs/">BioLingua</a>, <a
|
||||||
|
href="http://www.hpc.unm.edu/~download/LoGS/">LoGS</a>, <a href="http://cafespot.net/">CafeSpot</a>, <a href="http://www.eboy.com/">Eboy</a>, or <a
|
||||||
|
href="http://weitz.de/regex-coach/">The Regex Coach</a>.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<font color=red>Download shortcut:</font> <a href="http://weitz.de/files/cl-ppcre.tar.gz">http://weitz.de/files/cl-ppcre.tar.gz</a>.
|
||||||
|
|
||||||
</blockquote>
|
</blockquote>
|
||||||
|
|
||||||
<br> <br><h3><a class=none name="contents">Contents</a></h3>
|
<br> <br><h3><a class=none name="contents">Contents</a></h3>
|
||||||
<ol>
|
<ol>
|
||||||
<li><a href="#howto">How to use CL-PPCRE</a>
|
<li><a href="#install">Download and installation</a>
|
||||||
|
<li><a href="#mail">Support and mailing lists</a>
|
||||||
|
<li><a href="#dict">The CL-PPCRE dictionary</a>
|
||||||
<ol>
|
<ol>
|
||||||
<li><a href="#create-scanner1"><code>create-scanner</code></a> (for Perl regex strings)
|
<li><a href="#create-scanner"><code>create-scanner</code></a> (for Perl regex strings)
|
||||||
<li><a href="#create-scanner2"><code>create-scanner</code></a> (for parse trees)
|
<li><a href="#create-scanner2"><code>create-scanner</code></a> (for parse trees)
|
||||||
|
<li><a href="#parse-tree-synonym"><code>parse-tree-synonym</code></a>
|
||||||
|
<li><a href="#define-parse-tree-synonym"><code>define-parse-tree-synonym</code></a>
|
||||||
<li><a href="#scan"><code>scan</code></a>
|
<li><a href="#scan"><code>scan</code></a>
|
||||||
<li><a href="#scan-to-strings"><code>scan-to-strings</code></a>
|
<li><a href="#scan-to-strings"><code>scan-to-strings</code></a>
|
||||||
<li><a href="#register-groups-bind"><code>register-groups-bind</code></a>
|
<li><a href="#register-groups-bind"><code>register-groups-bind</code></a>
|
||||||
@ -148,8 +160,7 @@ license</b></a> so you can basically do with it whatever you want.
|
|||||||
<li><a href="#ppcre-syntax-error-string"><code>ppcre-syntax-error-string</code></a>
|
<li><a href="#ppcre-syntax-error-string"><code>ppcre-syntax-error-string</code></a>
|
||||||
<li><a href="#ppcre-syntax-error-pos"><code>ppcre-syntax-error-pos</code></a>
|
<li><a href="#ppcre-syntax-error-pos"><code>ppcre-syntax-error-pos</code></a>
|
||||||
</ol>
|
</ol>
|
||||||
<li><a href="#install">Download and installation</a>
|
<li><a href="#filters">Filters</a>
|
||||||
<li><a href="#mail">Support and mailing lists</a>
|
|
||||||
<li><a href="#test">Testing CL-PPCRE</a>
|
<li><a href="#test">Testing CL-PPCRE</a>
|
||||||
<li><a href="#perl">Compatibility with Perl</a>
|
<li><a href="#perl">Compatibility with Perl</a>
|
||||||
<ol>
|
<ol>
|
||||||
@ -173,19 +184,84 @@ license</b></a> so you can basically do with it whatever you want.
|
|||||||
<li><a href="#backslash">Backslashes may confuse you...</a>
|
<li><a href="#backslash">Backslashes may confuse you...</a>
|
||||||
</ol>
|
</ol>
|
||||||
<li><a href="#remarks">Remarks</a>
|
<li><a href="#remarks">Remarks</a>
|
||||||
|
<li><a href="#allegro">AllegroCL compatibility mode</a>
|
||||||
<li><a href="#ack">Acknowledgements</a>
|
<li><a href="#ack">Acknowledgements</a>
|
||||||
</ol>
|
</ol>
|
||||||
|
|
||||||
<br> <br><h3><a class=none name="howto">How to use CL-PPCRE</a></h3>
|
<br> <br><h3><a name="install" class=none>Download and installation</a></h3>
|
||||||
|
|
||||||
|
CL-PPCRE together with this documentation can be downloaded from <a
|
||||||
|
href="http://weitz.de/files/cl-ppcre.tar.gz">http://weitz.de/files/cl-ppcre.tar.gz</a>. The
|
||||||
|
current version is 1.2.12. A <a
|
||||||
|
href="CHANGELOG">CHANGELOG</a> is available.
|
||||||
|
<p>
|
||||||
|
If you're on <a href="http://www.debian.org/">Debian</a> you should
|
||||||
|
probably use the <a
|
||||||
|
href="http://packages.debian.org/cgi-bin/search_packages.pl?keywords=cl-ppcre&searchon=names&version=all&release=all">cl-ppcre
|
||||||
|
Debian package</a> which is available thanks to <a href="http://pvaneynd.mailworks.org/">Peter van Eynde</a> and <a href="http://b9.com/">Kevin
|
||||||
|
Rosenberg</a>. There's also a port
|
||||||
|
for <a href="http://www.cliki.net/gentoo">Gentoo Linux</a> thanks to Matthew Kennedy and a <a href="http://www.freebsd.org/cgi/url.cgi?ports/textproc/cl-ppcre/pkg-descr">FreeBSD port</a> thanks to Henrik Motakef.
|
||||||
|
Installation via <a
|
||||||
|
href="http://www.cliki.net/asdf-install">asdf-install</a> should as well
|
||||||
|
be possible.
|
||||||
|
<p>
|
||||||
|
CL-PPCRE comes with simple system definitions for <a
|
||||||
|
href="http://www.cliki.net/mk-defsystem">MK:DEFSYSTEM</a> and <a
|
||||||
|
href="http://www.cliki.net/asdf">asdf</a> so you can either adapt it
|
||||||
|
to your needs or just unpack the archive and from within the CL-PPCRE
|
||||||
|
directory start your Lisp image and evaluate the form
|
||||||
|
<code>(mk:compile-system "cl-ppcre")</code> (or the
|
||||||
|
equivalent one for asdf) which should compile and load the whole
|
||||||
|
system.
|
||||||
|
<p>
|
||||||
|
If for some reason you don't want to use MK:DEFSYSTEM or asdf you
|
||||||
|
can just <code>LOAD</code> the file <code>load.lisp</code> or you
|
||||||
|
can also get away with something like this:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
(loop for name in '("packages" "specials" "util" "errors" "lexer"
|
||||||
|
"parser" "regex-class" "convert" "optimize"
|
||||||
|
"closures" "repetition-closures" "scanner" "api")
|
||||||
|
do (compile-file (make-pathname :name name
|
||||||
|
:type "lisp"))
|
||||||
|
(load name))
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
Note that on CL implementations which use the Python compiler
|
||||||
|
(i.e. CMUCL, SBCL, SCL) you can concatenate the compiled object files
|
||||||
|
to create one single object file which you can load afterwards:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
cat {packages,specials,util,errors,lexer,parser,regex-class,convert,optimize,closures,repetition-closures,scanner,api}.x86f > cl-ppcre.x86f
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
(Replace ".<code>x86f</code>" with the correct suffix for
|
||||||
|
your platform.)
|
||||||
|
<p>
|
||||||
|
Note that there is <em>no</em> public CVS repository for CL-PPCRE - the repository at <a href="http://common-lisp.net/">common-lisp.net</a> is out of date and not in sync with the (current) version distributed from <a href="http://weitz.de/">weitz.de</a>.
|
||||||
|
|
||||||
|
|
||||||
|
<br> <br><h3><a name="mail" class=none>Support and mailing lists</a></h3>
|
||||||
|
|
||||||
|
For questions, bug reports, feature requests, improvements, or patches
|
||||||
|
please use the <a
|
||||||
|
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-devel">cl-ppcre-devel
|
||||||
|
mailing list</a>. If you want to be notified about future releases
|
||||||
|
subscribe to the <a
|
||||||
|
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-announce">cl-ppcre-announce
|
||||||
|
mailing list</a>. These mailing lists were made available thanks to
|
||||||
|
the services of <a href="http://common-lisp.net/">common-lisp.net</a>.
|
||||||
|
|
||||||
|
<br> <br><h3><a class=none name="dict">The CL-PPCRE dictionary</a></h3>
|
||||||
|
|
||||||
CL-PPCRE exports the following symbols:
|
CL-PPCRE exports the following symbols:
|
||||||
|
|
||||||
<p><br>[Function]
|
<p><br>[Method]
|
||||||
<br><a class=none name="create-scanner1"><b>create-scanner</b> <i>string <tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
<br><a class=none name="create-scanner"><b>create-scanner</b> <i>(string string)<tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||||
|
|
||||||
<blockquote><br> Accepts a string which is a regular expression in
|
<blockquote><br> Accepts a string which is a regular expression in
|
||||||
Perl syntax and returns a closure which will scan strings for this
|
Perl syntax and returns a closure which will scan strings for this
|
||||||
regular expression. The mode keyboard arguments are equivalent to the
|
regular expression. The mode keyword arguments are equivalent to the
|
||||||
<code>"imsx"</code> modifiers in Perl. The
|
<code>"imsx"</code> modifiers in Perl. The
|
||||||
<code>destructive</code> keyword will be ignored.
|
<code>destructive</code> keyword will be ignored.
|
||||||
<p>
|
<p>
|
||||||
@ -236,12 +312,17 @@ The keyword arguments are just for your
|
|||||||
convenience. You can always use embedded modifiers like
|
convenience. You can always use embedded modifiers like
|
||||||
<code>"(?i-s)"</code> instead.</blockquote>
|
<code>"(?i-s)"</code> instead.</blockquote>
|
||||||
|
|
||||||
|
<p><br>[Method]
|
||||||
|
<br><a class=none name="create-scanner"><b>create-scanner</b> <i>(function function)<tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||||
|
<blockquote><br>
|
||||||
|
In this case <code><i>function</i></code> should be a scanner returned by another invocation of <code>CREATE-SCANNER</code>. It will be returned as is.
|
||||||
|
</blockquote>
|
||||||
|
|
||||||
<p><br>[Function]
|
<p><br>[Method]
|
||||||
<br><a class=none name="create-scanner2"><b>create-scanner</b> <i>parse-tree <tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
<br><a class=none name="create-scanner2"><b>create-scanner</b> <i>(parse-tree t)<tt>&key</tt> case-insensitive-mode multi-line-mode single-line-mode extended-mode destructive</i> => <i>scanner</i></a>
|
||||||
<blockquote><br>
|
<blockquote><br>
|
||||||
This is similar to <a
|
This is similar to <a
|
||||||
href="#create-scanner1"><code>CREATE-SCANNER</code></a> above but
|
href="#create-scanner"><code>CREATE-SCANNER</code></a> for regex strings above but
|
||||||
accepts a <em>parse tree</em> as its first argument. A parse tree is an S-expression
|
accepts a <em>parse tree</em> as its first argument. A parse tree is an S-expression
|
||||||
conforming to the following syntax:
|
conforming to the following syntax:
|
||||||
|
|
||||||
@ -290,6 +371,11 @@ and <code>:NOT-SINGLE-LINE-MODE-P</code> are equivalent to Perl's
|
|||||||
kept local to the innermost enclosing grouping or clustering
|
kept local to the innermost enclosing grouping or clustering
|
||||||
construct.
|
construct.
|
||||||
|
|
||||||
|
</li><li>All other symbols will signal an error of type <a
|
||||||
|
href="#ppcre-syntax-error"><code>PPCRE-SYNTAX-ERROR</code></a>
|
||||||
|
<em>unless</em> they are defined to be <a
|
||||||
|
href="#parse-tree-synonym"><em>parse tree synonyms</em></a>.
|
||||||
|
|
||||||
<li><code>(:FLAGS {<modifier>}*)</code> where
|
<li><code>(:FLAGS {<modifier>}*)</code> where
|
||||||
<code><modifier></code> is one of the modifier symbols from
|
<code><modifier></code> is one of the modifier symbols from
|
||||||
above is used to group modifier symbols. The modifiers are applied
|
above is used to group modifier symbols. The modifiers are applied
|
||||||
@ -357,6 +443,14 @@ beginning with 1.
|
|||||||
<code><<i>number</i>></code> is a positive integer is a back-reference to a
|
<code><<i>number</i>></code> is a positive integer is a back-reference to a
|
||||||
register group.
|
register group.
|
||||||
|
|
||||||
|
<li><a class=none name="filterdef"><code>(:FILTER <<i>function</i>> <tt>&optional</tt>
|
||||||
|
<<i>length</i>>)</code></a> where
|
||||||
|
<code><<i>function</i>></code> is a <a
|
||||||
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
||||||
|
designator</a> and <code><<i>length</i>></code> is a
|
||||||
|
non-negative integer or <code>NIL</code> is a user-defined <a
|
||||||
|
href="#filters">filter</a>.
|
||||||
|
|
||||||
<li><code>(:CHAR-CLASS|:INVERTED-CHAR-CLASS
|
<li><code>(:CHAR-CLASS|:INVERTED-CHAR-CLASS
|
||||||
{<<i>item</i>>}*)</code> where <code><<i>item</i>></code>
|
{<<i>item</i>>}*)</code> where <code><<i>item</i>></code>
|
||||||
is either a character, a <em>character range</em>, or a symbol for a
|
is either a character, a <em>character range</em>, or a symbol for a
|
||||||
@ -379,10 +473,10 @@ Perl regex strings when given to <code>CREATE-SCANNER</code>. To
|
|||||||
circumvent this you can always use the equivalent parse tree <code>(:GROUP
|
circumvent this you can always use the equivalent parse tree <code>(:GROUP
|
||||||
<<i>string</i>>)</code> instead.
|
<<i>string</i>>)</code> instead.
|
||||||
<p>
|
<p>
|
||||||
Note that currently <code>CREATE-SCANNER</code> doesn't always check
|
Note that <code>CREATE-SCANNER</code> doesn't always check
|
||||||
for the well-formedness of its first argument, i.e. you are expected
|
for the well-formedness of its first argument, i.e. you are expected
|
||||||
to provide <em>correct</em> parse trees. This will most likely change in
|
to provide <em>correct</em> parse trees.
|
||||||
future releases.
|
|
||||||
<p>
|
<p>
|
||||||
The usage of the keyword argument <code>extended-mode</code> obviously
|
The usage of the keyword argument <code>extended-mode</code> obviously
|
||||||
doesn't make sense if <code>CREATE-SCANNER</code> is applied to parse
|
doesn't make sense if <code>CREATE-SCANNER</code> is applied to parse
|
||||||
@ -418,6 +512,72 @@ regex strings to parse trees. Here are some examples:
|
|||||||
(:SEQUENCE (:POSITIVE-LOOKAHEAD #\a) #\b)
|
(:SEQUENCE (:POSITIVE-LOOKAHEAD #\a) #\b)
|
||||||
</pre></blockquote>
|
</pre></blockquote>
|
||||||
|
|
||||||
|
<p><br>[Accessor]
|
||||||
|
<br><a class="none" name="parse-tree-synonym"><b>parse-tree-synonym</b> <i>symbol</i> => <i>parse-tree</i>
|
||||||
|
<br><tt>(setf (</tt><b>parse-tree-synonym</b> <i>symbol</i>) <i>new-parse-tree</i><tt>)</tt></a>
|
||||||
|
|
||||||
|
</p><blockquote><br>
|
||||||
|
Any symbol (unless it's a keyword with a special meaning in parse
|
||||||
|
trees) can be made a "synonym", i.e. an abbreviation, for another parse
|
||||||
|
tree by this accessor. <code>PARSE-TREE-SYNONYM</code> returns <code>NIL</code> if <code><i>symbol</i></code> isn't a synonym yet.
|
||||||
|
<p>
|
||||||
|
Here's an example:
|
||||||
|
|
||||||
|
</p><pre>* (cl-ppcre::parse-string "a*b+")
|
||||||
|
(:SEQUENCE (:GREEDY-REPETITION 0 NIL #\a) (:GREEDY-REPETITION 1 NIL #\b))
|
||||||
|
|
||||||
|
* (defun my-repetition (char min)
|
||||||
|
`(:greedy-repetition ,min nil ,char))
|
||||||
|
MY-REPETITION
|
||||||
|
|
||||||
|
* (setf (parse-tree-synonym 'a*) (my-repetition #\a 0))
|
||||||
|
(:GREEDY-REPETITION 0 NIL #\a)
|
||||||
|
|
||||||
|
* (setf (parse-tree-synonym 'b+) (my-repetition #\b 1))
|
||||||
|
(:GREEDY-REPETITION 1 NIL #\b)
|
||||||
|
|
||||||
|
* (let ((scanner (create-scanner '(:sequence a* b+))))
|
||||||
|
(dolist (string '("ab" "b" "aab" "a" "x"))
|
||||||
|
(print (scan scanner string)))
|
||||||
|
(values))
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
NIL
|
||||||
|
NIL
|
||||||
|
|
||||||
|
* (parse-tree-synonym 'a*)
|
||||||
|
(:GREEDY-REPETITION 0 NIL #\a)
|
||||||
|
|
||||||
|
* (parse-tree-synonym 'a+)
|
||||||
|
NIL
|
||||||
|
</pre></blockquote>
|
||||||
|
|
||||||
|
<p><br>[Macro]
|
||||||
|
<br><a class="none" name="define-parse-tree-synonym"><b>define-parse-tree-synonym</b> <i>name parse-tree</i> => <i>parse-tree</i></a>
|
||||||
|
|
||||||
|
</p><blockquote><br>
|
||||||
|
This is a convenience macro for parse tree synonyms defined as
|
||||||
|
|
||||||
|
<pre>(defmacro define-parse-tree-synonym (name parse-tree)
|
||||||
|
`(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||||
|
(setf (parse-tree-synonym ',name) ',parse-tree)))
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
so you can write code like this:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
(define-parse-tree-synonym a-z
|
||||||
|
(:char-class (:range #\a #\z) (:range #\a #\z)))
|
||||||
|
|
||||||
|
(define-parse-tree-synonym a-z*
|
||||||
|
(:greedy-repetition 0 nil a-z))
|
||||||
|
|
||||||
|
(defun ascii-char-tester (string)
|
||||||
|
(scan '(:sequence :start-anchor a-z* :end-anchor)
|
||||||
|
string))
|
||||||
|
</pre></blockquote>
|
||||||
|
|
||||||
<p><br>
|
<p><br>
|
||||||
<b>For the rest of this section </b><code><i>regex</i></code><b> can
|
<b>For the rest of this section </b><code><i>regex</i></code><b> can
|
||||||
always be a string (which is interpreted as a Perl regular
|
always be a string (which is interpreted as a Perl regular
|
||||||
@ -430,7 +590,7 @@ href="#scan"><code>SCAN</code></a><b>.</b>
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
<p><br>[Function]
|
<p><br>[Standard Generic Function]
|
||||||
<br><a class=none name="scan"><b>scan</b> <i>regex target-string <tt>&key</tt> start end</i> => <i>match-start, match-end, reg-starts, reg-ends</i></a>
|
<br><a class=none name="scan"><b>scan</b> <i>regex target-string <tt>&key</tt> start end</i> => <i>match-start, match-end, reg-starts, reg-ends</i></a>
|
||||||
|
|
||||||
<blockquote><br>
|
<blockquote><br>
|
||||||
@ -525,7 +685,15 @@ Examples:
|
|||||||
Evaluates <code><i>statement*</i></code> with the variables in <code><i>var-list</i></code> bound to the
|
Evaluates <code><i>statement*</i></code> with the variables in <code><i>var-list</i></code> bound to the
|
||||||
corresponding register groups after <code><i>target-string</i></code> has been matched
|
corresponding register groups after <code><i>target-string</i></code> has been matched
|
||||||
against <code><i>regex</i></code>, i.e. each variable is either
|
against <code><i>regex</i></code>, i.e. each variable is either
|
||||||
bound to a string or to <code>NIL</code>. If there is no match, the <code><i>statement*</i></code> forms are <em>not</em>
|
bound to a string or to <code>NIL</code>.
|
||||||
|
As a shortcut, the elements of <code><i>var-list</i></code> can also be lists of the form <code>(FN VAR)</code> where <code>VAR</code> is the variable symbol
|
||||||
|
and <code>FN</code> is a <a
|
||||||
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
||||||
|
designator</a> (which is evaluated) denoting a function which is to be applied to the string before the result is bound to <code>VAR</code>.
|
||||||
|
To make this even more convenient the form <code>(FN VAR1 ...VARn)</code> can be used as an abbreviation for
|
||||||
|
<code>(FN VAR1) ... (FN VARn).
|
||||||
|
<p>
|
||||||
|
If there is no match, the <code><i>statement*</i></code> forms are <em>not</em>
|
||||||
executed. For each element of
|
executed. For each element of
|
||||||
<code><i>var-list</i></code> which is <code>NIL</code> there's no binding to the corresponding register
|
<code><i>var-list</i></code> which is <code>NIL</code> there's no binding to the corresponding register
|
||||||
group. The number of variables in <code><i>var-list</i></code> must not be greater than
|
group. The number of variables in <code><i>var-list</i></code> must not be greater than
|
||||||
@ -537,15 +705,22 @@ share structure with <code><i>target-string</i></code>.
|
|||||||
("((a)|(b)|(c))+" "abababc" :sharedp t)
|
("((a)|(b)|(c))+" "abababc" :sharedp t)
|
||||||
(list first second third fourth))
|
(list first second third fourth))
|
||||||
("c" "a" "b" "c")
|
("c" "a" "b" "c")
|
||||||
|
|
||||||
* (register-groups-bind (nil second third fourth)
|
* (register-groups-bind (nil second third fourth)
|
||||||
<font color=orange>;; note that we don't bind the first and fifth register group</font>
|
<font color=orange>;; note that we don't bind the first and fifth register group</font>
|
||||||
("((a)|(b)|(c))()+" "abababc" :start 6)
|
("((a)|(b)|(c))()+" "abababc" :start 6)
|
||||||
(list second third fourth))
|
(list second third fourth))
|
||||||
(NIL NIL "c")
|
(NIL NIL "c")
|
||||||
|
|
||||||
* (register-groups-bind (first)
|
* (register-groups-bind (first)
|
||||||
("(a|b)+" "accc" :start 1)
|
("(a|b)+" "accc" :start 1)
|
||||||
(format t "This will not be printed: ~A" first))
|
(format t "This will not be printed: ~A" first))
|
||||||
NIL
|
NIL
|
||||||
|
|
||||||
|
* (register-groups-bind (fname lname (#'parse-integer date month year))
|
||||||
|
("(\\w+)\\s+(\\w+)\\s+(\\d{1,2})\\.(\\d{1,2})\\.(\\d{4})" "Frank Zappa 21.12.1940")
|
||||||
|
(list fname lname (encode-universal-time 0 0 0 date month year)))
|
||||||
|
("Frank" "Zappa" 1292882400)
|
||||||
</pre>
|
</pre>
|
||||||
</blockquote>
|
</blockquote>
|
||||||
|
|
||||||
@ -639,7 +814,7 @@ CROSSFOOT
|
|||||||
6
|
6
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
Of course, in real life you would do this with <a href="#do-matches"><code>DO-MATCHES</code></a> and use the <code><i>start</i></code> and <code><i>end</i></code> keyword parameters of <a href="http://www.lispworks.com/reference/HyperSpec/Body/f_parse_.htm"><code>PARSE-INTEGER</code></a>.</blockquote>
|
Of course, in real life you would do this with <a href="#do-matches"><code>DO-MATCHES</code></a> and use the <code><i>start</i></code> and <code><i>end</i></code> keyword parameters of <a href="http://www.lispworks.com/documentation/HyperSpec/Body/f_parse_.htm"><code>PARSE-INTEGER</code></a>.</blockquote>
|
||||||
|
|
||||||
<p><br>[Macro]
|
<p><br>[Macro]
|
||||||
<br><a class=none name="do-register-groups"><b>do-register-groups</b> <i>var-list (regex target-string <tt>&optional</tt> result-form <tt>&key</tt> start end sharedp) declaration* statement*</i> => <i>result*</i></a>
|
<br><a class=none name="do-register-groups"><b>do-register-groups</b> <i>var-list (regex target-string <tt>&optional</tt> result-form <tt>&key</tt> start end sharedp) declaration* statement*</i> => <i>result*</i></a>
|
||||||
@ -648,7 +823,7 @@ Of course, in real life you would do this with <a href="#do-matches"><code>DO-MA
|
|||||||
Iterates over <code><i>target-string</i></code> and tries to match <code><i>regex</i></code> as often as
|
Iterates over <code><i>target-string</i></code> and tries to match <code><i>regex</i></code> as often as
|
||||||
possible evaluating <code><i>statement*</i></code> with the variables in <code><i>var-list</i></code> bound to the
|
possible evaluating <code><i>statement*</i></code> with the variables in <code><i>var-list</i></code> bound to the
|
||||||
corresponding register groups for each match in turn, i.e. each
|
corresponding register groups for each match in turn, i.e. each
|
||||||
variable is either bound to a string or to <code>NIL</code>. The number of
|
variable is either bound to a string or to <code>NIL</code>. You can use the same shortcuts and abbreviations as in <a href="#register-groups-bind"><code>REGISTER-GROUPS-BIND</code></a>. The number of
|
||||||
variables in <code><i>var-list</i></code> must not be greater than the number of register
|
variables in <code><i>var-list</i></code> must not be greater than the number of register
|
||||||
groups. For each element of
|
groups. For each element of
|
||||||
<code><i>var-list</i></code> which is <code>NIL</code> there's no binding to the corresponding register
|
<code><i>var-list</i></code> which is <code>NIL</code> there's no binding to the corresponding register
|
||||||
@ -669,6 +844,14 @@ match. If <code><i>sharedp</i></code> is true, the substrings may share structur
|
|||||||
("b" NIL "b" NIL)
|
("b" NIL "b" NIL)
|
||||||
("c" NIL NIL "c")
|
("c" NIL NIL "c")
|
||||||
NIL
|
NIL
|
||||||
|
|
||||||
|
* (let (result)
|
||||||
|
(do-register-groups ((#'parse-integer n) (#'intern sign) whitespace)
|
||||||
|
("(\\d+)|(\\+|-|\\*|/)|(\\s+)" "12*15 - 42/3")
|
||||||
|
(unless whitespace
|
||||||
|
(push (or n sign) result)))
|
||||||
|
(nreverse result))
|
||||||
|
(12 * 15 - 42 / 3)
|
||||||
</pre>
|
</pre>
|
||||||
</blockquote>
|
</blockquote>
|
||||||
|
|
||||||
@ -787,7 +970,7 @@ frob")
|
|||||||
|
|
||||||
|
|
||||||
<p><br>[Function]
|
<p><br>[Function]
|
||||||
<br><a class=none name="regex-replace"><b>regex-replace</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case</i> => <i>list</i></a>
|
<br><a class=none name="regex-replace"><b>regex-replace</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case simple-calls</i> => <i>list</i></a>
|
||||||
|
|
||||||
<blockquote><br> Try to match <code><i>target-string</i></code>
|
<blockquote><br> Try to match <code><i>target-string</i></code>
|
||||||
between <code><i>start</i></code> and <code><i>end</i></code> against
|
between <code><i>start</i></code> and <code><i>end</i></code> against
|
||||||
@ -804,7 +987,7 @@ match, <code>"\`"</code> for the part of
|
|||||||
<code>N</code>th register where <code>N</code> is a positive integer.
|
<code>N</code>th register where <code>N</code> is a positive integer.
|
||||||
<p>
|
<p>
|
||||||
<code><i>replacement</i></code> can also be a <a
|
<code><i>replacement</i></code> can also be a <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#function_designator">function
|
||||||
designator</a> in which case the match will be replaced with the
|
designator</a> in which case the match will be replaced with the
|
||||||
result of calling the function designated by
|
result of calling the function designated by
|
||||||
<code><i>replacement</i></code> with the arguments
|
<code><i>replacement</i></code> with the arguments
|
||||||
@ -816,6 +999,15 @@ result of calling the function designated by
|
|||||||
positions of matched registers (or <code>NIL</code>) - the meaning of
|
positions of matched registers (or <code>NIL</code>) - the meaning of
|
||||||
the other arguments should be obvious.)
|
the other arguments should be obvious.)
|
||||||
<p>
|
<p>
|
||||||
|
If <code><i>simple-calls</i></code> is true, a function designated by
|
||||||
|
<code><i>replacement</i></code> will instead be called with the
|
||||||
|
arguments <code><i>match</i></code>, <code><i>register-1</i></code>,
|
||||||
|
..., <code><i>register-n</i></code> where <code><i>match</i></code> is
|
||||||
|
the whole match as a string and <code><i>register-1</i></code> to
|
||||||
|
<code><i>register-n</i></code> are the matched registers, also as
|
||||||
|
strings (or <code>NIL</code>). Note that these strings share structure with
|
||||||
|
<code><i>target-string</i></code> so you must not modify them.
|
||||||
|
<p>
|
||||||
Finally, <code><i>replacement</i></code> can be a list where each
|
Finally, <code><i>replacement</i></code> can be a list where each
|
||||||
element is a string (which will be inserted verbatim), one of the
|
element is a string (which will be inserted verbatim), one of the
|
||||||
symbols <code>:match</code>, <code>:before-match</code>, or
|
symbols <code>:match</code>, <code>:before-match</code>, or
|
||||||
@ -829,7 +1021,7 @@ If <code><i>preserve-case</i></code> is true (default is
|
|||||||
<code>NIL</code>), the replacement will try to preserve the case (all
|
<code>NIL</code>), the replacement will try to preserve the case (all
|
||||||
upper case, all lower case, or capitalized) of the match. The result
|
upper case, all lower case, or capitalized) of the match. The result
|
||||||
will always be a <a
|
will always be a <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
||||||
string, even if <code><i>regex</i></code> doesn't match.
|
string, even if <code><i>regex</i></code> doesn't match.
|
||||||
<p>
|
<p>
|
||||||
Examples:
|
Examples:
|
||||||
@ -860,7 +1052,7 @@ Examples:
|
|||||||
|
|
||||||
|
|
||||||
<p><br>[Function]
|
<p><br>[Function]
|
||||||
<br><a class=none name="regex-replace-all"><b>regex-replace-all</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case</i> => <i>list</i></a>
|
<br><a class=none name="regex-replace-all"><b>regex-replace-all</b> <i>regex target-string replacement <tt>&key</tt> start end preserve-case simple-calls</i> => <i>list</i></a>
|
||||||
|
|
||||||
<blockquote><br>
|
<blockquote><br>
|
||||||
Like <a href="#regex-replace"><code>REGEX-REPLACE</code></a> but replaces all matches.
|
Like <a href="#regex-replace"><code>REGEX-REPLACE</code></a> but replaces all matches.
|
||||||
@ -912,6 +1104,34 @@ HOW-MANY
|
|||||||
"foo{...}bar{.....}{..}baz{....}frob"
|
"foo{...}bar{.....}{..}baz{....}frob"
|
||||||
(list "[" 'how-many " dots]"))
|
(list "[" 'how-many " dots]"))
|
||||||
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"
|
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"
|
||||||
|
|
||||||
|
* (let ((qp-regex (cl-ppcre:create-scanner "[\\x80-\\xff]")))
|
||||||
|
(defun encode-quoted-printable (string)
|
||||||
|
"Convert 8-bit string to quoted-printable representation.
|
||||||
|
Version using SIMPLE-CALLS keyword argument."
|
||||||
|
<font color=orange>;; ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there</font>
|
||||||
|
(flet ((convert (match)
|
||||||
|
(format nil "=~2,'0x" (char-code (char match 0)))))
|
||||||
|
(cl-ppcre:regex-replace-all qp-regex string #'convert
|
||||||
|
:simple-calls t))))
|
||||||
|
|
||||||
|
Converted ENCODE-QUOTED-PRINTABLE.
|
||||||
|
ENCODE-QUOTED-PRINTABLE
|
||||||
|
|
||||||
|
* (encode-quoted-printable "Fête Sørensen naïve Hühner Straße")
|
||||||
|
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe"
|
||||||
|
|
||||||
|
* (defun how-many (match first-register)
|
||||||
|
(declare (ignore match))
|
||||||
|
(format nil "~A" (length first-register)))
|
||||||
|
HOW-MANY
|
||||||
|
|
||||||
|
* (cl-ppcre:regex-replace-all "{(.+?)}"
|
||||||
|
"foo{...}bar{.....}{..}baz{....}frob"
|
||||||
|
(list "[" 'how-many " dots]")
|
||||||
|
:simple-calls t)
|
||||||
|
|
||||||
|
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"
|
||||||
</pre></blockquote>
|
</pre></blockquote>
|
||||||
|
|
||||||
<p><br>[Function]
|
<p><br>[Function]
|
||||||
@ -919,7 +1139,7 @@ HOW-MANY
|
|||||||
|
|
||||||
<blockquote><br>
|
<blockquote><br>
|
||||||
Like <a
|
Like <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_apropo.htm"><code>APROPOS</code></a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_apropo.htm"><code>APROPOS</code></a>
|
||||||
but searches for interned symbols which match the regular expression
|
but searches for interned symbols which match the regular expression
|
||||||
<code><i>regex</i></code>. The output is implementation-dependent. If
|
<code><i>regex</i></code>. The output is implementation-dependent. If
|
||||||
<code><i>case-insensitive</i></code> is true (which is the default)
|
<code><i>case-insensitive</i></code> is true (which is the default)
|
||||||
@ -983,7 +1203,7 @@ FOOBOO [variable] value: 43
|
|||||||
|
|
||||||
<blockquote><br>
|
<blockquote><br>
|
||||||
Like <a
|
Like <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_apropo.htm"><code>APROPOS-LIST</code></a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_apropo.htm"><code>APROPOS-LIST</code></a>
|
||||||
but searches for interned symbols which match the regular expression
|
but searches for interned symbols which match the regular expression
|
||||||
<code><i>regex</i></code>. If <code><i>case-insensitive</i></code> is
|
<code><i>regex</i></code>. If <code><i>case-insensitive</i></code> is
|
||||||
true (which is the default) and <code><i>regex</i></code> isn't
|
true (which is the default) and <code><i>regex</i></code> isn't
|
||||||
@ -1001,18 +1221,18 @@ Example (continued from above):
|
|||||||
|
|
||||||
<blockquote><br>This variable controls whether scanners take into
|
<blockquote><br>This variable controls whether scanners take into
|
||||||
account all characters of your CL implementation or only those the <a
|
account all characters of your CL implementation or only those the <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_char_c.htm#char-code"><code>CHAR-CODE</code></a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_char_c.htm#char-code"><code>CHAR-CODE</code></a>
|
||||||
of which is not larger than its value. It is only relevant if the
|
of which is not larger than its value. It is only relevant if the
|
||||||
regular expression contains certain character classes. The default is
|
regular expression contains certain character classes. The default is
|
||||||
<a
|
<a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/v_char_c.htm"><code>CHAR-CODE-LIMIT</code></a>,
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/v_char_c.htm"><code>CHAR-CODE-LIMIT</code></a>,
|
||||||
and you might see significant speed and space improvements during
|
and you might see significant speed and space improvements during
|
||||||
scanner <em>creation</em> if, say, your target strings only contain <a
|
scanner <em>creation</em> if, say, your target strings only contain <a
|
||||||
href="http://wwwwbs.cs.tu-berlin.de/user/czyborra/charsets/">ISO-8859-1</a>
|
href="http://wwwwbs.cs.tu-berlin.de/user/czyborra/charsets/">ISO-8859-1</a>
|
||||||
characters and you're using an implementation like AllegroCL,
|
characters and you're using an implementation like AllegroCL,
|
||||||
LispWorks, or CLISP where <code>CHAR-CODE-LIMIT</code> has a value
|
CLISP, LispWorks, or SBCL where <code>CHAR-CODE-LIMIT</code> has a value
|
||||||
much higher than 255. The <a href="#test">test suite</a> will
|
much higher than 256. The <a href="#test">test suite</a> will
|
||||||
automatically set <code>*REGEX-CHAR-CODE-LIMIT*</code> to 255 while
|
automatically set <code>*REGEX-CHAR-CODE-LIMIT*</code> to 256 while
|
||||||
you're running the default test.
|
you're running the default test.
|
||||||
<p>
|
<p>
|
||||||
Here's an example with LispWorks:
|
Here's an example with LispWorks:
|
||||||
@ -1028,8 +1248,8 @@ Allocation = 546600 bytes standard / 2162611 bytes fixlen
|
|||||||
0 Page faults
|
0 Page faults
|
||||||
#<closure 20654AF2>
|
#<closure 20654AF2>
|
||||||
|
|
||||||
CL-USER 24 > (time (let ((cl-ppcre:*regex-char-code-limit* 255)) (cl-ppcre:create-scanner "[3\\D]")))
|
CL-USER 24 > (time (let ((cl-ppcre:*regex-char-code-limit* 256)) (cl-ppcre:create-scanner "[3\\D]")))
|
||||||
Timing the evaluation of (LET ((CL-PPCRE:*REGEX-CHAR-CODE-LIMIT* 255)) (CL-PPCRE:CREATE-SCANNER "[3\\D]"))
|
Timing the evaluation of (LET ((CL-PPCRE:*REGEX-CHAR-CODE-LIMIT* 256)) (CL-PPCRE:CREATE-SCANNER "[3\\D]"))
|
||||||
|
|
||||||
user time = 0.000
|
user time = 0.000
|
||||||
system time = 0.000
|
system time = 0.000
|
||||||
@ -1042,7 +1262,7 @@ Allocation = 3336 bytes standard / 8338 bytes fixlen
|
|||||||
Note: Due to the nature of <code>LOAD-TIME-VALUE</code> and the <a
|
Note: Due to the nature of <code>LOAD-TIME-VALUE</code> and the <a
|
||||||
href="#compiler-macro">compiler macro for <code>SCAN</code></a> some
|
href="#compiler-macro">compiler macro for <code>SCAN</code></a> some
|
||||||
scanners might be created in a <a
|
scanners might be created in a <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
||||||
lexical environment</a> at load time or at compile time so be careful
|
lexical environment</a> at load time or at compile time so be careful
|
||||||
to which value <code>*REGEX-CHAR-CODE-LIMIT*</code> is bound at that
|
to which value <code>*REGEX-CHAR-CODE-LIMIT*</code> is bound at that
|
||||||
time. The default value should always yield correct results unless you
|
time. The default value should always yield correct results unless you
|
||||||
@ -1052,14 +1272,14 @@ play dirty tricks with implementation-dependent behaviour, though.</blockquote>
|
|||||||
<br><a class=none name="use-bmh-matchers"><b>*use-bmh-matchers*</b></a>
|
<br><a class=none name="use-bmh-matchers"><b>*use-bmh-matchers*</b></a>
|
||||||
|
|
||||||
<blockquote><br>Usually, the scanners created by <a
|
<blockquote><br>Usually, the scanners created by <a
|
||||||
href="#create-scanner1"><code>CREATE-SCANNER</code></a> (or
|
href="#create-scanner"><code>CREATE-SCANNER</code></a> (or
|
||||||
implicitely by other functions and macros) will use fast <a
|
implicitely by other functions and macros) will use fast <a
|
||||||
href="http://www-igm.univ-mlv.fr/~lecroq/string/node18.html">Boyer-Moore-Horspool
|
href="http://www-igm.univ-mlv.fr/~lecroq/string/node18.html">Boyer-Moore-Horspool
|
||||||
matchers</a> to check for constant strings at the start or end of the
|
matchers</a> to check for constant strings at the start or end of the
|
||||||
regular expression. If <code>*USE-BMH-MATCHERS*</code> is
|
regular expression. If <code>*USE-BMH-MATCHERS*</code> is
|
||||||
<code>NIL</code> (the default is <code>T</code>), the standard
|
<code>NIL</code> (the default is <code>T</code>), the standard
|
||||||
function <a
|
function <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_search.htm"><code>SEARCH</code></a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_search.htm"><code>SEARCH</code></a>
|
||||||
will be used instead. This will usually be a bit slower but can save
|
will be used instead. This will usually be a bit slower but can save
|
||||||
lots of space if you're storing many scanners. The <a
|
lots of space if you're storing many scanners. The <a
|
||||||
href="#test">test suite</a> will automatically set
|
href="#test">test suite</a> will automatically set
|
||||||
@ -1069,7 +1289,7 @@ the default test.
|
|||||||
Note: Due to the nature of <code>LOAD-TIME-VALUE</code> and the <a
|
Note: Due to the nature of <code>LOAD-TIME-VALUE</code> and the <a
|
||||||
href="#compiler-macro">compiler macro for <code>SCAN</code></a> some
|
href="#compiler-macro">compiler macro for <code>SCAN</code></a> some
|
||||||
scanners might be created in a <a
|
scanners might be created in a <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_n.htm#null_lexical_environment">null
|
||||||
lexical environment</a> at load time or at compile time so be careful
|
lexical environment</a> at load time or at compile time so be careful
|
||||||
to which value <code>*USE-BMH-MATCHERS*</code> is bound at that
|
to which value <code>*USE-BMH-MATCHERS*</code> is bound at that
|
||||||
time.</blockquote>
|
time.</blockquote>
|
||||||
@ -1134,7 +1354,7 @@ href="#*allow-quoting*"><code>*ALLOW-QUOTING*</code></a> is
|
|||||||
non-word characters (everything except ASCII characters, digits and
|
non-word characters (everything except ASCII characters, digits and
|
||||||
underline) of <code>STRING</code> are quoted by prepending a
|
underline) of <code>STRING</code> are quoted by prepending a
|
||||||
backslash similar to Perl's <code>quotemeta</code> function. It always returns a <a
|
backslash similar to Perl's <code>quotemeta</code> function. It always returns a <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_f.htm#fresh">fresh</a>
|
||||||
string.
|
string.
|
||||||
<pre>
|
<pre>
|
||||||
* (cl-ppcre:quote-meta-chars "[a-z]*")
|
* (cl-ppcre:quote-meta-chars "[a-z]*")
|
||||||
@ -1147,7 +1367,7 @@ string.
|
|||||||
<blockquote><br>
|
<blockquote><br>
|
||||||
Every error signaled by CL-PPCRE is of type
|
Every error signaled by CL-PPCRE is of type
|
||||||
<code>PPCRE-ERROR</code>. This is a direct subtype of <a
|
<code>PPCRE-ERROR</code>. This is a direct subtype of <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/e_smp_er.htm"><code>SIMPLE-ERROR</code></a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/e_smp_er.htm"><code>SIMPLE-ERROR</code></a>
|
||||||
without any additional slots or options.
|
without any additional slots or options.
|
||||||
</blockquote>
|
</blockquote>
|
||||||
|
|
||||||
@ -1210,7 +1430,7 @@ encountered (or <code>NIL</code> if the error happened while trying to
|
|||||||
convert a parse tree). This might be particularly useful when <a
|
convert a parse tree). This might be particularly useful when <a
|
||||||
href="#*allow-quoting*"><code>*ALLOW-QUOTING*</code></a> is
|
href="#*allow-quoting*"><code>*ALLOW-QUOTING*</code></a> is
|
||||||
<em>true</em> because in this case the offending string might not be the one you gave to the <a
|
<em>true</em> because in this case the offending string might not be the one you gave to the <a
|
||||||
href="#create-scanner1"><code>CREATE-SCANNER</code></a> function.
|
href="#create-scanner"><code>CREATE-SCANNER</code></a> function.
|
||||||
</blockquote>
|
</blockquote>
|
||||||
|
|
||||||
<p><br>[Function]
|
<p><br>[Function]
|
||||||
@ -1225,69 +1445,185 @@ convert a parse tree).
|
|||||||
</blockquote>
|
</blockquote>
|
||||||
|
|
||||||
|
|
||||||
<br> <br><h3><a name="install" class=none>Download and installation</a></h3>
|
<br> <br><h3><a name="filters" class=none>Filters</a></h3>
|
||||||
|
|
||||||
CL-PPCRE together with this documentation can be downloaded from <a
|
Because several users have asked for it, CL-PPCRE now offers
|
||||||
href="http://weitz.de/files/cl-ppcre.tgz">http://weitz.de/files/cl-ppcre.tgz</a>. The
|
"filters" (see <a href="#filterdef">above</a> for syntax)
|
||||||
current version is 0.7.4 - older versions are
|
which are basically arbitrary, user-defined functions that can act as
|
||||||
available for download through URLs like
|
regex building blocks. Filters can only be used within <a
|
||||||
<code>http://weitz.de/files/cl-ppcre-<version>.tgz</code>. A <a
|
href="#create-scanner2">parse trees</a>, not within Perl regex
|
||||||
href="CHANGELOG">CHANGELOG</a> is available.
|
strings.
|
||||||
<p>
|
<p>
|
||||||
If you're on <a href="http://www.debian.org/">Debian</a> you should
|
Note that filters are currently considered an experimental feature and
|
||||||
probably use the <a
|
their API might change in the future.
|
||||||
href="http://packages.debian.org/cgi-bin/search_packages.pl?keywords=cl-ppcre&searchon=names&version=all&release=all">cl-ppcre
|
|
||||||
Debian package</a> which is available thanks to <a href="http://b9.com/">Kevin
|
|
||||||
Rosenberg</a>. There's also a port
|
|
||||||
for <a href="http://www.cliki.net/gentoo">Gentoo Linux</a> thanks to Matthew Kennedy and a <a href="http://www.freebsd.org/cgi/url.cgi?ports/textproc/cl-ppcre/pkg-descr">FreeBSD port</a> thanks to Henrik Motakef.
|
|
||||||
Installation via <a
|
|
||||||
href="http://www.cliki.net/asdf-install">asdf-install</a> should as well
|
|
||||||
be possible.
|
|
||||||
<p>
|
<p>
|
||||||
CL-PPCRE comes with simple system definitions for <a
|
A filter is defined by its <em>filter function</em> which must be a
|
||||||
href="http://www.cliki.net/mk-defsystem">MK:DEFSYSTEM</a> and <a
|
function of one argument. During the parsing process this function
|
||||||
href="http://www.cliki.net/asdf">asdf</a> so you can either adapt it
|
might be called once or several times or it might not be called at
|
||||||
to your needs or just unpack the archive and from within the CL-PPCRE
|
all. If it's called its argument is an integer <code><i>pos</i></code>
|
||||||
directory start your Lisp image and evaluate the form
|
which is the current position within the target string. The filter can
|
||||||
<code>(mk:compile-system "cl-ppcre")</code> (or the
|
either return <code>NIL</code> (which means that the subexpression
|
||||||
equivalent one for asdf) which should compile and load the whole
|
represented by this filter didn't match) or an integer not smaller
|
||||||
system.
|
than <code><i>pos</i></code> for success. A zero-length assertion
|
||||||
|
should return <code><i>pos</i></code> itself while a filter which
|
||||||
|
wants to consume <code>N</code> characters should return
|
||||||
|
<code>(+ POS N)</code>.
|
||||||
<p>
|
<p>
|
||||||
If for some reason you don't want to use MK:DEFSYSTEM or asdf you
|
If you supply the optional value <code><i>length</i></code> and it is
|
||||||
can just <code>LOAD</code> the file <code>load.lisp</code> or you
|
not <code>NIL</code> then this is a promise to the regex engine that
|
||||||
can also get away with something like this:
|
your filter will <em>always</em> consume <em>exactly</em>
|
||||||
|
<code><i>length</i></code> characters. The regex engine might use this
|
||||||
|
information for optimization purposes but it is otherwise irrelevant
|
||||||
|
to the outcome of the matching process.
|
||||||
|
<p>
|
||||||
|
The filter function can access the following special variables from
|
||||||
|
its code body:
|
||||||
|
<ul>
|
||||||
|
|
||||||
|
<li><code>CL-PPCRE::*STRING*</code>: The target (a string) of the
|
||||||
|
current matching process.
|
||||||
|
|
||||||
|
<li><code>CL-PPCRE::*START-POS*</code> and
|
||||||
|
<code>CL-PPCRE::*END-POS*</code>: The start and end (integers) indices
|
||||||
|
of the current matching process. These correspond to the
|
||||||
|
<code>START</code> and <code>END</code> keyword parameters of <a
|
||||||
|
href="#scan"><code>SCAN</code></a>.
|
||||||
|
|
||||||
|
<li><code>CL-PPCRE::*REAL-START-POS*</code>: The initial starting
|
||||||
|
position. This is only relevant for repeated scans (as in <a
|
||||||
|
href="#do-scans"><code>DO-SCANS</code></a>) where
|
||||||
|
<code>CL-PPCRE::*START-POS*</code> will be moved forward while
|
||||||
|
<code>CL-PPCRE::*REAL-START-POS*</code> won't. For normal scans the
|
||||||
|
value of this variable is <code>NIL</code>.
|
||||||
|
|
||||||
|
<li><CODE>CL-PPCRE::*REG-STARTS*</CODE> and
|
||||||
|
<CODE>CL-PPCRE::*REG-ENDS*</CODE>: Two simple vectors which denote the
|
||||||
|
start and end indices of registers within the regular expression. The
|
||||||
|
first register is indexed by 0. If a register hasn't matched yet
|
||||||
|
then its corresponding entry in <CODE>CL-PPCRE::*REG-STARTS*</CODE> is
|
||||||
|
<code>NIL</code>.
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
These variables should be considered read-only. Do <em>not</em> change
|
||||||
|
these values unless you really know what you're doing!
|
||||||
|
<p>
|
||||||
|
Note that the names of the variables are not exported from the
|
||||||
|
<code>CL-PPCRE</code> package because there's currently no guarantee
|
||||||
|
that they will be available in future releases.
|
||||||
|
<p>
|
||||||
|
Here are some filter examples:
|
||||||
<pre>
|
<pre>
|
||||||
(loop for name in '("packages" "specials" "util" "errors" "lexer"
|
* (defun my-info-filter (pos)
|
||||||
"parser" "regex-class" "convert" "optimize"
|
"Show some info about the matching process."
|
||||||
"closures" "repetition-closures" "scanner" "api")
|
(format t "Called at position ~A~%" pos)
|
||||||
do (compile-file (make-pathname :name name
|
(loop with dim = (array-dimension cl-ppcre::*reg-starts* 0)
|
||||||
:type "lisp"))
|
for i below dim
|
||||||
(load name))
|
for reg-start = (aref cl-ppcre::*reg-starts* i)
|
||||||
|
for reg-end = (aref cl-ppcre::*reg-ends* i)
|
||||||
|
do (format t "Register ~A is currently " (1+ i))
|
||||||
|
when reg-start
|
||||||
|
(write-string cl-ppcre::*string* nil
|
||||||
|
do (write-char #\')
|
||||||
|
(write-string cl-ppcre::*string* nil
|
||||||
|
:start reg-start :end reg-end)
|
||||||
|
(write-char #\')
|
||||||
|
else
|
||||||
|
do (write-string "unbound")
|
||||||
|
do (terpri))
|
||||||
|
(terpri)
|
||||||
|
pos)
|
||||||
|
MY-INFO-FILTER
|
||||||
|
|
||||||
|
* (scan '(:sequence
|
||||||
|
(:register
|
||||||
|
(:greedy-repetition 0 nil
|
||||||
|
(:char-class (:range #\a #\z))))
|
||||||
|
(:filter my-info-filter 0) "X")
|
||||||
|
"bYcdeX")
|
||||||
|
Called at position 1
|
||||||
|
Register 1 is currently 'b'
|
||||||
|
|
||||||
|
Called at position 0
|
||||||
|
Register 1 is currently ''
|
||||||
|
|
||||||
|
Called at position 1
|
||||||
|
Register 1 is currently ''
|
||||||
|
|
||||||
|
Called at position 5
|
||||||
|
Register 1 is currently 'cde'
|
||||||
|
|
||||||
|
2
|
||||||
|
6
|
||||||
|
#(2)
|
||||||
|
#(5)
|
||||||
|
|
||||||
|
* (scan '(:sequence
|
||||||
|
(:register
|
||||||
|
(:greedy-repetition 0 nil
|
||||||
|
(:char-class (:range #\a #\z))))
|
||||||
|
(:filter my-info-filter 0) "X")
|
||||||
|
"bYcdeZ")
|
||||||
|
NIL
|
||||||
|
|
||||||
|
* (defun my-weird-filter (pos)
|
||||||
|
"Only match at this point if either pos is odd and the character
|
||||||
|
we're looking at is lowerrcase or if pos is even and the next two
|
||||||
|
characters we're looking at are uppercase. Consume these characters if
|
||||||
|
there's a match."
|
||||||
|
(format t "Trying at position ~A~%" pos)
|
||||||
|
(cond ((and (oddp pos)
|
||||||
|
(< pos cl-ppcre::*end-pos*)
|
||||||
|
(lower-case-p (char cl-ppcre::*string* pos)))
|
||||||
|
(1+ pos))
|
||||||
|
((and (evenp pos)
|
||||||
|
(< (1+ pos) cl-ppcre::*end-pos*)
|
||||||
|
(upper-case-p (char cl-ppcre::*string* pos))
|
||||||
|
(upper-case-p (char cl-ppcre::*string* (1+ pos))))
|
||||||
|
(+ pos 2))
|
||||||
|
(t nil)))
|
||||||
|
MY-WEIRD-FILTER
|
||||||
|
|
||||||
|
* (defparameter *weird-regex*
|
||||||
|
`(:sequence "+" (:filter ,#'my-weird-filter) "+"))
|
||||||
|
*WEIRD-REGEX*
|
||||||
|
|
||||||
|
* (scan *weird-regex* "+A++a+AA+")
|
||||||
|
Trying at position 1
|
||||||
|
Trying at position 3
|
||||||
|
Trying at position 4
|
||||||
|
Trying at position 6
|
||||||
|
5
|
||||||
|
9
|
||||||
|
#()
|
||||||
|
#()
|
||||||
|
|
||||||
|
* (fmakunbound 'my-weird-filter)
|
||||||
|
MY-WEIRD-FILTER
|
||||||
|
|
||||||
|
* (scan *weird-regex* "+A++a+AA+")
|
||||||
|
Trying at position 1
|
||||||
|
Trying at position 3
|
||||||
|
Trying at position 4
|
||||||
|
Trying at position 6
|
||||||
|
5
|
||||||
|
9
|
||||||
|
#()
|
||||||
|
#()
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
Note that on CL implementations which use the Python compiler
|
Note that in the second call to <code>SCAN</code> our filter wasn't
|
||||||
(i.e. CMUCL, SBCL, SCL) you can concatenate the compiled object files
|
invoked at all - it was optimized away by the regex engine because it
|
||||||
to create one single object file which you can load afterwards:
|
knew that it couldn't match. Also note that <code>*WEIRD-REGEX*</code>
|
||||||
|
still worked after we removed the global function definition of
|
||||||
|
<code>MY-WEIRD-FILTER</code> because the regular expression had
|
||||||
|
captured the original definition.
|
||||||
|
|
||||||
<pre>
|
<p>
|
||||||
cat {packages,specials,util,errors,lexer,parser,regex-class,convert,optimize,closures,repetition-closures,scanner,api}.x86f > cl-ppcre.x86f
|
|
||||||
</pre>
|
|
||||||
|
|
||||||
(Replace ".<code>x86f</code>" with the correct suffix for
|
For more ideas about what you can do with filters see <a
|
||||||
your platform.)
|
href="http://common-lisp.net/pipermail/cl-ppcre-devel/2004-October/000069.html">this
|
||||||
|
thread</a> on the <a href="#mail">mailing list</a>.
|
||||||
|
|
||||||
<br> <br><h3><a name="mail" class=none>Support and mailing lists</a></h3>
|
|
||||||
|
|
||||||
For questions, bug reports, feature requests, improvements, or patches
|
|
||||||
please use the <a
|
|
||||||
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-devel">cl-ppcre-devel
|
|
||||||
mailing list</a>. If you want to be notified about future releases
|
|
||||||
subscribe to the <a
|
|
||||||
href="http://common-lisp.net/mailman/listinfo/cl-ppcre-announce">cl-ppcre-announce
|
|
||||||
mailing list</a>. These mailing lists were made available thanks to
|
|
||||||
the services of <a href="http://common-lisp.net/">common-lisp.net</a>.
|
|
||||||
|
|
||||||
<br> <br><h3><a name="test" class=none>Testing CL-PPCRE</a></h3>
|
<br> <br><h3><a name="test" class=none>Testing CL-PPCRE</a></h3>
|
||||||
|
|
||||||
@ -1317,7 +1653,7 @@ NIL
|
|||||||
* (cl-ppcre-test:test)
|
* (cl-ppcre-test:test)
|
||||||
|
|
||||||
<font color=orange>;; ....
|
<font color=orange>;; ....
|
||||||
;; (a list of <a href="#perl">incompatibilities with Perl</a>)</font color=orange>
|
;; (a list of <a class=noborder href="#perl">incompatibilities with Perl</a>)</font color=orange>
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
(If you're not using MK:DEFSYSTEM or asdf it suffices to build
|
(If you're not using MK:DEFSYSTEM or asdf it suffices to build
|
||||||
@ -1398,7 +1734,7 @@ translates <code>"\r"</code> to <code>(CODE-CHAR
|
|||||||
<h4><a name="alpha" class=none>What about <code>"\w"</code>?</a></h4>
|
<h4><a name="alpha" class=none>What about <code>"\w"</code>?</a></h4>
|
||||||
|
|
||||||
CL-PPCRE uses <a
|
CL-PPCRE uses <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/f_alphan.htm"><code>ALPHANUMERICP</code></a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/f_alphan.htm"><code>ALPHANUMERICP</code></a>
|
||||||
to decide whether a character matches Perl's
|
to decide whether a character matches Perl's
|
||||||
<code>"\w"</code>, so depending on your CL implementation
|
<code>"\w"</code>, so depending on your CL implementation
|
||||||
you might encounter differences between Perl and CL-PPCRE when
|
you might encounter differences between Perl and CL-PPCRE when
|
||||||
@ -1410,7 +1746,7 @@ matching non-ASCII characters.
|
|||||||
|
|
||||||
The <a href="">CL-PPCRE test suite</a> can also be used for
|
The <a href="">CL-PPCRE test suite</a> can also be used for
|
||||||
benchmarking purposes: If you call <code>perltest.pl</code> with a
|
benchmarking purposes: If you call <code>perltest.pl</code> with a
|
||||||
command line argument it will be interpreted as the number of seconds
|
command line argument it will be interpreted as the minimum number of seconds
|
||||||
each test should run. Perl will time its tests accordingly and create
|
each test should run. Perl will time its tests accordingly and create
|
||||||
output which, when fed to <code>CL-PPCRE-TEST:TEST</code>, will result
|
output which, when fed to <code>CL-PPCRE-TEST:TEST</code>, will result
|
||||||
in a benchmark. Here's an example:
|
in a benchmark. Here's an example:
|
||||||
@ -1554,13 +1890,13 @@ for you automatically.
|
|||||||
<p>
|
<p>
|
||||||
However, beginning with version 0.5.2, CL-PPCRE uses a <a
|
However, beginning with version 0.5.2, CL-PPCRE uses a <a
|
||||||
name="compiler-macro"
|
name="compiler-macro"
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_c.htm#compiler_macro">compiler
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_c.htm#compiler_macro">compiler
|
||||||
macro</a> and <a
|
macro</a> and <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/s_ld_tim.htm"><code>LOAD-TIME-VALUE</code></a>
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/s_ld_tim.htm"><code>LOAD-TIME-VALUE</code></a>
|
||||||
to make sure that the scanner is only built once if the first argument
|
to make sure that the scanner is only built once if the first argument
|
||||||
to <a href="#scan"><code>SCAN</code></a>, <a href="#scan-to-strings"><code>SCAN-TO-STRINGS</code></a>, <a href="#split"><code>SPLIT</code></a>, or
|
to <a href="#scan"><code>SCAN</code></a>, <a href="#scan-to-strings"><code>SCAN-TO-STRINGS</code></a>, <a href="#split"><code>SPLIT</code></a>,
|
||||||
<a href="#regex-replace"><code>REGEX-REPLACE</code></a> is a <a
|
<a href="#regex-replace"><code>REGEX-REPLACE</code></a>, or <a href="#regex-replace-all"><code>REGEX-REPLACE-ALL</code></a> is a <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/26_glo_c.htm#constant_form">constant
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_c.htm#constant_form">constant
|
||||||
form</a>. (But see the notes for <a
|
form</a>. (But see the notes for <a
|
||||||
href="#regex-char-code-limit"><code>*REGEX-CHAR-CODE-LIMIT*</code></a> and
|
href="#regex-char-code-limit"><code>*REGEX-CHAR-CODE-LIMIT*</code></a> and
|
||||||
<a href="#use-bmh-matchers"><code>*USE-BMH-MATCHERS*</code></a>.)
|
<a href="#use-bmh-matchers"><code>*USE-BMH-MATCHERS*</code></a>.)
|
||||||
@ -1674,7 +2010,7 @@ target strings.
|
|||||||
<p>
|
<p>
|
||||||
Another thing to consider is that, for performance reasons, CL-PPCRE
|
Another thing to consider is that, for performance reasons, CL-PPCRE
|
||||||
assumes that most of the target strings you're trying to match are <a
|
assumes that most of the target strings you're trying to match are <a
|
||||||
href="http://www.lispworks.com/reference/HyperSpec/Body/t_smp_st.htm">simple
|
href="http://www.lispworks.com/documentation/HyperSpec/Body/t_smp_st.htm">simple
|
||||||
strings</a> and coerces non-simple strings to simple strings before
|
strings</a> and coerces non-simple strings to simple strings before
|
||||||
scanning them. If you plan on working with non-simple strings mostly
|
scanning them. If you plan on working with non-simple strings mostly
|
||||||
you might consider modifying the CL-PPCRE source code. This is easy:
|
you might consider modifying the CL-PPCRE source code. This is easy:
|
||||||
@ -1746,6 +2082,8 @@ TARGET
|
|||||||
With CMUCL the situation is better and worse at the same time. It will
|
With CMUCL the situation is better and worse at the same time. It will
|
||||||
take a lot longer until CMUCL gives up but if it gives up the whole
|
take a lot longer until CMUCL gives up but if it gives up the whole
|
||||||
Lisp image will silently die (at least on my machine):
|
Lisp image will silently die (at least on my machine):
|
||||||
|
<p>
|
||||||
|
[Note: This was true for CMUCL 18e - CMUCL 19a behaves in a much nicer way and gives you a chance to recover.]
|
||||||
|
|
||||||
<pre>
|
<pre>
|
||||||
* (defun target (n) (concatenate 'string (make-string n :initial-element #\a) "b"))
|
* (defun target (n) (concatenate 'string (make-string n :initial-element #\a) "b"))
|
||||||
@ -1900,6 +2238,50 @@ IBM Thinkpad T23 laptop (Pentium III 1.2 GHz,
|
|||||||
768 MB RAM) running <a href="http://www.gentoo.org/">Gentoo
|
768 MB RAM) running <a href="http://www.gentoo.org/">Gentoo
|
||||||
Linux</a> 1.1a.
|
Linux</a> 1.1a.
|
||||||
|
|
||||||
|
<br> <br><h3><a class=none name="allegro">AllegroCL compatibility mode</a></h3>
|
||||||
|
|
||||||
|
Since autumn 2004 <a
|
||||||
|
href="http://www.franz.com/products/allegrocl/">AllegroCL</a> offers
|
||||||
|
<a
|
||||||
|
href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm">a
|
||||||
|
new regular expression API</a> with a syntax very similar to
|
||||||
|
CL-PPCRE. Although CL-PPCRE is quite fast already, AllegroCL's engine will
|
||||||
|
most likely be even faster (but only on AllegroCL, of course). However, you might want to
|
||||||
|
stick to CL-PPCRE because you have a "legacy" application or because
|
||||||
|
you want your code to be portable to other Lisp implementations.
|
||||||
|
Therefore, beginning from version 1.2.0, CL-PPCRE offers a
|
||||||
|
"compatibility mode" where you can continue using the CL-PPCRE API as
|
||||||
|
described <a href="#dict">above</a> but deploy the AllegroCL regex
|
||||||
|
engine under the hood. (The details are: Calls to <a
|
||||||
|
href="#create-scanner"><code>CREATE-SCANNER</code></a> and <a
|
||||||
|
href="#scan"><code>SCAN</code></a> are dispatched to their AllegroCL
|
||||||
|
counterparts <a
|
||||||
|
href="http://www.franz.com/support/documentation/7.0/doc/operators/excl/compile-re.htm"><code>EXCL:COMPILE-RE</code></a>
|
||||||
|
and <a
|
||||||
|
href="http://www.franz.com/support/documentation/7.0/doc/operators/excl/match-re.htm"><code>EXCL:MATCH-RE</code></a>
|
||||||
|
while everything else is left as is.)
|
||||||
|
<p>
|
||||||
|
The advantage of this mode is that you'll get a much smaller image and
|
||||||
|
most likely faster code. (But note that CL-PPCRE needs to do a small amount of work to massage AllegroCL's output into the format expected by CL-PPCRE.) The downside is that your code won't be
|
||||||
|
fully compatible with CL-PPCRE anymore. Here are some of the
|
||||||
|
differences (most of which probably don't matter very often):
|
||||||
|
<ul>
|
||||||
|
<li>The AllegroCL engine doesn't offer <a
|
||||||
|
href="#parse-tree-synonym">parse tree synonyms</a> and <a href="#filters">filters</a>.
|
||||||
|
<li>The AllegroCL engine <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-compatibility-2">will choke on some regular expressions involving curly braces</a> that are accepted by Perl and CL-PPCRE's native engine.
|
||||||
|
<li>The AllegroCL engine's case-folding mode switch (which is used instead of CL-PPCRE's <a href="#create-scanner"><code>:CASE-INSENSITIVE</code> keyword parameter</a>) <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-matching-2">is currently only effective for ASCII characters</a>.
|
||||||
|
<li>CL-PPCRE's engine doesn't understand the <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-capturing-2">named register groups</a> provided by AllegroCL.
|
||||||
|
<li>The AllegroCL engine <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm#regexp-new-compatibility-2">doesn't support</a> <a href="#*allow-quoting*">quoting of metacharacters</a>.
|
||||||
|
<li>In AllegroCL compatibility mode compiled regular expressions (as returned by <a href="#create-scanner"><code>CREATE-SCANNER</code></a>) aren't functions but structures.
|
||||||
|
</ul>
|
||||||
|
For more details about the AllegroCL engine and possible deviations from CL-PPCRE see the <a href="http://www.franz.com/support/documentation/7.0/doc/regexp.htm">documentation</a> at the <a href="http://www.franz.com/">Franz Inc. website</a>.
|
||||||
|
<p>
|
||||||
|
To use the AllegroCL compatibility mode you have to
|
||||||
|
<pre>
|
||||||
|
(push :use-acl-regexp2-engine *features*)
|
||||||
|
</pre>
|
||||||
|
<em>before</em> you compile CL-PPCRE.
|
||||||
|
|
||||||
<br> <br><h3><a class=none name="ack">Acknowledgements</a></h3>
|
<br> <br><h3><a class=none name="ack">Acknowledgements</a></h3>
|
||||||
|
|
||||||
Although I didn't use their code I was heavily inspired by looking at
|
Although I didn't use their code I was heavily inspired by looking at
|
||||||
@ -1927,7 +2309,7 @@ where I wrote most of the code and thanks to my wife for lending me
|
|||||||
her PowerBook to test CL-PPCRE with MCL and OpenMCL.
|
her PowerBook to test CL-PPCRE with MCL and OpenMCL.
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
$Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/doc/index.html,v 1.1 2004/06/23 08:27:10 hans Exp $
|
$Header: /usr/local/cvsrep/cl-ppcre/doc/index.html,v 1.131 2005/11/01 09:51:02 edi Exp $
|
||||||
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
|
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
20
errors.lisp
20
errors.lisp
@ -1,7 +1,7 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-LISP; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/errors.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/errors.lisp,v 1.14 2005/04/01 21:29:09 edi Exp $
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -50,7 +50,19 @@ this type."))
|
|||||||
(simple-condition-format-control condition)
|
(simple-condition-format-control condition)
|
||||||
(simple-condition-format-arguments condition)
|
(simple-condition-format-arguments condition)
|
||||||
(ppcre-syntax-error-pos condition)
|
(ppcre-syntax-error-pos condition)
|
||||||
(ppcre-syntax-error-string condition)))))
|
(ppcre-syntax-error-string condition))))
|
||||||
|
(:documentation "Signaled if CL-PPCRE's parser encounters an error
|
||||||
|
when trying to parse a regex string or to convert a parse tree into
|
||||||
|
its internal representation."))
|
||||||
|
|
||||||
|
(setf (documentation 'ppcre-syntax-error-string 'function)
|
||||||
|
"Returns the string the parser was parsing when the error was
|
||||||
|
encountered \(or NIL if the error happened while trying to convert a
|
||||||
|
parse tree).")
|
||||||
|
|
||||||
|
(setf (documentation 'ppcre-syntax-error-pos 'function)
|
||||||
|
"Returns the position within the string where the error occured
|
||||||
|
\(or NIL if the error happened while trying to convert a parse tree")
|
||||||
|
|
||||||
(define-condition ppcre-invocation-error (ppcre-error)
|
(define-condition ppcre-invocation-error (ppcre-error)
|
||||||
()
|
()
|
||||||
|
|||||||
129
lexer.lisp
129
lexer.lisp
@ -1,5 +1,5 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/lexer.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/lexer.lisp,v 1.24 2005/04/01 21:29:09 edi Exp $
|
||||||
|
|
||||||
;;; The lexer's responsibility is to convert the regex string into a
|
;;; The lexer's responsibility is to convert the regex string into a
|
||||||
;;; sequence of tokens which are in turn consumed by the parser.
|
;;; sequence of tokens which are in turn consumed by the parser.
|
||||||
@ -9,7 +9,7 @@
|
|||||||
;;; has opened so far. (The latter is necessary for interpreting
|
;;; has opened so far. (The latter is necessary for interpreting
|
||||||
;;; strings like "\\10" correctly.)
|
;;; strings like "\\10" correctly.)
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -39,12 +39,7 @@
|
|||||||
|
|
||||||
(declaim (inline map-char-to-special-class))
|
(declaim (inline map-char-to-special-class))
|
||||||
(defun map-char-to-special-char-class (chr)
|
(defun map-char-to-special-char-class (chr)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Maps escaped characters like \"\\d\" to the tokens which represent
|
"Maps escaped characters like \"\\d\" to the tokens which represent
|
||||||
their associated character classes."
|
their associated character classes."
|
||||||
(case chr
|
(case chr
|
||||||
@ -62,12 +57,7 @@ their associated character classes."
|
|||||||
:non-whitespace-char-class)))
|
:non-whitespace-char-class)))
|
||||||
|
|
||||||
(locally
|
(locally
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(defstruct (lexer (:constructor make-lexer-internal))
|
(defstruct (lexer (:constructor make-lexer-internal))
|
||||||
"LEXER structures are used to hold the regex string which is
|
"LEXER structures are used to hold the regex string which is
|
||||||
currently lexed and to keep track of the lexer's state."
|
currently lexed and to keep track of the lexer's state."
|
||||||
@ -86,30 +76,20 @@ currently lexed and to keep track of the lexer's state."
|
|||||||
|
|
||||||
(defun make-lexer (string)
|
(defun make-lexer (string)
|
||||||
(declare (inline make-lexer-internal)
|
(declare (inline make-lexer-internal)
|
||||||
(type string string))
|
#-genera (type string string))
|
||||||
(make-lexer-internal :str (maybe-coerce-to-simple-string string)
|
(make-lexer-internal :str (maybe-coerce-to-simple-string string)
|
||||||
:len (length string)))
|
:len (length string)))
|
||||||
|
|
||||||
(declaim (inline end-of-string-p))
|
(declaim (inline end-of-string-p))
|
||||||
(defun end-of-string-p (lexer)
|
(defun end-of-string-p (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Tests whether we're at the end of the regex string."
|
"Tests whether we're at the end of the regex string."
|
||||||
(<= (lexer-len lexer)
|
(<= (lexer-len lexer)
|
||||||
(lexer-pos lexer)))
|
(lexer-pos lexer)))
|
||||||
|
|
||||||
(declaim (inline looking-at-p))
|
(declaim (inline looking-at-p))
|
||||||
(defun looking-at-p (lexer chr)
|
(defun looking-at-p (lexer chr)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Tests whether the next character the lexer would see is CHR.
|
"Tests whether the next character the lexer would see is CHR.
|
||||||
Does not respect extended mode."
|
Does not respect extended mode."
|
||||||
(and (not (end-of-string-p lexer))
|
(and (not (end-of-string-p lexer))
|
||||||
@ -118,12 +98,7 @@ Does not respect extended mode."
|
|||||||
|
|
||||||
(declaim (inline next-char-non-extended))
|
(declaim (inline next-char-non-extended))
|
||||||
(defun next-char-non-extended (lexer)
|
(defun next-char-non-extended (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns the next character which is to be examined and updates the
|
"Returns the next character which is to be examined and updates the
|
||||||
POS slot. Does not respect extended mode."
|
POS slot. Does not respect extended mode."
|
||||||
(cond ((end-of-string-p lexer)
|
(cond ((end-of-string-p lexer)
|
||||||
@ -134,12 +109,7 @@ POS slot. Does not respect extended mode."
|
|||||||
(incf (lexer-pos lexer))))))
|
(incf (lexer-pos lexer))))))
|
||||||
|
|
||||||
(defun next-char (lexer)
|
(defun next-char (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns the next character which is to be examined and updates the
|
"Returns the next character which is to be examined and updates the
|
||||||
POS slot. Respects extended mode, i.e. whitespace, comments, and also
|
POS slot. Respects extended mode, i.e. whitespace, comments, and also
|
||||||
nested comments are skipped if applicable."
|
nested comments are skipped if applicable."
|
||||||
@ -203,12 +173,7 @@ nested comments are skipped if applicable."
|
|||||||
|
|
||||||
(declaim (inline fail))
|
(declaim (inline fail))
|
||||||
(defun fail (lexer)
|
(defun fail (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Moves (LEXER-POS LEXER) back to the last position stored in
|
"Moves (LEXER-POS LEXER) back to the last position stored in
|
||||||
\(LEXER-LAST-POS LEXER) and pops the LAST-POS stack."
|
\(LEXER-LAST-POS LEXER) and pops the LAST-POS stack."
|
||||||
(unless (lexer-last-pos lexer)
|
(unless (lexer-last-pos lexer)
|
||||||
@ -217,12 +182,7 @@ nested comments are skipped if applicable."
|
|||||||
nil)
|
nil)
|
||||||
|
|
||||||
(defun get-number (lexer &key (radix 10) max-length no-whitespace-p)
|
(defun get-number (lexer &key (radix 10) max-length no-whitespace-p)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Read and consume the number the lexer is currently looking at and
|
"Read and consume the number the lexer is currently looking at and
|
||||||
return it. Returns NIL if no number could be identified.
|
return it. Returns NIL if no number could be identified.
|
||||||
RADIX is used as in PARSE-INTEGER. If MAX-LENGTH is not NIL we'll read
|
RADIX is used as in PARSE-INTEGER. If MAX-LENGTH is not NIL we'll read
|
||||||
@ -252,12 +212,7 @@ we don't tolerate whitespace in front of the number."
|
|||||||
|
|
||||||
(declaim (inline try-number))
|
(declaim (inline try-number))
|
||||||
(defun try-number (lexer &key (radix 10) max-length no-whitespace-p)
|
(defun try-number (lexer &key (radix 10) max-length no-whitespace-p)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Like GET-NUMBER but won't consume anything if no number is seen."
|
"Like GET-NUMBER but won't consume anything if no number is seen."
|
||||||
;; remember current position
|
;; remember current position
|
||||||
(push (lexer-pos lexer) (lexer-last-pos lexer))
|
(push (lexer-pos lexer) (lexer-last-pos lexer))
|
||||||
@ -269,16 +224,11 @@ we don't tolerate whitespace in front of the number."
|
|||||||
|
|
||||||
(declaim (inline make-char-from-code))
|
(declaim (inline make-char-from-code))
|
||||||
(defun make-char-from-code (number error-pos)
|
(defun make-char-from-code (number error-pos)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Create character from char-code NUMBER. NUMBER can be NIL
|
"Create character from char-code NUMBER. NUMBER can be NIL
|
||||||
which is interpreted as 0. ERROR-POS is the position where
|
which is interpreted as 0. ERROR-POS is the position where
|
||||||
the corresponding number started within the regex string."
|
the corresponding number started within the regex string."
|
||||||
;; Only look at rightmost eight bits in compliance with Perl
|
;; only look at rightmost eight bits in compliance with Perl
|
||||||
(let ((code (logand #o377 (the fixnum (or number 0)))))
|
(let ((code (logand #o377 (the fixnum (or number 0)))))
|
||||||
(or (and (< code char-code-limit)
|
(or (and (< code char-code-limit)
|
||||||
(code-char code))
|
(code-char code))
|
||||||
@ -288,12 +238,7 @@ the corresponding number started within the regex string."
|
|||||||
number))))
|
number))))
|
||||||
|
|
||||||
(defun unescape-char (lexer)
|
(defun unescape-char (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Convert the characters(s) following a backslash into a token
|
"Convert the characters(s) following a backslash into a token
|
||||||
which is returned. This function is to be called when the backslash
|
which is returned. This function is to be called when the backslash
|
||||||
has already been consumed. Special character classes like \\W are
|
has already been consumed. Special character classes like \\W are
|
||||||
@ -351,12 +296,7 @@ handled elsewhere."
|
|||||||
chr))))
|
chr))))
|
||||||
|
|
||||||
(defun collect-char-class (lexer)
|
(defun collect-char-class (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Reads and consumes characters from regex string until a right
|
"Reads and consumes characters from regex string until a right
|
||||||
bracket is seen. Assembles them into a list \(which is returned) of
|
bracket is seen. Assembles them into a list \(which is returned) of
|
||||||
characters, character ranges, like \(:RANGE #\\A #\\E) for a-e, and
|
characters, character ranges, like \(:RANGE #\\A #\\E) for a-e, and
|
||||||
@ -437,12 +377,7 @@ we're inside a range or not."
|
|||||||
"Missing right bracket to close character class"))))
|
"Missing right bracket to close character class"))))
|
||||||
|
|
||||||
(defun maybe-parse-flags (lexer)
|
(defun maybe-parse-flags (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Reads a sequence of modifiers \(including #\\- to reverse their
|
"Reads a sequence of modifiers \(including #\\- to reverse their
|
||||||
meaning) and returns a corresponding list of \"flag\" tokens. The
|
meaning) and returns a corresponding list of \"flag\" tokens. The
|
||||||
\"x\" modifier is treated specially in that it dynamically modifies
|
\"x\" modifier is treated specially in that it dynamically modifies
|
||||||
@ -478,12 +413,7 @@ the behaviour of the lexer itself via the special variable
|
|||||||
(decf (lexer-pos lexer))))
|
(decf (lexer-pos lexer))))
|
||||||
|
|
||||||
(defun get-quantifier (lexer)
|
(defun get-quantifier (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns a list of two values (min max) if what the lexer is looking
|
"Returns a list of two values (min max) if what the lexer is looking
|
||||||
at can be interpreted as a quantifier. Otherwise returns NIL and
|
at can be interpreted as a quantifier. Otherwise returns NIL and
|
||||||
resets the lexer to its old position."
|
resets the lexer to its old position."
|
||||||
@ -533,12 +463,7 @@ resets the lexer to its old position."
|
|||||||
(fail lexer)))))
|
(fail lexer)))))
|
||||||
|
|
||||||
(defun get-token (lexer)
|
(defun get-token (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns and consumes the next token from the regex string (or NIL)."
|
"Returns and consumes the next token from the regex string (or NIL)."
|
||||||
;; remember starting position for UNGET-TOKEN function
|
;; remember starting position for UNGET-TOKEN function
|
||||||
(push (lexer-pos lexer)
|
(push (lexer-pos lexer)
|
||||||
@ -737,12 +662,7 @@ resets the lexer to its old position."
|
|||||||
|
|
||||||
(declaim (inline unget-token))
|
(declaim (inline unget-token))
|
||||||
(defun unget-token (lexer)
|
(defun unget-token (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Moves the lexer back to the last position stored in the LAST-POS stack."
|
"Moves the lexer back to the last position stored in the LAST-POS stack."
|
||||||
(if (lexer-last-pos lexer)
|
(if (lexer-last-pos lexer)
|
||||||
(setf (lexer-pos lexer)
|
(setf (lexer-pos lexer)
|
||||||
@ -751,12 +671,7 @@ resets the lexer to its old position."
|
|||||||
|
|
||||||
(declaim (inline start-of-subexpr-p))
|
(declaim (inline start-of-subexpr-p))
|
||||||
(defun start-of-subexpr-p (lexer)
|
(defun start-of-subexpr-p (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Tests whether the next token can start a valid sub-expression, i.e.
|
"Tests whether the next token can start a valid sub-expression, i.e.
|
||||||
a stand-alone regex."
|
a stand-alone regex."
|
||||||
(let* ((pos (lexer-pos lexer))
|
(let* ((pos (lexer-pos lexer))
|
||||||
@ -766,4 +681,4 @@ a stand-alone regex."
|
|||||||
(member (the character next-char)
|
(member (the character next-char)
|
||||||
'(#\) #\|)
|
'(#\) #\|)
|
||||||
:test #'char=)
|
:test #'char=)
|
||||||
(setf (lexer-pos lexer) pos))))))
|
(setf (lexer-pos lexer) pos))))))
|
||||||
|
|||||||
57
lispworks-defsystem.lisp
Normal file
57
lispworks-defsystem.lisp
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||||
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/lispworks-defsystem.lisp,v 1.1 2005/04/30 20:00:50 edi Exp $
|
||||||
|
|
||||||
|
;;; This system definition for LispWorks was kindly provided by Wade Humeniuk
|
||||||
|
|
||||||
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;;; modification, are permitted provided that the following conditions
|
||||||
|
;;; are met:
|
||||||
|
|
||||||
|
;;; * Redistributions of source code must retain the above copyright
|
||||||
|
;;; notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
;;; * Redistributions in binary form must reproduce the above
|
||||||
|
;;; copyright notice, this list of conditions and the following
|
||||||
|
;;; disclaimer in the documentation and/or other materials
|
||||||
|
;;; provided with the distribution.
|
||||||
|
|
||||||
|
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
||||||
|
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||||
|
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||||
|
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
(in-package #:cl-user)
|
||||||
|
|
||||||
|
(defparameter *cl-ppcre-base-directory*
|
||||||
|
(make-pathname :name nil :type nil :version nil
|
||||||
|
:defaults (parse-namestring *load-truename*)))
|
||||||
|
|
||||||
|
(defsystem cl-ppcre
|
||||||
|
(:default-pathname *cl-ppcre-base-directory*
|
||||||
|
:default-type :lisp-file)
|
||||||
|
:members ("packages"
|
||||||
|
"specials"
|
||||||
|
"util"
|
||||||
|
"errors"
|
||||||
|
"lexer"
|
||||||
|
"parser"
|
||||||
|
"regex-class"
|
||||||
|
"convert"
|
||||||
|
"optimize"
|
||||||
|
"closures"
|
||||||
|
"repetition-closures"
|
||||||
|
"scanner"
|
||||||
|
"api")
|
||||||
|
:rules ((:in-order-to :compile :all
|
||||||
|
(:requires (:load :previous)))
|
||||||
|
(:in-order-to :load :all
|
||||||
|
(:requires (:load :previous)))))
|
||||||
59
load.lisp
59
load.lisp
@ -1,7 +1,7 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/load.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/load.lisp,v 1.13 2005/04/01 21:29:09 edi Exp $
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -27,36 +27,41 @@
|
|||||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
(in-package #:cl-user)
|
(in-package :cl-user)
|
||||||
|
|
||||||
(defparameter *cl-ppcre-base-directory*
|
(let ((cl-ppcre-base-directory
|
||||||
(make-pathname :name nil :type nil :version nil
|
(make-pathname :name nil :type nil :version nil
|
||||||
:defaults (parse-namestring *load-truename*)))
|
:defaults (parse-namestring *load-truename*)))
|
||||||
|
must-compile)
|
||||||
(loop for file in '("packages"
|
(with-compilation-unit ()
|
||||||
|
(dolist (file '("packages"
|
||||||
"specials"
|
"specials"
|
||||||
"util"
|
"util"
|
||||||
"errors"
|
"errors"
|
||||||
"lexer"
|
#-:use-acl-regexp2-engine "lexer"
|
||||||
"parser"
|
#-:use-acl-regexp2-engine "parser"
|
||||||
"regex-class"
|
#-:use-acl-regexp2-engine "regex-class"
|
||||||
"convert"
|
#-:use-acl-regexp2-engine "convert"
|
||||||
"optimize"
|
#-:use-acl-regexp2-engine "optimize"
|
||||||
"closures"
|
#-:use-acl-regexp2-engine "closures"
|
||||||
"repetition-closures"
|
#-:use-acl-regexp2-engine "repetition-closures"
|
||||||
"scanner"
|
#-:use-acl-regexp2-engine "scanner"
|
||||||
"api"
|
"api"
|
||||||
"ppcre-tests")
|
"ppcre-tests"))
|
||||||
do (let ((pathname (make-pathname :name file :type "lisp" :version nil
|
(let ((pathname (make-pathname :name file :type "lisp" :version nil
|
||||||
:defaults *cl-ppcre-base-directory*)))
|
:defaults cl-ppcre-base-directory)))
|
||||||
#-:cormanlisp
|
;; don't use COMPILE-FILE in Corman Lisp, it's broken - LOAD
|
||||||
(let ((compiled-pathname (compile-file-pathname pathname)))
|
;; will yield compiled functions anyway
|
||||||
(unless (probe-file compiled-pathname)
|
#-:cormanlisp
|
||||||
(compile-file pathname))
|
(let ((compiled-pathname (compile-file-pathname pathname)))
|
||||||
(setq pathname compiled-pathname))
|
(unless (and (not must-compile)
|
||||||
(load pathname)))
|
(probe-file compiled-pathname)
|
||||||
|
(< (file-write-date pathname)
|
||||||
|
(file-write-date compiled-pathname)))
|
||||||
|
(setq must-compile t)
|
||||||
|
(compile-file pathname))
|
||||||
|
(setq pathname compiled-pathname))
|
||||||
|
(load pathname)))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/optimize.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/optimize.lisp,v 1.26 2005/04/13 15:35:57 edi Exp $
|
||||||
|
|
||||||
;;; This file contains optimizations which can be applied to converted
|
;;; This file contains optimizations which can be applied to converted
|
||||||
;;; parse trees.
|
;;; parse trees.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -32,37 +32,8 @@
|
|||||||
|
|
||||||
(in-package #:cl-ppcre)
|
(in-package #:cl-ppcre)
|
||||||
|
|
||||||
(defun string-list-to-simple-string (string-list)
|
|
||||||
(declare (optimize speed
|
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Concatenates a list of strings to one simple-string."
|
|
||||||
;; this function provided by JP Massar; note that we can't use APPLY
|
|
||||||
;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
|
|
||||||
(let ((total-size 0))
|
|
||||||
(declare (type fixnum total-size))
|
|
||||||
(dolist (string string-list)
|
|
||||||
(declare (type string string))
|
|
||||||
(incf total-size (length string)))
|
|
||||||
(let ((result-string (make-sequence 'simple-string total-size))
|
|
||||||
(curr-pos 0))
|
|
||||||
(declare (type fixnum curr-pos))
|
|
||||||
(dolist (string string-list)
|
|
||||||
(declare (type string string))
|
|
||||||
(replace result-string string :start1 curr-pos)
|
|
||||||
(incf curr-pos (length string)))
|
|
||||||
result-string)))
|
|
||||||
|
|
||||||
(defgeneric flatten (regex)
|
(defgeneric flatten (regex)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Merges adjacent sequences and alternations, i.e. it
|
(:documentation "Merges adjacent sequences and alternations, i.e. it
|
||||||
transforms #<SEQ #<STR \"a\"> #<SEQ #<STR \"b\"> #<STR \"c\">>> to
|
transforms #<SEQ #<STR \"a\"> #<SEQ #<STR \"b\"> #<STR \"c\">>> to
|
||||||
#<SEQ #<STR \"a\"> #<STR \"b\"> #<STR \"c\">>. This is a destructive
|
#<SEQ #<STR \"a\"> #<STR \"b\"> #<STR \"c\">>. This is a destructive
|
||||||
@ -148,17 +119,12 @@ operation on REGEX."))
|
|||||||
regex)
|
regex)
|
||||||
(t
|
(t
|
||||||
;; otherwise (ANCHOR, BACK-REFERENCE, CHAR-CLASS, EVERYTHING,
|
;; otherwise (ANCHOR, BACK-REFERENCE, CHAR-CLASS, EVERYTHING,
|
||||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, and WORD-BOUNDARY) do
|
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY)
|
||||||
;; nothing
|
;; do nothing
|
||||||
regex)))
|
regex)))
|
||||||
|
|
||||||
(defgeneric gather-strings (regex)
|
(defgeneric gather-strings (regex)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Collects adjacent strings or characters into one
|
(:documentation "Collects adjacent strings or characters into one
|
||||||
string provided they have the same case mode. This is a destructive
|
string provided they have the same case mode. This is a destructive
|
||||||
operation on REGEX."))
|
operation on REGEX."))
|
||||||
@ -310,19 +276,14 @@ operation on REGEX."))
|
|||||||
regex)
|
regex)
|
||||||
(t
|
(t
|
||||||
;; otherwise (ANCHOR, BACK-REFERENCE, CHAR-CLASS, EVERYTHING,
|
;; otherwise (ANCHOR, BACK-REFERENCE, CHAR-CLASS, EVERYTHING,
|
||||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, and WORD-BOUNDARY) do
|
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY)
|
||||||
;; nothing
|
;; do nothing
|
||||||
regex)))
|
regex)))
|
||||||
|
|
||||||
;; Note that START-ANCHORED-P will be called after FLATTEN and GATHER-STRINGS.
|
;; Note that START-ANCHORED-P will be called after FLATTEN and GATHER-STRINGS.
|
||||||
|
|
||||||
(defgeneric start-anchored-p (regex &optional in-seq-p)
|
(defgeneric start-anchored-p (regex &optional in-seq-p)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Returns T if REGEX starts with a \"real\" start
|
(:documentation "Returns T if REGEX starts with a \"real\" start
|
||||||
anchor, i.e. one that's not in multi-line mode, NIL otherwise. If
|
anchor, i.e. one that's not in multi-line mode, NIL otherwise. If
|
||||||
IN-SEQ-P is true the function will return :ZERO-LENGTH if REGEX is a
|
IN-SEQ-P is true the function will return :ZERO-LENGTH if REGEX is a
|
||||||
@ -378,6 +339,12 @@ zero-length assertion."))
|
|||||||
(if in-seq-p
|
(if in-seq-p
|
||||||
:zero-length
|
:zero-length
|
||||||
nil))
|
nil))
|
||||||
|
(filter
|
||||||
|
(if (and in-seq-p
|
||||||
|
(len regex)
|
||||||
|
(zerop (len regex)))
|
||||||
|
:zero-length
|
||||||
|
nil))
|
||||||
(t
|
(t
|
||||||
;; BACK-REFERENCE, CHAR-CLASS, EVERYTHING, and STR
|
;; BACK-REFERENCE, CHAR-CLASS, EVERYTHING, and STR
|
||||||
nil)))
|
nil)))
|
||||||
@ -385,12 +352,7 @@ zero-length assertion."))
|
|||||||
;; Note that END-STRING-AUX will be called after FLATTEN and GATHER-STRINGS.
|
;; Note that END-STRING-AUX will be called after FLATTEN and GATHER-STRINGS.
|
||||||
|
|
||||||
(defgeneric end-string-aux (regex &optional old-case-insensitive-p)
|
(defgeneric end-string-aux (regex &optional old-case-insensitive-p)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Returns the constant string (if it exists) REGEX
|
(:documentation "Returns the constant string (if it exists) REGEX
|
||||||
ends with wrapped into a STR object, otherwise NIL.
|
ends with wrapped into a STR object, otherwise NIL.
|
||||||
OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR
|
OLD-CASE-INSENSITIVE-P is the CASE-INSENSITIVE-P slot of the last STR
|
||||||
@ -509,19 +471,17 @@ function called by END-STRIN.)"))
|
|||||||
:case-insensitive-p :void))
|
:case-insensitive-p :void))
|
||||||
(t
|
(t
|
||||||
;; (ALTERNATION, BACK-REFERENCE, BRANCH, CHAR-CLASS, EVERYTHING,
|
;; (ALTERNATION, BACK-REFERENCE, BRANCH, CHAR-CLASS, EVERYTHING,
|
||||||
;; REPETITION)
|
;; REPETITION, FILTER)
|
||||||
nil)))
|
nil)))
|
||||||
|
|
||||||
|
(defgeneric end-string (regex)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
|
(:documentation "Returns the constant string (if it exists) REGEX ends with wrapped
|
||||||
|
into a STR object, otherwise NIL."))
|
||||||
|
|
||||||
(defmethod end-string ((regex regex))
|
(defmethod end-string ((regex regex))
|
||||||
(declare (special end-string-offset))
|
(declare (special end-string-offset))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns the constant string (if it exists) REGEX ends with wrapped
|
|
||||||
into a STR object, otherwise NIL."
|
|
||||||
;; LAST-STR points to the last STR object (seen from the end) that's
|
;; LAST-STR points to the last STR object (seen from the end) that's
|
||||||
;; part of END-STRING; CONTINUEP is set to T if we stop collecting
|
;; part of END-STRING; CONTINUEP is set to T if we stop collecting
|
||||||
;; in the middle of a SEQ
|
;; in the middle of a SEQ
|
||||||
@ -539,12 +499,7 @@ into a STR object, otherwise NIL."
|
|||||||
end-string-offset (offset last-str))))))
|
end-string-offset (offset last-str))))))
|
||||||
|
|
||||||
(defgeneric compute-min-rest (regex current-min-rest)
|
(defgeneric compute-min-rest (regex current-min-rest)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Returns the minimal length of REGEX plus
|
(:documentation "Returns the minimal length of REGEX plus
|
||||||
CURRENT-MIN-REST. This is similar to REGEX-MIN-LENGTH except that it
|
CURRENT-MIN-REST. This is similar to REGEX-MIN-LENGTH except that it
|
||||||
recurses down into REGEX and sets the MIN-REST slots of REPETITION
|
recurses down into REGEX and sets the MIN-REST slots of REPETITION
|
||||||
@ -567,6 +522,9 @@ objects."))
|
|||||||
(defmethod compute-min-rest ((str str) current-min-rest)
|
(defmethod compute-min-rest ((str str) current-min-rest)
|
||||||
(+ current-min-rest (len str)))
|
(+ current-min-rest (len str)))
|
||||||
|
|
||||||
|
(defmethod compute-min-rest ((filter filter) current-min-rest)
|
||||||
|
(+ current-min-rest (or (len filter) 0)))
|
||||||
|
|
||||||
(defmethod compute-min-rest ((repetition repetition) current-min-rest)
|
(defmethod compute-min-rest ((repetition repetition) current-min-rest)
|
||||||
(setf (min-rest repetition) current-min-rest)
|
(setf (min-rest repetition) current-min-rest)
|
||||||
(compute-min-rest (regex repetition) current-min-rest)
|
(compute-min-rest (regex repetition) current-min-rest)
|
||||||
@ -594,4 +552,4 @@ objects."))
|
|||||||
(t
|
(t
|
||||||
;; zero min-len and no embedded regexes (ANCHOR,
|
;; zero min-len and no embedded regexes (ANCHOR,
|
||||||
;; BACK-REFERENCE, VOID, and WORD-BOUNDARY)
|
;; BACK-REFERENCE, VOID, and WORD-BOUNDARY)
|
||||||
current-min-rest)))
|
current-min-rest)))
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/packages.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/packages.lisp,v 1.19 2005/04/01 21:29:10 edi Exp $
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -27,13 +27,16 @@
|
|||||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
(in-package #:cl-user)
|
(in-package :cl-user)
|
||||||
|
|
||||||
#-:cormanlisp
|
#-:cormanlisp
|
||||||
(defpackage #:cl-ppcre
|
(defpackage #:cl-ppcre
|
||||||
(:nicknames #:ppcre)
|
(:nicknames #:ppcre)
|
||||||
(:use #:cl)
|
#+genera (:shadowing-import-from #:common-lisp #:lambda #:simple-string #:string)
|
||||||
|
(:use #-genera #:cl #+genera #:future-common-lisp)
|
||||||
(:export #:create-scanner
|
(:export #:create-scanner
|
||||||
|
#:parse-tree-synonym
|
||||||
|
#:define-parse-tree-synonym
|
||||||
#:scan
|
#:scan
|
||||||
#:scan-to-strings
|
#:scan-to-strings
|
||||||
#:do-scans
|
#:do-scans
|
||||||
@ -56,13 +59,17 @@
|
|||||||
#:ppcre-syntax-error-string
|
#:ppcre-syntax-error-string
|
||||||
#:ppcre-syntax-error-pos
|
#:ppcre-syntax-error-pos
|
||||||
#:register-groups-bind
|
#:register-groups-bind
|
||||||
#:do-register-groups))
|
#:do-register-groups
|
||||||
|
#:*standard-optimize-settings*
|
||||||
|
#:*special-optimize-settings*))
|
||||||
|
|
||||||
#+:cormanlisp
|
#+:cormanlisp
|
||||||
(defpackage "CL-PPCRE"
|
(defpackage "CL-PPCRE"
|
||||||
(:nicknames "PPCRE")
|
(:nicknames "PPCRE")
|
||||||
(:use "CL")
|
(:use "CL")
|
||||||
(:export "CREATE-SCANNER"
|
(:export "CREATE-SCANNER"
|
||||||
|
"PARSE-TREE-SYNONYM"
|
||||||
|
"DEFINE-PARSE-TREE-SYNONYM"
|
||||||
"SCAN"
|
"SCAN"
|
||||||
"SCAN-TO-STRINGS"
|
"SCAN-TO-STRINGS"
|
||||||
"DO-SCANS"
|
"DO-SCANS"
|
||||||
@ -85,4 +92,17 @@
|
|||||||
"PPCRE-SYNTAX-ERROR-STRING"
|
"PPCRE-SYNTAX-ERROR-STRING"
|
||||||
"PPCRE-SYNTAX-ERROR-POS"
|
"PPCRE-SYNTAX-ERROR-POS"
|
||||||
"REGISTER-GROUPS-BIND"
|
"REGISTER-GROUPS-BIND"
|
||||||
"DO-REGISTER-GROUPS"))
|
"DO-REGISTER-GROUPS"
|
||||||
|
"*STANDARD-OPTIMIZE-SETTINGS*"
|
||||||
|
"*SPECIAL-OPTIMIZE-SETTINGS*"))
|
||||||
|
|
||||||
|
#-:cormanlisp
|
||||||
|
(defpackage #:cl-ppcre-test
|
||||||
|
#+genera (:shadowing-import-from #:common-lisp #:lambda)
|
||||||
|
(:use #-genera #:cl #+genera #:future-common-lisp #:cl-ppcre)
|
||||||
|
(:export #:test))
|
||||||
|
|
||||||
|
#+:cormanlisp
|
||||||
|
(defpackage "CL-PPCRE-TEST"
|
||||||
|
(:use "CL" "CL-PPCRE")
|
||||||
|
(:export "TEST"))
|
||||||
|
|||||||
57
parser.lisp
57
parser.lisp
@ -1,5 +1,5 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/parser.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/parser.lisp,v 1.21 2005/08/03 21:11:27 edi Exp $
|
||||||
|
|
||||||
;;; The parser will - with the help of the lexer - parse a regex
|
;;; The parser will - with the help of the lexer - parse a regex
|
||||||
;;; string and convert it into a "parse tree" (see docs for details
|
;;; string and convert it into a "parse tree" (see docs for details
|
||||||
@ -7,7 +7,7 @@
|
|||||||
;;; illegal parse trees. It is assumed that the conversion process
|
;;; illegal parse trees. It is assumed that the conversion process
|
||||||
;;; later on will track them down.
|
;;; later on will track them down.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -36,16 +36,11 @@
|
|||||||
(in-package #:cl-ppcre)
|
(in-package #:cl-ppcre)
|
||||||
|
|
||||||
(defun group (lexer)
|
(defun group (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Parses and consumes a <group>.
|
"Parses and consumes a <group>.
|
||||||
The productions are: <group> -> \"(\"<regex>\")\"
|
The productions are: <group> -> \"(\"<regex>\")\"
|
||||||
\"(?:\"<regex>\")\"
|
\"(?:\"<regex>\")\"
|
||||||
\"(?<\"<regex>\")\"
|
\"(?>\"<regex>\")\"
|
||||||
\"(?<flags>:\"<regex>\")\"
|
\"(?<flags>:\"<regex>\")\"
|
||||||
\"(?=\"<regex>\")\"
|
\"(?=\"<regex>\")\"
|
||||||
\"(?!\"<regex>\")\"
|
\"(?!\"<regex>\")\"
|
||||||
@ -154,12 +149,7 @@ Will return <parse-tree> or (<grouping-type> <parse-tree>) where
|
|||||||
open-token))))
|
open-token))))
|
||||||
|
|
||||||
(defun greedy-quant (lexer)
|
(defun greedy-quant (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Parses and consumes a <greedy-quant>.
|
"Parses and consumes a <greedy-quant>.
|
||||||
The productions are: <greedy-quant> -> <group> | <group><quantifier>
|
The productions are: <greedy-quant> -> <group> | <group><quantifier>
|
||||||
where <quantifier> is parsed by the lexer function GET-QUANTIFIER.
|
where <quantifier> is parsed by the lexer function GET-QUANTIFIER.
|
||||||
@ -173,12 +163,7 @@ Will return <parse-tree> or (:GREEDY-REPETITION <min> <max> <parse-tree>)."
|
|||||||
group)))
|
group)))
|
||||||
|
|
||||||
(defun quant (lexer)
|
(defun quant (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Parses and consumes a <quant>.
|
"Parses and consumes a <quant>.
|
||||||
The productions are: <quant> -> <greedy-quant> | <greedy-quant>\"?\".
|
The productions are: <quant> -> <greedy-quant> | <greedy-quant>\"?\".
|
||||||
Will return the <parse-tree> returned by GREEDY-QUANT and optionally
|
Will return the <parse-tree> returned by GREEDY-QUANT and optionally
|
||||||
@ -193,12 +178,7 @@ change :GREEDY-REPETITION to :NON-GREEDY-REPETITION."
|
|||||||
greedy-quant))
|
greedy-quant))
|
||||||
|
|
||||||
(defun seq (lexer)
|
(defun seq (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Parses and consumes a <seq>.
|
"Parses and consumes a <seq>.
|
||||||
The productions are: <seq> -> <quant> | <quant><seq>.
|
The productions are: <seq> -> <quant> | <quant><seq>.
|
||||||
Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
|
Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
|
||||||
@ -263,12 +243,7 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
|
|||||||
:void)))
|
:void)))
|
||||||
|
|
||||||
(defun reg-expr (lexer)
|
(defun reg-expr (lexer)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Parses and consumes a <regex>, a complete regular expression.
|
"Parses and consumes a <regex>, a complete regular expression.
|
||||||
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
|
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
|
||||||
Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
||||||
@ -313,12 +288,7 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
|||||||
seq)))))))
|
seq)))))))
|
||||||
|
|
||||||
(defun reverse-strings (parse-tree)
|
(defun reverse-strings (parse-tree)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(cond ((stringp parse-tree)
|
(cond ((stringp parse-tree)
|
||||||
(nreverse parse-tree))
|
(nreverse parse-tree))
|
||||||
((consp parse-tree)
|
((consp parse-tree)
|
||||||
@ -330,12 +300,7 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
|||||||
(t parse-tree)))
|
(t parse-tree)))
|
||||||
|
|
||||||
(defun parse-string (string)
|
(defun parse-string (string)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Translate the regex string STRING into a parse tree."
|
"Translate the regex string STRING into a parse tree."
|
||||||
(let* ((lexer (make-lexer string))
|
(let* ((lexer (make-lexer string))
|
||||||
(parse-tree (reverse-strings (reg-expr lexer))))
|
(parse-tree (reverse-strings (reg-expr lexer))))
|
||||||
@ -344,4 +309,4 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
|
|||||||
parse-tree
|
parse-tree
|
||||||
(signal-ppcre-syntax-error*
|
(signal-ppcre-syntax-error*
|
||||||
(lexer-pos lexer)
|
(lexer-pos lexer)
|
||||||
"Expected end of string"))))
|
"Expected end of string"))))
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/ppcre-tests.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/ppcre-tests.lisp,v 1.31 2005/08/23 12:23:13 edi Exp $
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -27,18 +27,6 @@
|
|||||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
(in-package #:cl-user)
|
|
||||||
|
|
||||||
#-:cormanlisp
|
|
||||||
(defpackage #:cl-ppcre-test
|
|
||||||
(:use #:cl #:cl-ppcre)
|
|
||||||
(:export #:test))
|
|
||||||
|
|
||||||
#+:cormanlisp
|
|
||||||
(defpackage "CL-PPCRE-TEST"
|
|
||||||
(:use "CL" "CL-PPCRE")
|
|
||||||
(:export "TEST"))
|
|
||||||
|
|
||||||
(in-package #:cl-ppcre-test)
|
(in-package #:cl-ppcre-test)
|
||||||
|
|
||||||
(defparameter *cl-ppcre-test-base-directory*
|
(defparameter *cl-ppcre-test-base-directory*
|
||||||
@ -64,12 +52,7 @@
|
|||||||
multi-line-mode
|
multi-line-mode
|
||||||
single-line-mode
|
single-line-mode
|
||||||
extended-mode)
|
extended-mode)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Auxiliary function used by TEST to benchmark a regex scanner
|
"Auxiliary function used by TEST to benchmark a regex scanner
|
||||||
against Perl timings."
|
against Perl timings."
|
||||||
(declare (type string string))
|
(declare (type string string))
|
||||||
@ -90,12 +73,7 @@ against Perl timings."
|
|||||||
lispworks
|
lispworks
|
||||||
(and sbcl sb-thread))
|
(and sbcl sb-thread))
|
||||||
(defun threaded-scan (scanner target-string &key (threads 10) (repetitions 5000))
|
(defun threaded-scan (scanner target-string &key (threads 10) (repetitions 5000))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Auxiliary function used by TEST to check whether SCANNER is thread-safe."
|
"Auxiliary function used by TEST to check whether SCANNER is thread-safe."
|
||||||
(full-gc)
|
(full-gc)
|
||||||
(let ((collector (make-array threads))
|
(let ((collector (make-array threads))
|
||||||
@ -155,32 +133,26 @@ against Perl timings."
|
|||||||
:defaults *cl-ppcre-test-base-directory*)
|
:defaults *cl-ppcre-test-base-directory*)
|
||||||
file-name-provided-p)
|
file-name-provided-p)
|
||||||
threaded)
|
threaded)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (ignorable threaded))
|
(declare (ignorable threaded))
|
||||||
"Loop through all test cases in FILE-NAME and print report. Only in
|
"Loop through all test cases in FILE-NAME and print report. Only in
|
||||||
LispWorks and SCL: If THREADED is true, also test whether the scanners
|
LispWorks and SCL: If THREADED is true, also test whether the scanners
|
||||||
work multi-threaded."
|
work multi-threaded."
|
||||||
(with-open-file (stream file-name
|
(with-open-file (stream file-name
|
||||||
#+(or :allegro :clisp :scl)
|
#+(or :allegro :clisp :scl :sbcl)
|
||||||
:external-format
|
:external-format
|
||||||
#+(or :allegro :clisp :scl)
|
#+(or :allegro :clisp :scl :sbcl)
|
||||||
(if file-name-provided-p
|
(if file-name-provided-p
|
||||||
:default
|
:default
|
||||||
#+:allegro :iso-8859-1
|
#+(or :allegro :scl :sbcl) :iso-8859-1
|
||||||
#+:clisp charset:iso-8859-1
|
#+:clisp charset:iso-8859-1))
|
||||||
#+:scl :iso-8859-1))
|
|
||||||
(loop with testcount of-type fixnum = 0
|
(loop with testcount of-type fixnum = 0
|
||||||
with *regex-char-code-limit* = (if file-name-provided-p
|
with *regex-char-code-limit* = (if file-name-provided-p
|
||||||
*regex-char-code-limit*
|
*regex-char-code-limit*
|
||||||
;; the standard test suite
|
;; the standard test suite
|
||||||
;; doesn't need full
|
;; doesn't need Unicode
|
||||||
;; Unicode support
|
;; support
|
||||||
255)
|
256)
|
||||||
with *allow-quoting* = (if file-name-provided-p
|
with *allow-quoting* = (if file-name-provided-p
|
||||||
*allow-quoting*
|
*allow-quoting*
|
||||||
t)
|
t)
|
||||||
|
|||||||
489
regex-class.lisp
489
regex-class.lisp
@ -1,11 +1,11 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/regex-class.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/regex-class.lisp,v 1.26 2005/06/10 10:23:42 edi Exp $
|
||||||
|
|
||||||
;;; This file defines the REGEX class and some utility methods for
|
;;; This file defines the REGEX class and some utility methods for
|
||||||
;;; this class. REGEX objects are used to represent the (transformed)
|
;;; this class. REGEX objects are used to represent the (transformed)
|
||||||
;;; parse trees internally
|
;;; parse trees internally
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -33,221 +33,243 @@
|
|||||||
|
|
||||||
(in-package #:cl-ppcre)
|
(in-package #:cl-ppcre)
|
||||||
|
|
||||||
(locally
|
;; Genera need the eval-when, here, or the types created by the class
|
||||||
(declare (optimize speed
|
;; definitions aren't seen by the typep calls later in the file.
|
||||||
(safety 0)
|
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||||
(space 0)
|
(locally
|
||||||
(debug 0)
|
(declare #.*standard-optimize-settings*)
|
||||||
(compilation-speed 0)
|
(defclass regex ()
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
()
|
||||||
(defclass regex ()
|
(:documentation "The REGEX base class. All other classes inherit
|
||||||
()
|
from this one."))
|
||||||
(:documentation "The REGEX base class. All other classes inherit from this one."))
|
|
||||||
|
|
||||||
|
|
||||||
(defclass seq (regex)
|
(defclass seq (regex)
|
||||||
((elements :initarg :elements
|
((elements :initarg :elements
|
||||||
:accessor elements
|
:accessor elements
|
||||||
:type cons
|
:type cons
|
||||||
:documentation "A list of REGEX objects."))
|
:documentation "A list of REGEX objects."))
|
||||||
(:documentation "SEQ objects represents sequences of
|
(:documentation "SEQ objects represents sequences of
|
||||||
regexes. (Like \"ab\" is the sequence of \"a\" and \"b\".)"))
|
regexes. (Like \"ab\" is the sequence of \"a\" and \"b\".)"))
|
||||||
|
|
||||||
(defclass alternation (regex)
|
(defclass alternation (regex)
|
||||||
((choices :initarg :choices
|
((choices :initarg :choices
|
||||||
:accessor choices
|
:accessor choices
|
||||||
:type cons
|
:type cons
|
||||||
:documentation "A list of REGEX objects"))
|
:documentation "A list of REGEX objects"))
|
||||||
(:documentation "ALTERNATION objects represent alternations of
|
(:documentation "ALTERNATION objects represent alternations of
|
||||||
regexes. (Like \"a|b\" ist the alternation of \"a\" or \"b\".)"))
|
regexes. (Like \"a|b\" ist the alternation of \"a\" or \"b\".)"))
|
||||||
|
|
||||||
(defclass lookahead (regex)
|
(defclass lookahead (regex)
|
||||||
((regex :initarg :regex
|
((regex :initarg :regex
|
||||||
:accessor regex
|
:accessor regex
|
||||||
:documentation "The REGEX object we're checking.")
|
:documentation "The REGEX object we're checking.")
|
||||||
(positivep :initarg :positivep
|
(positivep :initarg :positivep
|
||||||
:reader positivep
|
:reader positivep
|
||||||
:documentation "Whether this assertion is positive."))
|
:documentation "Whether this assertion is positive."))
|
||||||
(:documentation "LOOKAHEAD objects represent look-ahead assertions."))
|
(:documentation "LOOKAHEAD objects represent look-ahead assertions."))
|
||||||
|
|
||||||
(defclass lookbehind (regex)
|
(defclass lookbehind (regex)
|
||||||
((regex :initarg :regex
|
((regex :initarg :regex
|
||||||
:accessor regex
|
:accessor regex
|
||||||
:documentation "The REGEX object we're checking.")
|
:documentation "The REGEX object we're checking.")
|
||||||
(positivep :initarg :positivep
|
(positivep :initarg :positivep
|
||||||
:reader positivep
|
:reader positivep
|
||||||
:documentation "Whether this assertion is positive.")
|
:documentation "Whether this assertion is positive.")
|
||||||
(len :initarg :len
|
(len :initarg :len
|
||||||
:accessor len
|
:accessor len
|
||||||
:type fixnum
|
|
||||||
:documentation "The (fixed) length of the enclosed regex."))
|
|
||||||
(:documentation "LOOKBEHIND objects represent look-behind assertions."))
|
|
||||||
|
|
||||||
(defclass repetition (regex)
|
|
||||||
((regex :initarg :regex
|
|
||||||
:accessor regex
|
|
||||||
:documentation "The REGEX that's repeated.")
|
|
||||||
(greedyp :initarg :greedyp
|
|
||||||
:reader greedyp
|
|
||||||
:documentation "Whether the repetition is greedy.")
|
|
||||||
(minimum :initarg :minimum
|
|
||||||
:accessor minimum
|
|
||||||
:type fixnum
|
|
||||||
:documentation "The minimal number of repetitions.")
|
|
||||||
(maximum :initarg :maximum
|
|
||||||
:accessor maximum
|
|
||||||
:documentation "The maximal number of repetitions.
|
|
||||||
Can be NIL for unbounded.")
|
|
||||||
(min-len :initarg :min-len
|
|
||||||
:reader min-len
|
|
||||||
:documentation "The minimal length of the enclosed regex.")
|
|
||||||
(len :initarg :len
|
|
||||||
:reader len
|
|
||||||
:documentation "The length of the enclosed regex. NIL if unknown.")
|
|
||||||
(min-rest :initform 0
|
|
||||||
:accessor min-rest
|
|
||||||
:type fixnum
|
:type fixnum
|
||||||
:documentation "The minimal number of characters which must
|
:documentation "The (fixed) length of the enclosed regex."))
|
||||||
|
(:documentation "LOOKBEHIND objects represent look-behind assertions."))
|
||||||
|
|
||||||
|
(defclass repetition (regex)
|
||||||
|
((regex :initarg :regex
|
||||||
|
:accessor regex
|
||||||
|
:documentation "The REGEX that's repeated.")
|
||||||
|
(greedyp :initarg :greedyp
|
||||||
|
:reader greedyp
|
||||||
|
:documentation "Whether the repetition is greedy.")
|
||||||
|
(minimum :initarg :minimum
|
||||||
|
:accessor minimum
|
||||||
|
:type fixnum
|
||||||
|
:documentation "The minimal number of repetitions.")
|
||||||
|
(maximum :initarg :maximum
|
||||||
|
:accessor maximum
|
||||||
|
:documentation "The maximal number of repetitions.
|
||||||
|
Can be NIL for unbounded.")
|
||||||
|
(min-len :initarg :min-len
|
||||||
|
:reader min-len
|
||||||
|
:documentation "The minimal length of the enclosed regex.")
|
||||||
|
(len :initarg :len
|
||||||
|
:reader len
|
||||||
|
:documentation "The length of the enclosed regex. NIL
|
||||||
|
if unknown.")
|
||||||
|
(min-rest :initform 0
|
||||||
|
:accessor min-rest
|
||||||
|
:type fixnum
|
||||||
|
:documentation "The minimal number of characters which must
|
||||||
appear after this repetition.")
|
appear after this repetition.")
|
||||||
(contains-register-p :initarg :contains-register-p
|
(contains-register-p :initarg :contains-register-p
|
||||||
:reader contains-register-p
|
:reader contains-register-p
|
||||||
:documentation "If the regex contains a register."))
|
:documentation "If the regex contains a register."))
|
||||||
(:documentation "REPETITION objects represent repetitions of regexes."))
|
(:documentation "REPETITION objects represent repetitions of regexes."))
|
||||||
|
|
||||||
(defclass register (regex)
|
(defclass register (regex)
|
||||||
((regex :initarg :regex
|
((regex :initarg :regex
|
||||||
:accessor regex
|
:accessor regex
|
||||||
:documentation "The inner regex.")
|
:documentation "The inner regex.")
|
||||||
(num :initarg :num
|
(num :initarg :num
|
||||||
:reader num
|
:reader num
|
||||||
:type fixnum
|
:type fixnum
|
||||||
:documentation "The number of this register, starting from 0.
|
:documentation "The number of this register, starting from 0.
|
||||||
This is the index into *REGS-START* and *REGS-END*."))
|
This is the index into *REGS-START* and *REGS-END*."))
|
||||||
(:documentation "REGISTER objects represent register groups."))
|
(:documentation "REGISTER objects represent register groups."))
|
||||||
|
|
||||||
(defclass standalone (regex)
|
(defclass standalone (regex)
|
||||||
((regex :initarg :regex
|
((regex :initarg :regex
|
||||||
:accessor regex
|
:accessor regex
|
||||||
:documentation "The inner regex."))
|
:documentation "The inner regex."))
|
||||||
(:documentation "A standalone regular expression."))
|
(:documentation "A standalone regular expression."))
|
||||||
|
|
||||||
(defclass back-reference (regex)
|
(defclass back-reference (regex)
|
||||||
((num :initarg :num
|
((num :initarg :num
|
||||||
:accessor num
|
:accessor num
|
||||||
:type fixnum
|
:type fixnum
|
||||||
:documentation "The number of the register this reference refers to.")
|
:documentation "The number of the register this
|
||||||
(case-insensitive-p :initarg :case-insensitive-p
|
reference refers to.")
|
||||||
:reader case-insensitive-p
|
(case-insensitive-p :initarg :case-insensitive-p
|
||||||
:documentation "Whether we check case-insensitively."))
|
:reader case-insensitive-p
|
||||||
(:documentation "BACK-REFERENCE objects represent backreferences."))
|
:documentation "Whether we check
|
||||||
|
case-insensitively."))
|
||||||
|
(:documentation "BACK-REFERENCE objects represent backreferences."))
|
||||||
|
|
||||||
(defclass char-class (regex)
|
(defclass char-class (regex)
|
||||||
((hash :initarg :hash
|
((hash :initarg :hash
|
||||||
:reader hash
|
:reader hash
|
||||||
:type (or hash-table null)
|
:type (or hash-table null)
|
||||||
:documentation "A hash table the keys of which are the characters;
|
:documentation "A hash table the keys of which are the
|
||||||
the values are always T.")
|
characters; the values are always T.")
|
||||||
(case-insensitive-p :initarg :case-insensitive-p
|
(case-insensitive-p :initarg :case-insensitive-p
|
||||||
:reader case-insensitive-p
|
:reader case-insensitive-p
|
||||||
:documentation "If the char class case-insensitive.")
|
:documentation "If the char class
|
||||||
(invertedp :initarg :invertedp
|
case-insensitive.")
|
||||||
:reader invertedp
|
(invertedp :initarg :invertedp
|
||||||
:documentation "Whether we mean the inverse of the char class.")
|
:reader invertedp
|
||||||
(word-char-class-p :initarg :word-char-class-p
|
:documentation "Whether we mean the inverse of
|
||||||
:reader word-char-class-p
|
the char class.")
|
||||||
:documentation "Whether this CHAR CLASS
|
(word-char-class-p :initarg :word-char-class-p
|
||||||
|
:reader word-char-class-p
|
||||||
|
:documentation "Whether this CHAR CLASS
|
||||||
represents the special class WORD-CHAR-CLASS."))
|
represents the special class WORD-CHAR-CLASS."))
|
||||||
(:documentation "CHAR-CLASS objects represent character classes."))
|
(:documentation "CHAR-CLASS objects represent character classes."))
|
||||||
|
|
||||||
(defclass str (regex)
|
(defclass str (regex)
|
||||||
((str :initarg :str
|
((str :initarg :str
|
||||||
:accessor str
|
:accessor str
|
||||||
:type string
|
:type string
|
||||||
:documentation "The actual string.")
|
:documentation "The actual string.")
|
||||||
(len :initform 0
|
(len :initform 0
|
||||||
:accessor len
|
:accessor len
|
||||||
:type fixnum
|
:type fixnum
|
||||||
:documentation "The length of the string.")
|
:documentation "The length of the string.")
|
||||||
(case-insensitive-p :initarg :case-insensitive-p
|
(case-insensitive-p :initarg :case-insensitive-p
|
||||||
:reader case-insensitive-p
|
:reader case-insensitive-p
|
||||||
:documentation "If we match case-insensitively.")
|
:documentation "If we match case-insensitively.")
|
||||||
(offset :initform nil
|
(offset :initform nil
|
||||||
:accessor offset
|
:accessor offset
|
||||||
:documentation "Offset from the left of the whole parse tree.
|
:documentation "Offset from the left of the whole
|
||||||
The first regex has offset 0.
|
parse tree. The first regex has offset 0. NIL if unknown, i.e. behind
|
||||||
NIL if unknown, i.e. behind a variable-length regex.")
|
a variable-length regex.")
|
||||||
(skip :initform nil
|
(skip :initform nil
|
||||||
:initarg :skip
|
:initarg :skip
|
||||||
:accessor skip
|
:accessor skip
|
||||||
:documentation "If we can avoid testing for this string
|
:documentation "If we can avoid testing for this
|
||||||
because the SCAN function has done this already.")
|
string because the SCAN function has done this already.")
|
||||||
(start-of-end-string-p :initform nil
|
(start-of-end-string-p :initform nil
|
||||||
:accessor start-of-end-string-p
|
:accessor start-of-end-string-p
|
||||||
:documentation "If this is the unique STR which
|
:documentation "If this is the unique
|
||||||
starts END-STRING (a slot of MATCHER)."))
|
STR which starts END-STRING (a slot of MATCHER)."))
|
||||||
(:documentation "STR objects represent string."))
|
(:documentation "STR objects represent string."))
|
||||||
|
|
||||||
(defclass anchor (regex)
|
(defclass anchor (regex)
|
||||||
((startp :initarg :startp
|
((startp :initarg :startp
|
||||||
:reader startp
|
:reader startp
|
||||||
:documentation "Whether this is a \"start anchor\".")
|
:documentation "Whether this is a \"start anchor\".")
|
||||||
(multi-line-p :initarg :multi-line-p
|
(multi-line-p :initarg :multi-line-p
|
||||||
:reader multi-line-p
|
:reader multi-line-p
|
||||||
:documentation "Whether we're in multi-line mode,
|
:documentation "Whether we're in multi-line mode,
|
||||||
i.e. whether each #\\Newline is surrounded by anchors.")
|
i.e. whether each #\\Newline is surrounded by anchors.")
|
||||||
(no-newline-p :initarg :no-newline-p
|
(no-newline-p :initarg :no-newline-p
|
||||||
:reader no-newline-p
|
:reader no-newline-p
|
||||||
:documentation "Whether we ignore #\\Newline at the end."))
|
:documentation "Whether we ignore #\\Newline at the end."))
|
||||||
(:documentation "ANCHOR objects represent anchors like \"^\" or \"$\"."))
|
(:documentation "ANCHOR objects represent anchors like \"^\" or \"$\"."))
|
||||||
|
|
||||||
(defclass everything (regex)
|
(defclass everything (regex)
|
||||||
((single-line-p :initarg :single-line-p
|
((single-line-p :initarg :single-line-p
|
||||||
:reader single-line-p
|
:reader single-line-p
|
||||||
:documentation "Whether we're in single-line mode,
|
:documentation "Whether we're in single-line mode,
|
||||||
i.e. whether we also match #\\Newline."))
|
i.e. whether we also match #\\Newline."))
|
||||||
(:documentation "EVERYTHING objects represent regexes matching
|
(:documentation "EVERYTHING objects represent regexes matching
|
||||||
\"everything\", i.e. dots."))
|
\"everything\", i.e. dots."))
|
||||||
|
|
||||||
(defclass word-boundary (regex)
|
(defclass word-boundary (regex)
|
||||||
((negatedp :initarg :negatedp
|
((negatedp :initarg :negatedp
|
||||||
:reader negatedp
|
:reader negatedp
|
||||||
:documentation "Whether we mean the opposite,
|
:documentation "Whether we mean the opposite,
|
||||||
i.e. no word-boundary."))
|
i.e. no word-boundary."))
|
||||||
(:documentation "WORD-BOUNDARY objects represent word-boundary assertions."))
|
(:documentation "WORD-BOUNDARY objects represent word-boundary assertions."))
|
||||||
|
|
||||||
(defclass branch (regex)
|
(defclass branch (regex)
|
||||||
((test :initarg :test
|
((test :initarg :test
|
||||||
:accessor test
|
:accessor test
|
||||||
:documentation "The test of this branch, one of LOOKAHEAD,
|
:documentation "The test of this branch, one of
|
||||||
LOOKBEHIND, or a number.")
|
LOOKAHEAD, LOOKBEHIND, or a number.")
|
||||||
(then-regex :initarg :then-regex
|
(then-regex :initarg :then-regex
|
||||||
:accessor then-regex
|
:accessor then-regex
|
||||||
:documentation "The regex that's to be matched if the
|
:documentation "The regex that's to be matched if the
|
||||||
test succeeds.")
|
test succeeds.")
|
||||||
(else-regex :initarg :else-regex
|
(else-regex :initarg :else-regex
|
||||||
:initform (make-instance 'void)
|
:initform (make-instance 'void)
|
||||||
:accessor else-regex
|
:accessor else-regex
|
||||||
:documentation "The regex that's to be matched if the
|
:documentation "The regex that's to be matched if the
|
||||||
test fails."))
|
test fails."))
|
||||||
(:documentation "BRANCH objects represent Perl's conditional regular
|
(:documentation "BRANCH objects represent Perl's conditional regular
|
||||||
expressions."))
|
expressions."))
|
||||||
|
|
||||||
|
(defclass filter (regex)
|
||||||
|
((fn :initarg :fn
|
||||||
|
:accessor fn
|
||||||
|
:type (or function symbol)
|
||||||
|
:documentation "The user-defined function.")
|
||||||
|
(len :initarg :len
|
||||||
|
:reader len
|
||||||
|
:documentation "The fixed length of this filter or NIL."))
|
||||||
|
(:documentation "FILTER objects represent arbitrary functions
|
||||||
|
defined by the user."))
|
||||||
|
|
||||||
(defclass void (regex)
|
(defclass void (regex)
|
||||||
()
|
()
|
||||||
(:documentation "VOID objects represent empty regular expressions.")))
|
(:documentation "VOID objects represent empty regular expressions."))))
|
||||||
|
|
||||||
(declaim (ftype (function (t) simple-string) str))
|
(defmethod initialize-instance :after ((char-class char-class) &rest init-args)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
|
"Make large hash tables smaller, if possible."
|
||||||
|
(let ((hash (getf init-args :hash)))
|
||||||
|
(when (and hash
|
||||||
|
(> *regex-char-code-limit* 256)
|
||||||
|
(> (hash-table-count hash)
|
||||||
|
(/ *regex-char-code-limit* 2)))
|
||||||
|
(setf (slot-value char-class 'hash)
|
||||||
|
(merge-inverted-hash (make-hash-table)
|
||||||
|
hash)
|
||||||
|
(slot-value char-class 'invertedp)
|
||||||
|
(not (slot-value char-class 'invertedp))))))
|
||||||
|
|
||||||
;;; The following four methods allow a VOID object to behave like a
|
;;; The following four methods allow a VOID object to behave like a
|
||||||
;;; zero-length STR object (only readers needed)
|
;;; zero-length STR object (only readers needed)
|
||||||
|
|
||||||
(defmethod initialize-instance :after ((str str) &rest init-args)
|
(defmethod initialize-instance :after ((str str) &rest init-args)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (ignore init-args))
|
(declare (ignore init-args))
|
||||||
"Automatically computes the length of a STR after initialization."
|
"Automatically computes the length of a STR after initialization."
|
||||||
(let ((str-slot (slot-value str 'str)))
|
(let ((str-slot (slot-value str 'str)))
|
||||||
@ -256,48 +278,23 @@ expressions."))
|
|||||||
(setf (len str) (length (str str))))
|
(setf (len str) (length (str str))))
|
||||||
|
|
||||||
(defmethod len ((void void))
|
(defmethod len ((void void))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
0)
|
0)
|
||||||
|
|
||||||
(defmethod str ((void void))
|
(defmethod str ((void void))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"")
|
"")
|
||||||
|
|
||||||
(defmethod skip ((void void))
|
(defmethod skip ((void void))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
nil)
|
nil)
|
||||||
|
|
||||||
(defmethod start-of-end-string-p ((void void))
|
(defmethod start-of-end-string-p ((void void))
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
nil)
|
nil)
|
||||||
|
|
||||||
(defgeneric case-mode (regex old-case-mode)
|
(defgeneric case-mode (regex old-case-mode)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Utility function used by the optimizer (see GATHER-STRINGS).
|
(:documentation "Utility function used by the optimizer (see GATHER-STRINGS).
|
||||||
Returns a keyword denoting the case-(in)sensitivity of a STR or its
|
Returns a keyword denoting the case-(in)sensitivity of a STR or its
|
||||||
second argument if the STR has length 0. Returns NIL for REGEX objects
|
second argument if the STR has length 0. Returns NIL for REGEX objects
|
||||||
@ -316,12 +313,7 @@ which are not of type STR."))
|
|||||||
nil)
|
nil)
|
||||||
|
|
||||||
(defgeneric copy-regex (regex)
|
(defgeneric copy-regex (regex)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Implements a deep copy of a REGEX object."))
|
(:documentation "Implements a deep copy of a REGEX object."))
|
||||||
|
|
||||||
(defmethod copy-regex ((anchor anchor))
|
(defmethod copy-regex ((anchor anchor))
|
||||||
@ -406,6 +398,11 @@ which are not of type STR."))
|
|||||||
:str (str str)
|
:str (str str)
|
||||||
:case-insensitive-p (case-insensitive-p str)))
|
:case-insensitive-p (case-insensitive-p str)))
|
||||||
|
|
||||||
|
(defmethod copy-regex ((filter filter))
|
||||||
|
(make-instance 'filter
|
||||||
|
:fn (fn filter)
|
||||||
|
:len (len filter)))
|
||||||
|
|
||||||
;;; Note that COPY-REGEX and REMOVE-REGISTERS could have easily been
|
;;; Note that COPY-REGEX and REMOVE-REGISTERS could have easily been
|
||||||
;;; wrapped into one function. Maybe in the next release...
|
;;; wrapped into one function. Maybe in the next release...
|
||||||
|
|
||||||
@ -417,12 +414,7 @@ which are not of type STR."))
|
|||||||
;;; and therefore we stop REGISTER removal once we see an ALTERNATION.
|
;;; and therefore we stop REGISTER removal once we see an ALTERNATION.
|
||||||
|
|
||||||
(defgeneric remove-registers (regex)
|
(defgeneric remove-registers (regex)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Returns a deep copy of a REGEX (see COPY-REGEX) and
|
(:documentation "Returns a deep copy of a REGEX (see COPY-REGEX) and
|
||||||
optionally removes embedded REGISTER objects if possible and if the
|
optionally removes embedded REGISTER objects if possible and if the
|
||||||
special variable REMOVE-REGISTERS-P is true."))
|
special variable REMOVE-REGISTERS-P is true."))
|
||||||
@ -491,12 +483,7 @@ special variable REMOVE-REGISTERS-P is true."))
|
|||||||
:elements (mapcar #'remove-registers (elements seq))))
|
:elements (mapcar #'remove-registers (elements seq))))
|
||||||
|
|
||||||
(defgeneric everythingp (regex)
|
(defgeneric everythingp (regex)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Returns an EVERYTHING object if REGEX is equivalent
|
(:documentation "Returns an EVERYTHING object if REGEX is equivalent
|
||||||
to this object, otherwise NIL. So, \"(.){1}\" would return true
|
to this object, otherwise NIL. So, \"(.){1}\" would return true
|
||||||
(i.e. the object corresponding to \".\", for example."))
|
(i.e. the object corresponding to \".\", for example."))
|
||||||
@ -539,16 +526,11 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
|||||||
|
|
||||||
(defmethod everythingp ((regex regex))
|
(defmethod everythingp ((regex regex))
|
||||||
;; the general case for ANCHOR, BACK-REFERENCE, BRANCH, CHAR-CLASS,
|
;; the general case for ANCHOR, BACK-REFERENCE, BRANCH, CHAR-CLASS,
|
||||||
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, and WORD-BOUNDARY
|
;; LOOKAHEAD, LOOKBEHIND, STR, VOID, FILTER, and WORD-BOUNDARY
|
||||||
nil)
|
nil)
|
||||||
|
|
||||||
(defgeneric regex-length (regex)
|
(defgeneric regex-length (regex)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Return the length of REGEX if it is fixed, NIL otherwise."))
|
(:documentation "Return the length of REGEX if it is fixed, NIL otherwise."))
|
||||||
|
|
||||||
(defmethod regex-length ((seq seq))
|
(defmethod regex-length ((seq seq))
|
||||||
@ -586,7 +568,7 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
|||||||
(maximum maximum))
|
(maximum maximum))
|
||||||
repetition
|
repetition
|
||||||
(if (and len
|
(if (and len
|
||||||
(eq minimum maximum))
|
(eql minimum maximum))
|
||||||
(* minimum len)
|
(* minimum len)
|
||||||
nil)))
|
nil)))
|
||||||
|
|
||||||
@ -610,18 +592,16 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
|||||||
(defmethod regex-length ((str str))
|
(defmethod regex-length ((str str))
|
||||||
(len str))
|
(len str))
|
||||||
|
|
||||||
|
(defmethod regex-length ((filter filter))
|
||||||
|
(len filter))
|
||||||
|
|
||||||
(defmethod regex-length ((regex regex))
|
(defmethod regex-length ((regex regex))
|
||||||
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
||||||
;; WORD-BOUNDARY (which all have zero-length)
|
;; WORD-BOUNDARY (which all have zero-length)
|
||||||
0)
|
0)
|
||||||
|
|
||||||
(defgeneric regex-min-length (regex)
|
(defgeneric regex-min-length (regex)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Returns the minimal length of REGEX."))
|
(:documentation "Returns the minimal length of REGEX."))
|
||||||
|
|
||||||
(defmethod regex-min-length ((seq seq))
|
(defmethod regex-min-length ((seq seq))
|
||||||
@ -662,18 +642,17 @@ to this object, otherwise NIL. So, \"(.){1}\" would return true
|
|||||||
(defmethod regex-min-length ((str str))
|
(defmethod regex-min-length ((str str))
|
||||||
(len str))
|
(len str))
|
||||||
|
|
||||||
|
(defmethod regex-min-length ((filter filter))
|
||||||
|
(or (len filter)
|
||||||
|
0))
|
||||||
|
|
||||||
(defmethod regex-min-length ((regex regex))
|
(defmethod regex-min-length ((regex regex))
|
||||||
;; the general case for ANCHOR, BACK-REFERENCE, LOOKAHEAD,
|
;; the general case for ANCHOR, BACK-REFERENCE, LOOKAHEAD,
|
||||||
;; LOOKBEHIND, VOID, and WORD-BOUNDARY
|
;; LOOKBEHIND, VOID, and WORD-BOUNDARY
|
||||||
0)
|
0)
|
||||||
|
|
||||||
(defgeneric compute-offsets (regex start-pos)
|
(defgeneric compute-offsets (regex start-pos)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(:documentation "Returns the offset the following regex would have
|
(:documentation "Returns the offset the following regex would have
|
||||||
relative to START-POS or NIL if we can't compute it. Sets the OFFSET
|
relative to START-POS or NIL if we can't compute it. Sets the OFFSET
|
||||||
slot of REGEX to START-POS if REGEX is a STR. May also affect OFFSET
|
slot of REGEX to START-POS if REGEX is a STR. May also affect OFFSET
|
||||||
@ -746,7 +725,13 @@ slots of STR objects further down the tree."))
|
|||||||
(declare (ignore start-pos))
|
(declare (ignore start-pos))
|
||||||
nil)
|
nil)
|
||||||
|
|
||||||
|
(defmethod compute-offsets ((filter filter) start-pos)
|
||||||
|
(let ((len (len filter)))
|
||||||
|
(if len
|
||||||
|
(+ start-pos len)
|
||||||
|
nil)))
|
||||||
|
|
||||||
(defmethod compute-offsets ((regex regex) start-pos)
|
(defmethod compute-offsets ((regex regex) start-pos)
|
||||||
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
;; the general case for ANCHOR, LOOKAHEAD, LOOKBEHIND, VOID, and
|
||||||
;; WORD-BOUNDARY (which all have zero-length)
|
;; WORD-BOUNDARY (which all have zero-length)
|
||||||
start-pos)
|
start-pos)
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/repetition-closures.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/repetition-closures.lisp,v 1.24 2005/04/13 15:35:58 edi Exp $
|
||||||
|
|
||||||
;;; This is actually a part of closures.lisp which we put into a
|
;;; This is actually a part of closures.lisp which we put into a
|
||||||
;;; separate file because it is rather complex. We only deal with
|
;;; separate file because it is rather complex. We only deal with
|
||||||
@ -7,7 +7,7 @@
|
|||||||
;;; rather crazy micro-optimizations which were introduced to be as
|
;;; rather crazy micro-optimizations which were introduced to be as
|
||||||
;;; competitive with Perl as possible in tight loops.
|
;;; competitive with Perl as possible in tight loops.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -117,12 +117,7 @@ repetition matches at CURR-POS."
|
|||||||
(go backward-loop)))))))
|
(go backward-loop)))))))
|
||||||
|
|
||||||
(defun create-greedy-everything-matcher (maximum min-rest next-fn)
|
(defun create-greedy-everything-matcher (maximum min-rest next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum min-rest)
|
(declare (type fixnum min-rest)
|
||||||
(type function next-fn))
|
(type function next-fn))
|
||||||
"Creates a closure which just matches as far ahead as possible,
|
"Creates a closure which just matches as far ahead as possible,
|
||||||
@ -149,18 +144,16 @@ i.e. a closure for a dot in single-line mode."
|
|||||||
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
|
(loop for curr-pos of-type fixnum from target-end-pos downto start-pos
|
||||||
thereis (funcall next-fn curr-pos))))))
|
thereis (funcall next-fn curr-pos))))))
|
||||||
|
|
||||||
(defmethod create-greedy-constant-length-matcher ((repetition repetition)
|
(defgeneric create-greedy-constant-length-matcher (repetition next-fn)
|
||||||
next-fn)
|
(declare #.*standard-optimize-settings*)
|
||||||
(declare (optimize speed
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION is greedy and the minimal number of repetitions is
|
that REPETITION is greedy and the minimal number of repetitions is
|
||||||
zero. It is furthermore assumed that the inner regex of REPETITION is
|
zero. It is furthermore assumed that the inner regex of REPETITION is
|
||||||
of fixed length and doesn't contain registers."
|
of fixed length and doesn't contain registers."))
|
||||||
|
|
||||||
|
(defmethod create-greedy-constant-length-matcher ((repetition repetition)
|
||||||
|
next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
(let ((len (len repetition))
|
(let ((len (len repetition))
|
||||||
(maximum (maximum repetition))
|
(maximum (maximum repetition))
|
||||||
(regex (regex repetition))
|
(regex (regex repetition))
|
||||||
@ -212,19 +205,17 @@ of fixed length and doesn't contain registers."
|
|||||||
(declare (type function inner-matcher))
|
(declare (type function inner-matcher))
|
||||||
(greedy-constant-length-closure
|
(greedy-constant-length-closure
|
||||||
(funcall inner-matcher curr-pos)))))))))
|
(funcall inner-matcher curr-pos)))))))))
|
||||||
|
|
||||||
(defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
(defgeneric create-greedy-no-zero-matcher (repetition next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION is greedy and the minimal number of repetitions is
|
that REPETITION is greedy and the minimal number of repetitions is
|
||||||
zero. It is furthermore assumed that the inner regex of REPETITION can
|
zero. It is furthermore assumed that the inner regex of REPETITION can
|
||||||
never match a zero-length string (or instead the maximal number of
|
never match a zero-length string (or instead the maximal number of
|
||||||
repetitions is 1)."
|
repetitions is 1)."))
|
||||||
|
|
||||||
|
(defmethod create-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
(let ((maximum (maximum repetition))
|
(let ((maximum (maximum repetition))
|
||||||
;; REPEAT-MATCHER is part of the closure's environment but it
|
;; REPEAT-MATCHER is part of the closure's environment but it
|
||||||
;; can only be defined after GREEDY-AUX is defined
|
;; can only be defined after GREEDY-AUX is defined
|
||||||
@ -283,16 +274,14 @@ repetitions is 1)."
|
|||||||
(create-matcher-aux (regex repetition) #'greedy-aux))
|
(create-matcher-aux (regex repetition) #'greedy-aux))
|
||||||
#'greedy-aux)))))
|
#'greedy-aux)))))
|
||||||
|
|
||||||
(defmethod create-greedy-matcher ((repetition repetition) next-fn)
|
(defgeneric create-greedy-matcher (repetition next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION is greedy and the minimal number of repetitions is
|
that REPETITION is greedy and the minimal number of repetitions is
|
||||||
zero."
|
zero."))
|
||||||
|
|
||||||
|
(defmethod create-greedy-matcher ((repetition repetition) next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
(let ((maximum (maximum repetition))
|
(let ((maximum (maximum repetition))
|
||||||
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
||||||
;; we have to watch out for endless loops as the inner regex might
|
;; we have to watch out for endless loops as the inner regex might
|
||||||
@ -409,17 +398,15 @@ repetition matches at CURR-POS."
|
|||||||
while ,check-curr-pos
|
while ,check-curr-pos
|
||||||
finally (return (funcall next-fn curr-pos)))))))
|
finally (return (funcall next-fn curr-pos)))))))
|
||||||
|
|
||||||
(defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
|
(defgeneric create-non-greedy-constant-length-matcher (repetition next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION is non-greedy and the minimal number of repetitions is
|
that REPETITION is non-greedy and the minimal number of repetitions is
|
||||||
zero. It is furthermore assumed that the inner regex of REPETITION is
|
zero. It is furthermore assumed that the inner regex of REPETITION is
|
||||||
of fixed length and doesn't contain registers."
|
of fixed length and doesn't contain registers."))
|
||||||
|
|
||||||
|
(defmethod create-non-greedy-constant-length-matcher ((repetition repetition) next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
(let ((len (len repetition))
|
(let ((len (len repetition))
|
||||||
(maximum (maximum repetition))
|
(maximum (maximum repetition))
|
||||||
(regex (regex repetition))
|
(regex (regex repetition))
|
||||||
@ -475,18 +462,16 @@ of fixed length and doesn't contain registers."
|
|||||||
(non-greedy-constant-length-closure
|
(non-greedy-constant-length-closure
|
||||||
(funcall inner-matcher curr-pos)))))))))
|
(funcall inner-matcher curr-pos)))))))))
|
||||||
|
|
||||||
(defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
(defgeneric create-non-greedy-no-zero-matcher (repetition next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION is non-greedy and the minimal number of repetitions is
|
that REPETITION is non-greedy and the minimal number of repetitions is
|
||||||
zero. It is furthermore assumed that the inner regex of REPETITION can
|
zero. It is furthermore assumed that the inner regex of REPETITION can
|
||||||
never match a zero-length string (or instead the maximal number of
|
never match a zero-length string (or instead the maximal number of
|
||||||
repetitions is 1)."
|
repetitions is 1)."))
|
||||||
|
|
||||||
|
(defmethod create-non-greedy-no-zero-matcher ((repetition repetition) next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
(let ((maximum (maximum repetition))
|
(let ((maximum (maximum repetition))
|
||||||
;; REPEAT-MATCHER is part of the closure's environment but it
|
;; REPEAT-MATCHER is part of the closure's environment but it
|
||||||
;; can only be defined after NON-GREEDY-AUX is defined
|
;; can only be defined after NON-GREEDY-AUX is defined
|
||||||
@ -543,16 +528,14 @@ repetitions is 1)."
|
|||||||
(create-matcher-aux (regex repetition) #'non-greedy-aux))
|
(create-matcher-aux (regex repetition) #'non-greedy-aux))
|
||||||
#'non-greedy-aux)))))
|
#'non-greedy-aux)))))
|
||||||
|
|
||||||
(defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
|
(defgeneric create-non-greedy-matcher (repetition next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION is non-greedy and the minimal number of repetitions is
|
that REPETITION is non-greedy and the minimal number of repetitions is
|
||||||
zero."
|
zero."))
|
||||||
|
|
||||||
|
(defmethod create-non-greedy-matcher ((repetition repetition) next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
;; we make a reservation for our slot in *LAST-POS-STORES* because
|
||||||
;; we have to watch out for endless loops as the inner regex might
|
;; we have to watch out for endless loops as the inner regex might
|
||||||
;; match zero-length strings
|
;; match zero-length strings
|
||||||
@ -656,18 +639,17 @@ of the repetition matches at CURR-POS."
|
|||||||
;; finally call NEXT-FN if we made it that far
|
;; finally call NEXT-FN if we made it that far
|
||||||
(funcall next-fn target-end-pos)))))
|
(funcall next-fn target-end-pos)))))
|
||||||
|
|
||||||
(defmethod create-constant-repetition-constant-length-matcher
|
(defgeneric create-constant-repetition-constant-length-matcher
|
||||||
((repetition repetition) next-fn)
|
(repetition next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION has a constant number of repetitions. It is
|
that REPETITION has a constant number of repetitions. It is
|
||||||
furthermore assumed that the inner regex of REPETITION is of fixed
|
furthermore assumed that the inner regex of REPETITION is of fixed
|
||||||
length and doesn't contain registers."
|
length and doesn't contain registers."))
|
||||||
|
|
||||||
|
(defmethod create-constant-repetition-constant-length-matcher
|
||||||
|
((repetition repetition) next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
(let ((len (len repetition))
|
(let ((len (len repetition))
|
||||||
(repetitions (minimum repetition))
|
(repetitions (minimum repetition))
|
||||||
(regex (regex repetition)))
|
(regex (regex repetition)))
|
||||||
@ -721,8 +703,8 @@ length and doesn't contain registers."
|
|||||||
(declare (type fixnum start-pos))
|
(declare (type fixnum start-pos))
|
||||||
(let ((next-pos (+ start-pos repetitions)))
|
(let ((next-pos (+ start-pos repetitions)))
|
||||||
(declare (type fixnum next-pos))
|
(declare (type fixnum next-pos))
|
||||||
(or (<= next-pos *end-pos*)
|
(and (<= next-pos *end-pos*)
|
||||||
(funcall next-fn next-pos))))
|
(funcall next-fn next-pos))))
|
||||||
;; a dot which is not in single-line-mode - make sure we
|
;; a dot which is not in single-line-mode - make sure we
|
||||||
;; don't match #\Newline
|
;; don't match #\Newline
|
||||||
(constant-repetition-constant-length-closure
|
(constant-repetition-constant-length-closure
|
||||||
@ -736,15 +718,13 @@ length and doesn't contain registers."
|
|||||||
(constant-repetition-constant-length-closure
|
(constant-repetition-constant-length-closure
|
||||||
(funcall inner-matcher curr-pos))))))))
|
(funcall inner-matcher curr-pos))))))))
|
||||||
|
|
||||||
|
(defgeneric create-constant-repetition-matcher (repetition next-fn)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
|
(:documentation "Creates a closure which tries to match REPETITION. It is assumed
|
||||||
|
that REPETITION has a constant number of repetitions."))
|
||||||
|
|
||||||
(defmethod create-constant-repetition-matcher ((repetition repetition) next-fn)
|
(defmethod create-constant-repetition-matcher ((repetition repetition) next-fn)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Creates a closure which tries to match REPETITION. It is assumed
|
|
||||||
that REPETITION has a constant number of repetitions."
|
|
||||||
(let ((repetitions (minimum repetition))
|
(let ((repetitions (minimum repetition))
|
||||||
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
;; we make a reservation for our slot in *REPEAT-COUNTERS*
|
||||||
;; because we need to keep track of the number of repetitions
|
;; because we need to keep track of the number of repetitions
|
||||||
|
|||||||
62
scanner.lisp
62
scanner.lisp
@ -1,10 +1,10 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/scanner.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/scanner.lisp,v 1.26 2005/07/19 23:18:15 edi Exp $
|
||||||
|
|
||||||
;;; Here the scanner for the actual regex as well as utility scanners
|
;;; Here the scanner for the actual regex as well as utility scanners
|
||||||
;;; for the constant start and end strings are created.
|
;;; for the constant start and end strings are created.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -37,7 +37,8 @@
|
|||||||
(let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
|
(let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
|
||||||
`(lambda (start-pos)
|
`(lambda (start-pos)
|
||||||
(declare (type fixnum start-pos))
|
(declare (type fixnum start-pos))
|
||||||
(if (> (the fixnum (+ start-pos m)) *end-pos*)
|
(if (or (minusp start-pos)
|
||||||
|
(> (the fixnum (+ start-pos m)) *end-pos*))
|
||||||
nil
|
nil
|
||||||
(loop named bmh-matcher
|
(loop named bmh-matcher
|
||||||
for k of-type fixnum = (+ start-pos m -1)
|
for k of-type fixnum = (+ start-pos m -1)
|
||||||
@ -52,12 +53,7 @@
|
|||||||
(return-from bmh-matcher (1+ i)))))))))
|
(return-from bmh-matcher (1+ i)))))))))
|
||||||
|
|
||||||
(defun create-bmh-matcher (pattern case-insensitive-p)
|
(defun create-bmh-matcher (pattern case-insensitive-p)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns a Boyer-Moore-Horspool matcher which searches the (special)
|
"Returns a Boyer-Moore-Horspool matcher which searches the (special)
|
||||||
simple-string *STRING* for the first occurence of the substring
|
simple-string *STRING* for the first occurence of the substring
|
||||||
PATTERN. The search starts at the position START-POS within *STRING*
|
PATTERN. The search starts at the position START-POS within *STRING*
|
||||||
@ -72,11 +68,12 @@ instead. (BMH matchers are faster but need much more space.)"
|
|||||||
(return-from create-bmh-matcher
|
(return-from create-bmh-matcher
|
||||||
(lambda (start-pos)
|
(lambda (start-pos)
|
||||||
(declare (type fixnum start-pos))
|
(declare (type fixnum start-pos))
|
||||||
(search pattern
|
(and (not (minusp start-pos))
|
||||||
*string*
|
(search pattern
|
||||||
:start2 start-pos
|
*string*
|
||||||
:end2 *end-pos*
|
:start2 start-pos
|
||||||
:test test)))))
|
:end2 *end-pos*
|
||||||
|
:test test))))))
|
||||||
(let* ((m (length pattern))
|
(let* ((m (length pattern))
|
||||||
(skip (make-array *regex-char-code-limit*
|
(skip (make-array *regex-char-code-limit*
|
||||||
:element-type 'fixnum
|
:element-type 'fixnum
|
||||||
@ -97,16 +94,12 @@ instead. (BMH matchers are faster but need much more space.)"
|
|||||||
(let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
|
(let ((char-compare (if case-insensitive-p 'char-equal 'char=)))
|
||||||
`(lambda (start-pos)
|
`(lambda (start-pos)
|
||||||
(declare (type fixnum start-pos))
|
(declare (type fixnum start-pos))
|
||||||
(loop for i of-type fixnum from start-pos below *end-pos*
|
(and (not (minusp start-pos))
|
||||||
thereis (and (,char-compare (schar *string* i) chr) i)))))
|
(loop for i of-type fixnum from start-pos below *end-pos*
|
||||||
|
thereis (and (,char-compare (schar *string* i) chr) i))))))
|
||||||
|
|
||||||
(defun create-char-searcher (chr case-insensitive-p)
|
(defun create-char-searcher (chr case-insensitive-p)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns a function which searches the (special) simple-string
|
"Returns a function which searches the (special) simple-string
|
||||||
*STRING* for the first occurence of the character CHR. The search
|
*STRING* for the first occurence of the character CHR. The search
|
||||||
starts at the position START-POS within *STRING* and stops before
|
starts at the position START-POS within *STRING* and stops before
|
||||||
@ -119,17 +112,16 @@ case-insensitive or not."
|
|||||||
(declaim (inline newline-skipper))
|
(declaim (inline newline-skipper))
|
||||||
|
|
||||||
(defun newline-skipper (start-pos)
|
(defun newline-skipper (start-pos)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum start-pos))
|
(declare (type fixnum start-pos))
|
||||||
"Find the next occurence of a character in *STRING* which is behind
|
"Find the next occurence of a character in *STRING* which is behind
|
||||||
a #\Newline."
|
a #\Newline."
|
||||||
(loop for i of-type fixnum from start-pos below *end-pos*
|
;; we can start with (1- START-POS) without testing for (PLUSP
|
||||||
thereis (and (char= (schar *string* i) #\Newline)
|
;; START-POS) because we know we'll never call NEWLINE-SKIPPER on
|
||||||
|
;; the first iteration
|
||||||
|
(loop for i of-type fixnum from (1- start-pos) below *end-pos*
|
||||||
|
thereis (and (char= (schar *string* i)
|
||||||
|
#\Newline)
|
||||||
(1+ i))))
|
(1+ i))))
|
||||||
|
|
||||||
(defmacro insert-advance-fn (advance-fn)
|
(defmacro insert-advance-fn (advance-fn)
|
||||||
@ -198,6 +190,7 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
|
|||||||
(unless (setq *end-string-pos* (funcall end-string-test
|
(unless (setq *end-string-pos* (funcall end-string-test
|
||||||
end-test-pos))
|
end-test-pos))
|
||||||
(when (and (= 1 (the fixnum end-anchored-p))
|
(when (and (= 1 (the fixnum end-anchored-p))
|
||||||
|
(> *end-pos* scan-start-pos)
|
||||||
(char= #\Newline (schar *string* (1- *end-pos*))))
|
(char= #\Newline (schar *string* (1- *end-pos*))))
|
||||||
;; if we didn't find an end string candidate from
|
;; if we didn't find an end string candidate from
|
||||||
;; END-TEST-POS and if a #\Newline at the end is
|
;; END-TEST-POS and if a #\Newline at the end is
|
||||||
@ -328,12 +321,7 @@ ADVANCE-FN. This is a utility macro used by CREATE-SCANNER-AUX."
|
|||||||
rep-num
|
rep-num
|
||||||
zero-length-num
|
zero-length-num
|
||||||
reg-num)
|
reg-num)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
(declare (type fixnum min-len zero-length-num rep-num reg-num))
|
(declare (type fixnum min-len zero-length-num rep-num reg-num))
|
||||||
"Auxiliary function to create and return a scanner \(which is
|
"Auxiliary function to create and return a scanner \(which is
|
||||||
actually a closure). Used by CREATE-SCANNER."
|
actually a closure). Used by CREATE-SCANNER."
|
||||||
@ -516,4 +504,4 @@ actually a closure). Used by CREATE-SCANNER."
|
|||||||
;; expression to optimize so we just return POS
|
;; expression to optimize so we just return POS
|
||||||
(insert-advance-fn
|
(insert-advance-fn
|
||||||
(advance-fn (pos)
|
(advance-fn (pos)
|
||||||
pos))))))
|
pos))))))
|
||||||
|
|||||||
@ -1,9 +1,9 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/specials.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/specials.lisp,v 1.21 2005/04/01 21:29:10 edi Exp $
|
||||||
|
|
||||||
;;; globally declared special variables
|
;;; globally declared special variables
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -31,6 +31,22 @@
|
|||||||
|
|
||||||
(in-package #:cl-ppcre)
|
(in-package #:cl-ppcre)
|
||||||
|
|
||||||
|
;;; special variables used to effect declarations
|
||||||
|
|
||||||
|
(defvar *standard-optimize-settings*
|
||||||
|
'(optimize
|
||||||
|
speed
|
||||||
|
(safety 0)
|
||||||
|
(space 0)
|
||||||
|
(debug 1)
|
||||||
|
(compilation-speed 0)
|
||||||
|
#+:lispworks (hcl:fixnum-safety 0))
|
||||||
|
"The standard optimize settings used by most declaration expressions.")
|
||||||
|
|
||||||
|
(defvar *special-optimize-settings*
|
||||||
|
'(optimize speed space)
|
||||||
|
"Special optimize settings used only be a few declaration expressions.")
|
||||||
|
|
||||||
;;; special variables used by the lexer/parser combo
|
;;; special variables used by the lexer/parser combo
|
||||||
|
|
||||||
(defvar *extended-mode-p* nil
|
(defvar *extended-mode-p* nil
|
||||||
@ -104,4 +120,23 @@ but large) Boyer-Moore-Horspool matchers.")
|
|||||||
(defvar *allow-quoting* nil
|
(defvar *allow-quoting* nil
|
||||||
"Whether the parser should support Perl's \\Q and \\E.")
|
"Whether the parser should support Perl's \\Q and \\E.")
|
||||||
|
|
||||||
(pushnew :cl-ppcre *features*)
|
(pushnew :cl-ppcre *features*)
|
||||||
|
|
||||||
|
;; stuff for Nikodemus Siivola's HYPERDOC
|
||||||
|
;; see <http://common-lisp.net/project/hyperdoc/>
|
||||||
|
;; and <http://www.cliki.net/hyperdoc>
|
||||||
|
|
||||||
|
(defvar *hyperdoc-base-uri* "http://weitz.de/cl-ppcre/")
|
||||||
|
|
||||||
|
(let ((exported-symbols-alist
|
||||||
|
(loop for symbol being the external-symbols of :cl-ppcre
|
||||||
|
collect (cons symbol
|
||||||
|
(concatenate 'string
|
||||||
|
"#"
|
||||||
|
(string-downcase symbol))))))
|
||||||
|
(defun hyperdoc-lookup (symbol type)
|
||||||
|
(declare (ignore type))
|
||||||
|
(cdr (assoc symbol
|
||||||
|
exported-symbols-alist
|
||||||
|
:test #'eq))))
|
||||||
|
|
||||||
|
|||||||
88
util.lisp
88
util.lisp
@ -1,5 +1,5 @@
|
|||||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
||||||
;;; $Header: /home/manuel/bknr-cvs/cvs/thirdparty/cl-ppcre/util.lisp,v 1.1 2004/06/23 08:27:10 hans Exp $
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/util.lisp,v 1.32 2005/08/23 10:32:30 edi Exp $
|
||||||
|
|
||||||
;;; Utility functions and constants dealing with the hash-tables
|
;;; Utility functions and constants dealing with the hash-tables
|
||||||
;;; we use to encode character classes
|
;;; we use to encode character classes
|
||||||
@ -7,7 +7,7 @@
|
|||||||
;;; Hash-tables are treated like sets, i.e. a character C is a member of the
|
;;; Hash-tables are treated like sets, i.e. a character C is a member of the
|
||||||
;;; hash-table H iff (GETHASH C H) is true.
|
;;; hash-table H iff (GETHASH C H) is true.
|
||||||
|
|
||||||
;;; Copyright (c) 2002-2003, Dr. Edmund Weitz. All rights reserved.
|
;;; Copyright (c) 2002-2005, Dr. Edmund Weitz. All rights reserved.
|
||||||
|
|
||||||
;;; Redistribution and use in source and binary forms, with or without
|
;;; Redistribution and use in source and binary forms, with or without
|
||||||
;;; modification, are permitted provided that the following conditions
|
;;; modification, are permitted provided that the following conditions
|
||||||
@ -35,6 +35,10 @@
|
|||||||
|
|
||||||
(in-package #:cl-ppcre)
|
(in-package #:cl-ppcre)
|
||||||
|
|
||||||
|
#+:lispworks
|
||||||
|
(import 'lw:with-unique-names)
|
||||||
|
|
||||||
|
#-:lispworks
|
||||||
(defmacro with-unique-names ((&rest bindings) &body body)
|
(defmacro with-unique-names ((&rest bindings) &body body)
|
||||||
"Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form*
|
"Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form*
|
||||||
|
|
||||||
@ -65,8 +69,14 @@ are discarded \(that is, the body is an implicit PROGN)."
|
|||||||
bindings)
|
bindings)
|
||||||
,@body))
|
,@body))
|
||||||
|
|
||||||
(defmacro rebinding (bindings &body body)
|
#+:lispworks
|
||||||
"REBINDING ( { var | (var prefix) }* ) form*
|
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||||
|
(setf (macro-function 'with-rebinding)
|
||||||
|
(macro-function 'lw:rebinding)))
|
||||||
|
|
||||||
|
#-:lispworks
|
||||||
|
(defmacro with-rebinding (bindings &body body)
|
||||||
|
"WITH-REBINDING ( { var | (var prefix) }* ) form*
|
||||||
|
|
||||||
Evaluates a series of forms in the lexical environment that is
|
Evaluates a series of forms in the lexical environment that is
|
||||||
formed by adding the binding of each VAR to a fresh, uninterned
|
formed by adding the binding of each VAR to a fresh, uninterned
|
||||||
@ -94,14 +104,14 @@ are discarded \(that is, the body is an implicit PROGN)."
|
|||||||
|
|
||||||
(eval-when (:compile-toplevel :execute :load-toplevel)
|
(eval-when (:compile-toplevel :execute :load-toplevel)
|
||||||
(defvar *regex-char-code-limit* char-code-limit
|
(defvar *regex-char-code-limit* char-code-limit
|
||||||
"The upper exclusive bound on the char-codes of characters
|
"The upper exclusive bound on the char-codes of characters which
|
||||||
which can occur in character classes.
|
can occur in character classes. Change this value BEFORE creating
|
||||||
Change this value BEFORE creating scanners if you don't need
|
scanners if you don't need the Unicode support of implementations like
|
||||||
the full Unicode support of LW, ACL, or CLISP.")
|
AllegroCL, CLISP, LispWorks, or SBCL.")
|
||||||
(declaim (type fixnum *regex-char-code-limit*))
|
(declaim (type fixnum *regex-char-code-limit*))
|
||||||
|
|
||||||
(defun make-char-hash (test)
|
(defun make-char-hash (test)
|
||||||
(declare (optimize speed space))
|
(declare #.*special-optimize-settings*)
|
||||||
"Returns a hash-table of all characters satisfying test."
|
"Returns a hash-table of all characters satisfying test."
|
||||||
(loop with hash = (make-hash-table)
|
(loop with hash = (make-hash-table)
|
||||||
for c of-type fixnum from 0 below char-code-limit
|
for c of-type fixnum from 0 below char-code-limit
|
||||||
@ -113,12 +123,7 @@ the full Unicode support of LW, ACL, or CLISP.")
|
|||||||
(declaim (inline word-char-p))
|
(declaim (inline word-char-p))
|
||||||
|
|
||||||
(defun word-char-p (chr)
|
(defun word-char-p (chr)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Tests whether a character is a \"word\" character.
|
"Tests whether a character is a \"word\" character.
|
||||||
In the ASCII charset this is equivalent to a-z, A-Z, 0-9, or _,
|
In the ASCII charset this is equivalent to a-z, A-Z, 0-9, or _,
|
||||||
i.e. the same as Perl's [\\w]."
|
i.e. the same as Perl's [\\w]."
|
||||||
@ -134,7 +139,7 @@ i.e. the same as Perl's [\\w]."
|
|||||||
Same as Perl's [\\s]."))
|
Same as Perl's [\\s]."))
|
||||||
|
|
||||||
(defun whitespacep (chr)
|
(defun whitespacep (chr)
|
||||||
(declare (optimize speed space))
|
(declare #.*special-optimize-settings*)
|
||||||
"Tests whether a character is whitespace,
|
"Tests whether a character is whitespace,
|
||||||
i.e. whether it would match [\\s] in Perl."
|
i.e. whether it would match [\\s] in Perl."
|
||||||
(find chr +whitespace-char-string+ :test #'char=)))
|
(find chr +whitespace-char-string+ :test #'char=)))
|
||||||
@ -158,12 +163,7 @@ i.e. whether it would match [\\s] in Perl."
|
|||||||
"Hash-table containing all whitespace characters."))
|
"Hash-table containing all whitespace characters."))
|
||||||
|
|
||||||
(defun merge-hash (hash1 hash2)
|
(defun merge-hash (hash1 hash2)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns the \"sum\" of two hashes. This is a destructive operation
|
"Returns the \"sum\" of two hashes. This is a destructive operation
|
||||||
on HASH1."
|
on HASH1."
|
||||||
(cond ((> (hash-table-count hash2)
|
(cond ((> (hash-table-count hash2)
|
||||||
@ -180,12 +180,7 @@ on HASH1."
|
|||||||
hash1)
|
hash1)
|
||||||
|
|
||||||
(defun merge-inverted-hash (hash1 hash2)
|
(defun merge-inverted-hash (hash1 hash2)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Returns the \"sum\" of HASH1 and the \"inverse\" of HASH2. This is
|
"Returns the \"sum\" of HASH1 and the \"inverse\" of HASH2. This is
|
||||||
a destructive operation on HASH1."
|
a destructive operation on HASH1."
|
||||||
(loop for c of-type fixnum from 0 below *regex-char-code-limit*
|
(loop for c of-type fixnum from 0 below *regex-char-code-limit*
|
||||||
@ -195,12 +190,7 @@ a destructive operation on HASH1."
|
|||||||
hash1)
|
hash1)
|
||||||
|
|
||||||
(defun create-ranges-from-hash (hash &key downcasep)
|
(defun create-ranges-from-hash (hash &key downcasep)
|
||||||
(declare (optimize speed
|
(declare #.*standard-optimize-settings*)
|
||||||
(safety 0)
|
|
||||||
(space 0)
|
|
||||||
(debug 0)
|
|
||||||
(compilation-speed 0)
|
|
||||||
#+:lispworks (hcl:fixnum-safety 0)))
|
|
||||||
"Tries to identify up to three intervals (with respect to CHAR<)
|
"Tries to identify up to three intervals (with respect to CHAR<)
|
||||||
which together comprise HASH. Returns NIL if this is not possible.
|
which together comprise HASH. Returns NIL if this is not possible.
|
||||||
If DOWNCASEP is true it will treat the hash-table as if it represents
|
If DOWNCASEP is true it will treat the hash-table as if it represents
|
||||||
@ -276,3 +266,33 @@ will only return the respective lower-case intervals."
|
|||||||
:element-type (array-element-type sequence)
|
:element-type (array-element-type sequence)
|
||||||
:displaced-to sequence
|
:displaced-to sequence
|
||||||
:displaced-index-offset start))
|
:displaced-index-offset start))
|
||||||
|
|
||||||
|
(defun normalize-var-list (var-list)
|
||||||
|
"Utility function for REGISTER-GROUPS-BIND and
|
||||||
|
DO-REGISTER-GROUPS. Creates the long form \(a list of \(FUNCTION VAR)
|
||||||
|
entries) out of the short form of VAR-LIST."
|
||||||
|
(loop for element in var-list
|
||||||
|
if (consp element)
|
||||||
|
nconc (loop for var in (rest element)
|
||||||
|
collect (list (first element) var))
|
||||||
|
else
|
||||||
|
collect (list '(function identity) element)))
|
||||||
|
|
||||||
|
(defun string-list-to-simple-string (string-list)
|
||||||
|
(declare #.*standard-optimize-settings*)
|
||||||
|
"Concatenates a list of strings to one simple-string."
|
||||||
|
;; this function provided by JP Massar; note that we can't use APPLY
|
||||||
|
;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
|
||||||
|
(let ((total-size 0))
|
||||||
|
(declare (type fixnum total-size))
|
||||||
|
(dolist (string string-list)
|
||||||
|
#-genera (declare (type string string))
|
||||||
|
(incf total-size (length string)))
|
||||||
|
(let ((result-string (make-sequence 'simple-string total-size))
|
||||||
|
(curr-pos 0))
|
||||||
|
(declare (type fixnum curr-pos))
|
||||||
|
(dolist (string string-list)
|
||||||
|
#-genera (declare (type string string))
|
||||||
|
(replace result-string string :start1 curr-pos)
|
||||||
|
(incf curr-pos (length string)))
|
||||||
|
result-string)))
|
||||||
|
|||||||
Reference in New Issue
Block a user