Update to current dev version

git-svn-id: svn://bknr.net/svn/trunk/thirdparty/cl-ppcre@3581 4281704c-cde7-0310-8518-8e2dc76b1ff0
This commit is contained in:
Edi Weitz
2008-07-23 11:44:08 +00:00
parent 2974af4010
commit 25c3dedeeb
37 changed files with 5443 additions and 6794 deletions

37
test/packages.lisp Normal file
View File

@ -0,0 +1,37 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/packages.lisp,v 1.3 2008/07/22 12:58:52 edi Exp $
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; * Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.
;;; * Redistributions in binary form must reproduce the above
;;; copyright notice, this list of conditions and the following
;;; disclaimer in the documentation and/or other materials
;;; provided with the distribution.
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(in-package :cl-user)
(defpackage :cl-ppcre-test
#+genera (:shadowing-import-from :common-lisp :lambda)
(:use #-:genera :cl #+:genera :future-common-lisp :cl-ppcre)
(:import-from :cl-ppcre :*standard-optimize-settings*
:string-list-to-simple-string)
(:export :run-all-tests :unicode-test))

150
test/perl-tests.lisp Normal file
View File

@ -0,0 +1,150 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/perl-tests.lisp,v 1.7 2008/07/22 23:02:04 edi Exp $
;;; The tests in this file test CL-PPCRE against testdata generated by
;;; the Perl program `perltest.pl' from the input file `testinput' in
;;; order to check compatibility with Perl and correctness of the
;;; regex engine.
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; * Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.
;;; * Redistributions in binary form must reproduce the above
;;; copyright notice, this list of conditions and the following
;;; disclaimer in the documentation and/or other materials
;;; provided with the distribution.
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(in-package :cl-ppcre-test)
(defvar *tests-to-skip* '(662 790 1439)
"Some tests we skip because the testdata is generated by a Perl
program and CL-PPCRE differs from Perl for these tests - on purpose.")
(defun create-string-from-input (input)
"Converts INPUT to a string which can be used in TEST below. The
input file `testdata' encodes strings containing non-printable
characters as lists where those characters are represented by their
character code."
(etypecase input
((or null string) input)
(list (string-list-to-simple-string
(loop for element in input
if (stringp element)
collect element
else
collect (string (code-char element)))))))
(defun perl-test (&key (file-name
(make-pathname :name "perltestdata"
:type nil :version nil
:defaults *this-file*)
file-name-provided-p)
(external-format '(:latin-1 :eol-style :lf))
verbose)
"Loops through all test cases in FILE-NAME and prints a report if
VERBOSE is true. EXTERNAL-FORMAT is the FLEXI-STREAMS external format
which is used to read the file. Returns a true value if all tests
succeeded.
For the syntax of the tests in FILE-NAME refer to the source code of
this function and to the Perl script perltest.pl which generates such
test files."
(declare #.*standard-optimize-settings*)
(with-open-file (binary-stream file-name :element-type 'flex:octet)
(let ((stream (flex:make-flexi-stream binary-stream :external-format external-format))
;; the standard Perl tests don't need full Unicode support
(*regex-char-code-limit* (if file-name-provided-p *regex-char-code-limit* 256))
;; we need this for the standard test suite or otherwise we
;; might get stack overflows
(*optimize-char-classes* (if file-name-provided-p *optimize-char-classes* :charmap))
;; we only check for correctness and don't care about speed
;; that match (but rather about space constraints of the
;; trial versions)
(*use-bmh-matchers* (if file-name-provided-p *use-bmh-matchers* nil))
;; some tests in the Perl suite explicitly check for this
(*allow-quoting* (if file-name-provided-p *allow-quoting* t)))
(do-tests ((format nil "Running tests in file ~S" (file-namestring file-name))
(not verbose))
(let ((input-line (or (read stream nil) (done)))
errors)
(destructuring-bind (counter
info-string%
regex%
case-insensitive-mode
multi-line-mode
single-line-mode
extended-mode
target%
perl-error
expected-result%
expected-registers)
input-line
(destructuring-bind (info-string regex target expected-result)
(mapcar 'create-string-from-input
(list info-string% regex% target% expected-result%))
(setq expected-registers (mapcar 'create-string-from-input expected-registers))
(unless (find counter *tests-to-skip* :test #'=)
(when verbose
(format t "~&~4D: ~S" counter info-string))
(let ((scanner
(handler-bind ((error (lambda (condition)
(declare (ignore condition))
(when perl-error
;; we expected an
;; error, so we can
;; signal success
(return-from test-block)))))
(create-scanner regex
:case-insensitive-mode case-insensitive-mode
:multi-line-mode multi-line-mode
:single-line-mode single-line-mode
:extended-mode extended-mode))))
(block test-block
(multiple-value-bind (start end reg-starts reg-ends)
(scan scanner target)
(cond (perl-error
(push (format nil "expected an error but got a result.")
errors))
(t
(when (not (eq start expected-result))
(if start
(let ((result (subseq target start end)))
(unless (string= result expected-result)
(push (format nil "expected ~S but got ~S."
expected-result result)
errors))
(setq reg-starts (coerce reg-starts 'list)
reg-ends (coerce reg-ends 'list))
(loop for i from 0
for expected-register in expected-registers
for reg-start = (nth i reg-starts)
for reg-end = (nth i reg-ends)
for register = (if (and reg-start reg-end)
(subseq target reg-start reg-end)
nil)
unless (string= expected-register register)
do (push (format nil "\\~A: expected ~S but got ~S."
(1+ i) expected-register register)
errors)))
(push (format nil "expected ~S but got ~S."
expected-result start)
errors))))))
errors))))))))))

160
test/perltest.pl Normal file
View File

@ -0,0 +1,160 @@
#!/usr/bin/perl
# $Header: /usr/local/cvsrep/cl-ppcre/test/perltest.pl,v 1.1 2008/07/06 21:24:39 edi Exp $
# This is a heavily modified version of the file 'perltest' which
# comes with the PCRE library package, which is open source software,
# written by Philip Hazel, and copyright by the University of
# Cambridge, England.
# The PCRE library package is available from
# <ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/>
sub string_for_lisp {
my(@a, $t, $in_string, $switch);
my $string = shift;
$string =~ s/\\/\\\\/g;
$string =~ s/"/\\"/g;
return "\"$string\""
if $string =~ /^[\n\x20-\x7f]*$/;
$in_string = 1;
foreach $c (split(//, $string)) {
if (ord $c >= 32 and ord $c < 127) {
if ($in_string) {
$t .= $c;
} else {
$in_string = 1;
$t = $c;
}
} else {
if ($in_string) {
push @a, "\"$t\"";
$in_string = 0;
$switch = 1;
}
push @a, ord $c;
}
}
if ($switch) {
if ($in_string) {
push @a, "\"$t\"";
}
'(' . (join ' ', @a) . ')';
} else {
"\"$t\"";
}
}
NEXT_RE: while (1) {
last
if !($_ = <>);
next
if $_ eq "";
$pattern = $_;
while ($pattern !~ /^\s*(.).*\1/s) {
last
if !($_ = <>);
$pattern .= $_;
}
chomp($pattern);
$pattern =~ s/\s+$//;
$pattern =~ s/\+(?=[a-z]*$)//;
$multi_line_mode = ($pattern =~ /m[a-z]*$/) ? 't' : 'nil';
$single_line_mode = ($pattern =~ /s[a-z]*$/) ? 't' : 'nil';
$extended_mode = ($pattern =~ /x[a-z]*$/) ? 't' : 'nil';
$case_insensitive_mode = ($pattern =~ /i[a-z]*$/) ? 't' : 'nil';
$pattern =~ s/^(.*)g([a-z]*)$/\1\2/;
$pattern_for_lisp = $pattern;
$pattern_for_lisp =~ s/[a-z]*$//;
$pattern_for_lisp =~ s/^\s*(.)(.*)\1/$2/s;
$pattern_for_lisp =~ s/\\/\\\\/g;
$pattern_for_lisp =~ s/"/\\"/g;
$pattern = "/(?#)/$2"
if ($pattern =~ /^(.)\1(.*)$/);
while (1) {
last NEXT_RE
if !($_ = <>);
chomp;
s/\s+$//;
s/^\s+//;
last
if ($_ eq "");
$info_string = string_for_lisp "\"$_\" =~ $pattern";
$x = eval "\"$_\"";
@subs = ();
eval <<"END";
if (\$x =~ ${pattern}) {
push \@subs,\$&;
push \@subs,\$1;
push \@subs,\$2;
push \@subs,\$3;
push \@subs,\$4;
push \@subs,\$5;
push \@subs,\$6;
push \@subs,\$7;
push \@subs,\$8;
push \@subs,\$9;
push \@subs,\$10;
push \@subs,\$11;
push \@subs,\$12;
push \@subs,\$13;
push \@subs,\$14;
push \@subs,\$15;
push \@subs,\$16;
}
\$test = sub {
my \$times = shift;
my \$start = time;
for (my \$i = 0; \$i < \$times; \$i++) {
\$x =~ ${pattern};
}
return time - \$start;
};
END
$counter++;
print STDERR "$counter\n";
if ($@) {
$error = 't';
} else {
$error = 'nil';
}
print "($counter $info_string \"$pattern_for_lisp\" $case_insensitive_mode $multi_line_mode $single_line_mode $extended_mode " . string_for_lisp($x) . " $error ";
if (!@subs) {
print 'nil nil';
} else {
print string_for_lisp($subs[0]) . ' (';
undef $not_first;
for ($i = 1; $i <= 16; $i++) {
print ' '
unless $i == 1;
if (defined $subs[$i]) {
print string_for_lisp $subs[$i];
} else {
print 'nil';
}
}
print ')';
}
print ")\n";
}
}

14288
test/perltestdata Normal file

File diff suppressed because one or more lines are too long

3948
test/perltestinput Normal file

File diff suppressed because it is too large Load Diff

349
test/simple Normal file
View File

@ -0,0 +1,349 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/simple,v 1.9 2008/07/23 00:48:00 edi Exp $
;;; some simple tests for CL-PPCRE - entered manually and to be read
;;; in the CL-PPCRE-TEST package; all forms are expected to return a
;;; true value on success when EVALuated
(equalp (multiple-value-list (scan "(a)*b" "xaaabd"))
(list 1 5 #(3) #(4)))
(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 1))
(list 1 5 #(3) #(4)))
(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 2))
(list 2 5 #(3) #(4)))
(null (scan "(a)*b" "xaaabd" :end 4))
(equalp (multiple-value-list (scan '(:greedy-repetition 0 nil #\b) "bbbc"))
(list 0 3 #() #()))
(null (scan '(:greedy-repetition 4 6 #\b) "bbbc"))
(let ((s (create-scanner "(([a-c])+)x")))
(equalp (multiple-value-list (scan s "abcxy"))
(list 0 4 #(0 2) #(3 3))))
(equalp (multiple-value-list (scan-to-strings "[^b]*b" "aaabd"))
(list "aaab" #()))
(equalp (multiple-value-list (scan-to-strings "([^b])*b" "aaabd"))
(list "aaab" #("a")))
(equalp (multiple-value-list (scan-to-strings "(([^b])*)b" "aaabd"))
(list "aaab" #("aaa" "a")))
(equalp (register-groups-bind (first second third fourth)
("((a)|(b)|(c))+" "abababc" :sharedp t)
(list first second third fourth))
(list "c" "a" "b" "c"))
(equalp (register-groups-bind (nil second third fourth)
("((a)|(b)|(c))()+" "abababc" :start 6)
(list second third fourth))
(list nil nil "c"))
(null (register-groups-bind (first)
("(a|b)+" "accc" :start 1)
first))
(equalp (register-groups-bind (fname lname (#'parse-integer date month year))
("(\\w+)\\s+(\\w+)\\s+(\\d{1,2})\\.(\\d{1,2})\\.(\\d{4})" "Frank Zappa 21.12.1940")
(list fname lname (encode-universal-time 0 0 0 date month year 0)))
(list "Frank" "Zappa" 1292889600))
(flet ((foo (regex target-string &key (start 0) (end (length target-string)))
(let ((sum 0))
(do-matches (s e regex target-string nil :start start :end end)
(incf sum (- e s)))
(/ sum (- end start)))))
(and (= 1/3 (foo "a" "abcabcabc"))
(= 5/9 (foo "aa|b" "aacabcbbc"))))
(labels ((crossfoot (target-string &key (start 0) (end (length target-string)))
(let ((sum 0))
(do-matches-as-strings (m :digit-class
target-string nil
:start start :end end)
(incf sum (parse-integer m)))
(if (< sum 10)
sum
(crossfoot (format nil "~A" sum))))))
(and (zerop (crossfoot "bar"))
(= 3 (crossfoot "a3x"))
(= 6 (crossfoot "12345"))))
(let (result)
(do-register-groups (first second third fourth)
("((a)|(b)|(c))" "abababc" nil :start 2 :sharedp t)
(push (list first second third fourth) result))
(equal (nreverse result)
'(("a" "a" nil nil)
("b" nil "b" nil)
("a" "a" nil nil)
("b" nil "b" nil)
("c" nil nil "c"))))
(let (result)
(do-register-groups ((#'parse-integer n) (#'intern sign) whitespace)
("(\\d+)|(\\+|-|\\*|/)|(\\s+)" "12*15 - 42/3")
(unless whitespace
(push (or n sign) result)))
(equal (nreverse result)
'(12 * 15 - 42 / 3)))
(equal (all-matches "a" "foo bar baz")
(list 5 6 9 10))
(equal (all-matches "\\w*" "foo bar baz")
(list 0 3 3 3 4 7 7 7 8 11 11 11))
(equal (all-matches-as-strings "a" "foo bar baz")
(list "a" "a"))
(equal (all-matches-as-strings "\\w*" "foo bar baz")
(list "foo" "" "bar" "" "baz" ""))
(equal (split "\\s+" "foo bar baz
frob")
'("foo" "bar" "baz" "frob"))
(equal (split "\\s*" "foo bar baz")
'("f" "o" "o" "b" "a" "r" "b" "a" "z"))
(equal (split "(\\s+)" "foo bar baz")
'("foo" "bar" "baz"))
(equal (split "(\\s+)" "foo bar baz" :with-registers-p t)
'("foo" " " "bar" " " "baz"))
(equal (split "(\\s)(\\s*)" "foo bar baz" :with-registers-p t)
'("foo" " " "" "bar" " " " " "baz"))
(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t)
'("foo" "," nil "bar" nil ";" "baz"))
(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t :omit-unmatched-p t)
'("foo" "," "bar" ";" "baz"))
(equal (split ":" "a:b:c:d:e:f:g::")
'("a" "b" "c" "d" "e" "f" "g"))
(equal (split ":" "a:b:c:d:e:f:g::" :limit 1)
'("a:b:c:d:e:f:g::"))
(equal (split ":" "a:b:c:d:e:f:g::" :limit 2)
'("a" "b:c:d:e:f:g::"))
(equal (split ":" "a:b:c:d:e:f:g::" :limit 3)
'("a" "b" "c:d:e:f:g::"))
(equal (split ":" "a:b:c:d:e:f:g::" :limit 1000)
'("a" "b" "c" "d" "e" "f" "g" "" ""))
(equal (multiple-value-list (regex-replace "fo+" "foo bar" "frob"))
(list "frob bar" t))
(equal (multiple-value-list (regex-replace "fo+" "FOO bar" "frob"))
(list "FOO bar" nil))
(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob"))
(list "frob bar" t))
(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob" :preserve-case t))
(list "FROB bar" t))
(equal (multiple-value-list (regex-replace "(?i)fo+" "Foo bar" "frob" :preserve-case t))
(list "Frob bar" t))
(equal (multiple-value-list (regex-replace "bar" "foo bar baz" "[frob (was '\\&' between '\\`' and '\\'')]"))
(list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
(equal (multiple-value-list
(regex-replace "bar" "foo bar baz"
'("[frob (was '" :match "' between '" :before-match "' and '" :after-match "')]")))
(list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
(equal (multiple-value-list (regex-replace "(be)(nev)(o)(lent)"
"benevolent: adj. generous, kind"
(lambda (match &rest registers)
(format nil "~A [~{~A~^.~}]" match registers))
:simple-calls t))
(list "benevolent [be.nev.o.lent]: adj. generous, kind" t))
(equal (multiple-value-list (regex-replace-all "(?i)fo+" "foo Fooo FOOOO bar" "frob" :preserve-case t))
(list "frob Frob FROB bar" t))
(string= (regex-replace-all "(?i)f(o+)" "foo Fooo FOOOO bar" "fr\\1b" :preserve-case t)
"froob Frooob FROOOOB bar")
(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
(flet ((encode-quoted-printable (string)
"Converts 8-bit string to quoted-printable representation."
;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
(flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
(declare (ignore start end match-end reg-starts reg-ends))
(format nil "=~2,'0x" (char-code (char target-string match-start)))))
(regex-replace-all qp-regex string #'convert))))
(string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
(let ((url-regex (create-scanner "[^a-zA-Z0-9_\\-.]")))
(flet ((url-encode (string)
"URL-encodes a string."
;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
(flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
(declare (ignore start end match-end reg-starts reg-ends))
(format nil "%~2,'0x" (char-code (char target-string match-start)))))
(regex-replace-all url-regex string #'convert))))
(string= (url-encode "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
"F%EAte%20S%F8rensen%20na%EFve%20H%FChner%20Stra%DFe")))
(flet ((how-many (target-string start end match-start match-end reg-starts reg-ends)
(declare (ignore target-string start end match-start match-end))
(format nil "~A" (- (svref reg-ends 0)
(svref reg-starts 0)))))
(string= (regex-replace-all "{(.+?)}"
"foo{...}bar{.....}{..}baz{....}frob"
(list "[" #'how-many " dots]"))
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
(flet ((encode-quoted-printable (string)
"Converts 8-bit string to quoted-printable representation.
Version using SIMPLE-CALLS keyword argument."
;; ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
(flet ((convert (match)
(format nil "=~2,'0x" (char-code (char match 0)))))
(regex-replace-all qp-regex string #'convert
:simple-calls t))))
(string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
(flet ((how-many (match first-register)
(declare (ignore match))
(format nil "~A" (length first-register))))
(string= (regex-replace-all "{(.+?)}"
"foo{...}bar{.....}{..}baz{....}frob"
(list "[" #'how-many " dots]")
:simple-calls t)
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
(flet ((my-repetition (char min)
`(:greedy-repetition ,min nil ,char)))
(setf (parse-tree-synonym 'a*) (my-repetition #\a 0)
(parse-tree-synonym 'b+) (my-repetition #\b 1))
(unwind-protect
(let ((scanner (create-scanner '(:sequence a* b+))))
(equal (mapcar (lambda (target)
(scan scanner target))
'("ab" "b" "aab" "a" "x"))
(list 0 0 0 nil nil)))
(setf (parse-tree-synonym 'a*) nil
(parse-tree-synonym 'b+) nil)))
(null (scan "^a+$" "a+"))
(let ((*allow-quoting* t))
;;we use CREATE-SCANNER because of Lisps like SBCL that don't have an interpreter
(equalp (multiple-value-list (scan (create-scanner "^\\Qa+\\E$") "a+"))
(list 0 2 #() #())))
(string= (parse-string "\\k<reg>") "k<reg>")
(let ((*allow-named-registers* t))
(equal (nth-value 1 (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))"))
(list nil "small" "big")))
(let ((*allow-named-registers* t))
(equal (nth-value 1 (create-scanner '(:register
(:sequence
(:named-register "small"
(:greedy-repetition 0 nil (:char-class (:range #\a #\z))))
(:named-register "big"
(:greedy-repetition 0 nil (:char-class (:range #\a #\z))))))))
(list nil "small" "big")))
(let ((*allow-named-registers* t))
(equalp (multiple-value-list (scan (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))") "aaaBBB"))
(list 0 6 #(0 0 3) #(6 3 6))))
(let ((*allow-named-registers* t))
;; multiple-choice back-reference
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a1aa"))
(list 0 4 #(0 1) #(1 2))))
(let ((*allow-named-registers* t))
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a22a"))
(list 0 4 #(0 1) #(1 2))))
(let ((*allow-named-registers* t))
;; demonstrating most-recently-seen-register-first property of back-reference;
;; "greedy" regex (analogous to "aa?")
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "a"))
(list 0 1 #(0 0 1) #(0 1 1))))
(let ((*allow-named-registers* t))
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "aa"))
(list 0 2 #(0 0 1) #(0 1 2))))
(let ((*allow-named-registers* t))
;; switched groups
;; "lazy" regex (analogous to "aa??")
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "a"))
(list 0 1 #(0 1 1) #(1 1 1))))
(let ((*allow-named-registers* t))
;; scanner ignores the second "a"
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "aa"))
(list 0 1 #(0 1 1) #(1 1 1))))
(let ((*allow-named-registers* t))
;; "aa" will be matched only when forced by adding "$" at the end
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)$") "aa"))
(list 0 2 #(0 1 1) #(1 1 2))))
(string= (quote-meta-chars "[a-z]*") "\\[a\\-z\\]\\*")
(string= (handler-case
(create-scanner "foo**x")
(ppcre-syntax-error (condition)
(format nil "Houston, we've got a problem with the string ~S: Looks like something went wrong at position ~A. The last message we received was \"~?\"."
(ppcre-syntax-error-string condition)
(ppcre-syntax-error-pos condition)
(simple-condition-format-control condition)
(simple-condition-format-arguments condition))))
"Houston, we've got a problem with the string \"foo**x\": Looks like something went wrong at position 4. The last message we received was \"Quantifier '*' not allowed.\".")
(flet ((my-weird-filter (pos)
"Only match at this point if either pos is odd and the
character we're looking at is lowercase or if pos is even and the next
two characters we're looking at are uppercase. Consume these
characters if there's a match."
(cond ((and (oddp pos)
(< pos cl-ppcre::*end-pos*)
(lower-case-p (char cl-ppcre::*string* pos)))
(1+ pos))
((and (evenp pos)
(< (1+ pos) cl-ppcre::*end-pos*)
(upper-case-p (char cl-ppcre::*string* pos))
(upper-case-p (char cl-ppcre::*string* (1+ pos))))
(+ pos 2))
(t nil))))
(let ((weird-regex `(:sequence "+" (:filter ,#'my-weird-filter) "+")))
(equalp (multiple-value-list (scan weird-regex "+A++a+AA+"))
(list 5 9 #() #()))))
(let ((a "\\E*"))
(equalp (multiple-value-list (scan (concatenate 'string "(?:" (quote-meta-chars a) "){2}") "\\E*\\E*"))
(list 0 6 #() #())))
(let ((a "\\E*"))
(equalp (multiple-value-list (scan `(:greedy-repetition 2 2 ,a) "\\E*\\E*"))
(list 0 6 #() #())))
(loop for *optimize-char-classes* in '(:hash-table :hash-table* :charset :charset* :charmap)
for s = (create-scanner "(([a-c])+)x")
always (equalp (multiple-value-list (scan s "abcxy"))
(list 0 4 #(0 2) #(3 3))))

159
test/tests.lisp Normal file
View File

@ -0,0 +1,159 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/tests.lisp,v 1.12 2008/07/23 00:48:00 edi Exp $
;;; The tests in this file test CL-PPCRE against testdata generated by
;;; the Perl program `perltest.pl' from the input file `testinput' in
;;; order to check compatibility with Perl and correctness of the
;;; regex engine.
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; * Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.
;;; * Redistributions in binary form must reproduce the above
;;; copyright notice, this list of conditions and the following
;;; disclaimer in the documentation and/or other materials
;;; provided with the distribution.
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(in-package :cl-ppcre-test)
(defvar *this-file* (load-time-value
(or #.*compile-file-pathname* *load-pathname*))
"The location of this source file.")
(defmacro do-tests ((name &optional show-progress-p) &body body)
"Helper macro which repeatedly executes BODY until the code in body
calls the function DONE. It is assumed that each invocation of BODY
will be the execution of one test which returns NIL in case of success
and list of string describing errors otherwise.
The macro prints a simple progress indicator \(one dots for ten tests)
to *STANDARD-OUTPUT* unless SHOW-PROGRESS-P is NIL and returns a true
value iff all tests succeeded. Errors in BODY are caught and reported
\(and counted as failures)."
`(let ((successp t)
(testcount 1))
(block test-block
(flet ((done ()
(return-from test-block successp)))
(format t "~&Test: ~A~%" ,name)
(loop
(when (and ,show-progress-p (zerop (mod testcount 10)))
(format t ".")
(when (zerop (mod testcount 100))
(terpri))
(force-output))
(let ((errors
(handler-case
(progn ,@body)
(error (msg)
(list (format nil "~&got an unexpected error: ~A" msg))))))
(setq successp (and successp (null errors)))
(when errors
(format t "~&~4@A:~{~& ~A~}~%" testcount errors))
(incf testcount)))))
successp))
(defun simple-tests (&key (file-name
(make-pathname :name "simple"
:type nil :version nil
:defaults *this-file*))
(external-format '(:latin-1 :eol-style :lf))
verbose)
"Loops through all the forms in the file FILE-NAME and executes each
of them using EVAL. It is assumed that each FORM specifies a test
which returns a true value iff it succeeds. Prints each test form to
*STANDARD-OUTPUT* if VERBOSE is true and shows a simple progress
indicator otherwise. EXTERNAL-FORMAT is the FLEXI-STREAMS external
format which is used to read the file. Returns a true value iff all
tests succeeded."
(with-open-file (binary-stream file-name :element-type 'flex:octet)
(let ((stream (flex:make-flexi-stream binary-stream :external-format external-format))
(*package* (find-package :cl-ppcre-test)))
(do-tests ((format nil "Simple tests from file ~S" (file-namestring file-name))
(not verbose))
(let ((form (or (read stream nil) (done))))
(when verbose
(format t "~&~S" form))
(cond ((eval form) nil)
(t (list (format nil "~S returned NIL" form)))))))))
(defun random-test-function (probability)
"Returns a random character test function which contains each
character with probability PROBABILITY."
(let ((hash-table (make-hash-table)))
(dotimes (code char-code-limit)
(let ((char (code-char code)))
(when (and char (< (random 1.0d0) probability))
(setf (gethash (code-char code) hash-table) t))))
(lambda (char)
(gethash char hash-table))))
(defun test-optimized-test-functions% (probability)
"Creates a random test function with probability PROBABILITY and six
\(one for each possible \"kind\") corresponding optimized test
functions, then checks for each character in turn that all functions
agree on it."
(let* ((test-function (random-test-function probability))
(optimized-functions (loop for kind in '(nil
:hash-table
:hash-table*
:charset
:charset*
:charmap)
collect (create-optimized-test-function test-function :kind kind))))
(loop for code below char-code-limit
for char = (code-char code)
for expected-result = (and char (funcall test-function char))
always (or (null char)
(loop for optimized-function in optimized-functions
always (eq (not (funcall optimized-function char))
(not expected-result)))))))
(defun test-optimized-test-functions (&key verbose)
"Runs TEST-OPTIMIZED-TEST-FUNCTIONS% with different probabilities."
(let ((probabilities '(0 .001 .01 .1 1)))
(do-tests ("Optimized test functions - this might take some time..." (not verbose))
(let ((probability (or (pop probabilities) (done))))
(when verbose
(format t "~&Probability is ~A" probability))
(not (test-optimized-test-functions% probability))))))
(defun run-all-tests (&key more-tests verbose)
"Runs all tests for CL-PPCRE and returns a true value iff all tests
succeeded. VERBOSE is interpreted by the individual test suites.
MORE-TESTS can be a list of function designators designating
additional tests to run. This facility is used by the tests for
CL-PPCRE-UNICODE."
(let ((successp t))
(macrolet ((run-test-suite (&body body)
`(unless (progn ,@body)
(setq successp nil))))
;; run the automatically generated Perl tests
(run-test-suite (perl-test :verbose verbose))
(run-test-suite (test-optimized-test-functions :verbose verbose))
(run-test-suite (simple-tests :verbose verbose))
(when more-tests
(unless (listp more-tests)
(setq more-tests (list more-tests))
(dolist (test more-tests)
(run-test-suite (funcall test :verbose verbose))))))
(format t "~2&~:[Some tests failed~;All tests passed~]." successp)
successp))

80
test/unicode-tests.lisp Normal file
View File

@ -0,0 +1,80 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/unicode-tests.lisp,v 1.8 2008/07/23 00:17:53 edi Exp $
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; * Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.
;;; * Redistributions in binary form must reproduce the above
;;; copyright notice, this list of conditions and the following
;;; disclaimer in the documentation and/or other materials
;;; provided with the distribution.
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(in-package :cl-ppcre-test)
(defun unicode-test (&key (file-name
(make-pathname :name "unicodetestdata"
:type nil :version nil
:defaults *this-file*)
file-name-provided-p)
verbose)
"Loops through all test cases in FILE-NAME and prints a report if
VERBOSE is true. Returns a true value if all tests succeeded.
For the syntax of the tests in FILE-NAME refer to CL-UNICODE."
(with-open-file (stream file-name)
(let ((*regex-char-code-limit* (if file-name-provided-p *regex-char-code-limit* char-code-limit))
(*optimize-char-classes* (if file-name-provided-p *optimize-char-classes* nil))
;; we only check for correctness and don't care about speed
;; that match (but rather about space constraints of the
;; trial versions)
(*use-bmh-matchers* (if file-name-provided-p *use-bmh-matchers* nil)))
(do-tests ((format nil "Running Unicode tests in file ~S" (file-namestring file-name))
(not verbose))
(let ((input-line (or (read stream nil) (done)))
errors)
(destructuring-bind (char-code property-name expected-result)
input-line
(let ((char (and (< char-code char-code-limit) (code-char char-code))))
(when char
(when verbose
(format t "~&~A: #x~X" property-name char-code))
(let* ((string (string char))
(result-1 (scan (format nil "\\p{~A}" property-name) string))
(result-2 (scan (format nil "[\\p{~A}]" property-name) string))
(inverted-result-1 (scan (format nil "\\P{~A}" property-name) string))
(inverted-result-2 (scan (format nil "[\\P{~A}]" property-name) string)))
(unless (eq expected-result (not (not result-1)))
(push (format nil "\(code-char #x~X) should ~:[not ~;~]have matched \"\\p{~A}\""
char-code expected-result property-name)
errors))
(unless (eq expected-result (not (not result-2)))
(push (format nil "\(code-char #x~X) should ~:[not ~;~]have matched \"[\\p{~A}]\""
char-code expected-result property-name)
errors))
(unless (eq expected-result (not inverted-result-1))
(push (format nil "\(code-char #x~X) should ~:[~;not ~]have matched \"\\P{~A}\""
char-code expected-result property-name)
errors))
(unless (eq expected-result (not inverted-result-2))
(push (format nil "\(code-char #x~X) should ~:[~;not ~]have matched \"[\\P{~A}]\""
char-code expected-result property-name)
errors)))
errors))))))))

107
test/unicodetestdata Normal file
View File

@ -0,0 +1,107 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/unicodetestdata,v 1.10 2008/07/22 14:00:35 edi Exp $
;;; some arbitrary test data for Unicode properties - stolen from CL-UNICODE
(#x0001 "ASCII" t)
(#x0100 "ASCII" nil)
(#x000A "Alphabetic" nil)
(#x0061 "Alphabetic" t)
(#x0061 "Ll" t)
(#x0041 "Alphabetic" t)
(#x0041 "alphabetic" t)
(#x0041 "IsAlphabetic" t)
(#x02E4 "Alphabetic" t)
(#x0970 "Alphabetic" nil)
(#x030D "BidiClass:NonspacingMark" t)
(#x030D "NonspacingMark" t)
(#x030D "nonspacing mark" t)
(#xE0146 "BidiClass:NonspacingMark" t)
(#x000D "BidiClass:WhiteSpace" nil)
(#x0020 "BidiClass:WhiteSpace" t)
(#x2006 "BidiClass:WhiteSpace" t)
(#x12470 "Cuneiform" t)
(#x12470 "IsCuneiform" t)
(#x12470 "CuneiformNumbersAndPunctuation" t)
(#x12470 "Block:CuneiformNumbersAndPunctuation" t)
(#x12470 "InCuneiformNumbersAndPunctuation" t)
(#x12470 "Script:Cuneiform" t)
(#x0041 "Block:Hebrew" nil)
(#x0593 "Block:Hebrew" t)
(#x0593 "InHebrew" t)
(#x040D "Block:Cyrillic" t)
(#x040D "InCyrillic" t)
(#x0042 "Block:Cyrillic" nil)
(#x2011 "Dash" t)
(#x2011 "IsDash" t)
(#xFF0D "Dash" t)
(#x003D "Dash" nil)
(#x00F0 "Lowercase" t)
(#x00F0 "IsLowercase" t)
(#x00F0 "lowercase" t)
(#x00F0 "Ll" t)
(#x0067 "Lowercase" t)
(#x010A "Lowercase" nil)
(#x1D6C1 "Lowercase" nil)
(#x0023 "CurrencySymbol" nil)
(#x0024 "CurrencySymbol" t)
(#x0024 "IsCurrencySymbol" t)
(#x0024 "currency symbol" t)
(#x20AC "CurrencySymbol" t)
(#xFFE6 "CurrencySymbol" t)
(#x002B "Sm" t)
(#x002B "Math" t)
(#x002B "IsMath" t)
(#x002B "math" t)
(#x211C "Math" t)
(#x1D7D2 "Math" t)
(#x002A "Math" nil)
(#x25C9 "Math" nil)
(#x0000 "NonCharacterCodePoint" nil)
(#xFDD0 "NonCharacterCodePoint" t)
(#xFDD0 "Non-Character-Code-Point" t)
(#xFDD0 "non-character-code-point" t)
(#xFFFFF "NonCharacterCodePoint" t)
(#x0043 "Arabic" nil)
(#x0606 "Arabic" t)
(#x0606 "arabic" t)
(#x0606 "IsArabic" t)
(#x0606 "Script:Arabic" t)
(#x0044 "IsVariationSelector" nil)
(#x0044 "VariationSelector" nil)
(#x180B "VariationSelector" t)
(#x180B "Variation_Selector" t)
(#x180B "Variation-Selector" t)
(#x180B "variationselector" t)
(#x180B "variation selector" t)
(#x180B "IsVariationSelector" t)
(#x00B5 "XIDContinue" t)
(#x00B5 "IsXIDContinue" t)
(#x00B5 "IsXID_Continue" t)
(#x00B5 "Is_XID_Continue" t)
(#x00B5 "XID_Continue" t)
(#x33FF "Unified_Ideograph" nil)
(#x33FF "Ideographic" nil)
(#x3400 "Unified_Ideograph" t)
(#x3400 "Ideographic" t)
(#x3400 "Han" t)
(#x3400 "OtherLetter" t)
(#x3400 "Alphabetic" t)
(#x3400 "Common" nil)
(#x3400 "Assigned" t)
(#x3400 "Any" t)
(#x0378 "Cn" t)
(#x0378 "Unassigned" t)
(#x0377 "Cn" nil)
(#x0377 "Unassigned" nil)
(#x2800 "Braille" t)
(#x2800 "Script:Braille" t)
(#x2800 "OtherSymbol" t)
(#x0027 "QuotationMark" t)
(#x201C "QuotationMark" t)
(#x201C "OtherNeutral" t)
(#x201C "PatternSyntax" t)
(#x0028 "Bidi_Mirrored" t)
(#x0028 "BidiMirrored" t)
(#x0028 "IsBidiMirrored" t)
(#x0027 "Bidi_Mirrored" nil)