Update to current dev version
git-svn-id: svn://bknr.net/svn/trunk/thirdparty/cl-ppcre@3581 4281704c-cde7-0310-8518-8e2dc76b1ff0
This commit is contained in:
37
test/packages.lisp
Normal file
37
test/packages.lisp
Normal file
@ -0,0 +1,37 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/packages.lisp,v 1.3 2008/07/22 12:58:52 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
|
||||
;;; * Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
|
||||
;;; * Redistributions in binary form must reproduce the above
|
||||
;;; copyright notice, this list of conditions and the following
|
||||
;;; disclaimer in the documentation and/or other materials
|
||||
;;; provided with the distribution.
|
||||
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
||||
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package :cl-user)
|
||||
|
||||
(defpackage :cl-ppcre-test
|
||||
#+genera (:shadowing-import-from :common-lisp :lambda)
|
||||
(:use #-:genera :cl #+:genera :future-common-lisp :cl-ppcre)
|
||||
(:import-from :cl-ppcre :*standard-optimize-settings*
|
||||
:string-list-to-simple-string)
|
||||
(:export :run-all-tests :unicode-test))
|
||||
150
test/perl-tests.lisp
Normal file
150
test/perl-tests.lisp
Normal file
@ -0,0 +1,150 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/perl-tests.lisp,v 1.7 2008/07/22 23:02:04 edi Exp $
|
||||
|
||||
;;; The tests in this file test CL-PPCRE against testdata generated by
|
||||
;;; the Perl program `perltest.pl' from the input file `testinput' in
|
||||
;;; order to check compatibility with Perl and correctness of the
|
||||
;;; regex engine.
|
||||
|
||||
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
|
||||
;;; * Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
|
||||
;;; * Redistributions in binary form must reproduce the above
|
||||
;;; copyright notice, this list of conditions and the following
|
||||
;;; disclaimer in the documentation and/or other materials
|
||||
;;; provided with the distribution.
|
||||
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
||||
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package :cl-ppcre-test)
|
||||
|
||||
(defvar *tests-to-skip* '(662 790 1439)
|
||||
"Some tests we skip because the testdata is generated by a Perl
|
||||
program and CL-PPCRE differs from Perl for these tests - on purpose.")
|
||||
|
||||
(defun create-string-from-input (input)
|
||||
"Converts INPUT to a string which can be used in TEST below. The
|
||||
input file `testdata' encodes strings containing non-printable
|
||||
characters as lists where those characters are represented by their
|
||||
character code."
|
||||
(etypecase input
|
||||
((or null string) input)
|
||||
(list (string-list-to-simple-string
|
||||
(loop for element in input
|
||||
if (stringp element)
|
||||
collect element
|
||||
else
|
||||
collect (string (code-char element)))))))
|
||||
|
||||
(defun perl-test (&key (file-name
|
||||
(make-pathname :name "perltestdata"
|
||||
:type nil :version nil
|
||||
:defaults *this-file*)
|
||||
file-name-provided-p)
|
||||
(external-format '(:latin-1 :eol-style :lf))
|
||||
verbose)
|
||||
"Loops through all test cases in FILE-NAME and prints a report if
|
||||
VERBOSE is true. EXTERNAL-FORMAT is the FLEXI-STREAMS external format
|
||||
which is used to read the file. Returns a true value if all tests
|
||||
succeeded.
|
||||
|
||||
For the syntax of the tests in FILE-NAME refer to the source code of
|
||||
this function and to the Perl script perltest.pl which generates such
|
||||
test files."
|
||||
(declare #.*standard-optimize-settings*)
|
||||
(with-open-file (binary-stream file-name :element-type 'flex:octet)
|
||||
(let ((stream (flex:make-flexi-stream binary-stream :external-format external-format))
|
||||
;; the standard Perl tests don't need full Unicode support
|
||||
(*regex-char-code-limit* (if file-name-provided-p *regex-char-code-limit* 256))
|
||||
;; we need this for the standard test suite or otherwise we
|
||||
;; might get stack overflows
|
||||
(*optimize-char-classes* (if file-name-provided-p *optimize-char-classes* :charmap))
|
||||
;; we only check for correctness and don't care about speed
|
||||
;; that match (but rather about space constraints of the
|
||||
;; trial versions)
|
||||
(*use-bmh-matchers* (if file-name-provided-p *use-bmh-matchers* nil))
|
||||
;; some tests in the Perl suite explicitly check for this
|
||||
(*allow-quoting* (if file-name-provided-p *allow-quoting* t)))
|
||||
(do-tests ((format nil "Running tests in file ~S" (file-namestring file-name))
|
||||
(not verbose))
|
||||
(let ((input-line (or (read stream nil) (done)))
|
||||
errors)
|
||||
(destructuring-bind (counter
|
||||
info-string%
|
||||
regex%
|
||||
case-insensitive-mode
|
||||
multi-line-mode
|
||||
single-line-mode
|
||||
extended-mode
|
||||
target%
|
||||
perl-error
|
||||
expected-result%
|
||||
expected-registers)
|
||||
input-line
|
||||
(destructuring-bind (info-string regex target expected-result)
|
||||
(mapcar 'create-string-from-input
|
||||
(list info-string% regex% target% expected-result%))
|
||||
(setq expected-registers (mapcar 'create-string-from-input expected-registers))
|
||||
(unless (find counter *tests-to-skip* :test #'=)
|
||||
(when verbose
|
||||
(format t "~&~4D: ~S" counter info-string))
|
||||
(let ((scanner
|
||||
(handler-bind ((error (lambda (condition)
|
||||
(declare (ignore condition))
|
||||
(when perl-error
|
||||
;; we expected an
|
||||
;; error, so we can
|
||||
;; signal success
|
||||
(return-from test-block)))))
|
||||
(create-scanner regex
|
||||
:case-insensitive-mode case-insensitive-mode
|
||||
:multi-line-mode multi-line-mode
|
||||
:single-line-mode single-line-mode
|
||||
:extended-mode extended-mode))))
|
||||
(block test-block
|
||||
(multiple-value-bind (start end reg-starts reg-ends)
|
||||
(scan scanner target)
|
||||
(cond (perl-error
|
||||
(push (format nil "expected an error but got a result.")
|
||||
errors))
|
||||
(t
|
||||
(when (not (eq start expected-result))
|
||||
(if start
|
||||
(let ((result (subseq target start end)))
|
||||
(unless (string= result expected-result)
|
||||
(push (format nil "expected ~S but got ~S."
|
||||
expected-result result)
|
||||
errors))
|
||||
(setq reg-starts (coerce reg-starts 'list)
|
||||
reg-ends (coerce reg-ends 'list))
|
||||
(loop for i from 0
|
||||
for expected-register in expected-registers
|
||||
for reg-start = (nth i reg-starts)
|
||||
for reg-end = (nth i reg-ends)
|
||||
for register = (if (and reg-start reg-end)
|
||||
(subseq target reg-start reg-end)
|
||||
nil)
|
||||
unless (string= expected-register register)
|
||||
do (push (format nil "\\~A: expected ~S but got ~S."
|
||||
(1+ i) expected-register register)
|
||||
errors)))
|
||||
(push (format nil "expected ~S but got ~S."
|
||||
expected-result start)
|
||||
errors))))))
|
||||
errors))))))))))
|
||||
160
test/perltest.pl
Normal file
160
test/perltest.pl
Normal file
@ -0,0 +1,160 @@
|
||||
#!/usr/bin/perl
|
||||
# $Header: /usr/local/cvsrep/cl-ppcre/test/perltest.pl,v 1.1 2008/07/06 21:24:39 edi Exp $
|
||||
|
||||
# This is a heavily modified version of the file 'perltest' which
|
||||
# comes with the PCRE library package, which is open source software,
|
||||
# written by Philip Hazel, and copyright by the University of
|
||||
# Cambridge, England.
|
||||
|
||||
# The PCRE library package is available from
|
||||
# <ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/>
|
||||
|
||||
sub string_for_lisp {
|
||||
my(@a, $t, $in_string, $switch);
|
||||
|
||||
my $string = shift;
|
||||
$string =~ s/\\/\\\\/g;
|
||||
$string =~ s/"/\\"/g;
|
||||
|
||||
return "\"$string\""
|
||||
if $string =~ /^[\n\x20-\x7f]*$/;
|
||||
|
||||
$in_string = 1;
|
||||
foreach $c (split(//, $string)) {
|
||||
if (ord $c >= 32 and ord $c < 127) {
|
||||
if ($in_string) {
|
||||
$t .= $c;
|
||||
} else {
|
||||
$in_string = 1;
|
||||
$t = $c;
|
||||
}
|
||||
} else {
|
||||
if ($in_string) {
|
||||
push @a, "\"$t\"";
|
||||
$in_string = 0;
|
||||
$switch = 1;
|
||||
}
|
||||
push @a, ord $c;
|
||||
}
|
||||
}
|
||||
if ($switch) {
|
||||
if ($in_string) {
|
||||
push @a, "\"$t\"";
|
||||
}
|
||||
'(' . (join ' ', @a) . ')';
|
||||
} else {
|
||||
"\"$t\"";
|
||||
}
|
||||
}
|
||||
|
||||
NEXT_RE: while (1) {
|
||||
last
|
||||
if !($_ = <>);
|
||||
next
|
||||
if $_ eq "";
|
||||
|
||||
$pattern = $_;
|
||||
|
||||
while ($pattern !~ /^\s*(.).*\1/s) {
|
||||
last
|
||||
if !($_ = <>);
|
||||
$pattern .= $_;
|
||||
}
|
||||
|
||||
chomp($pattern);
|
||||
$pattern =~ s/\s+$//;
|
||||
$pattern =~ s/\+(?=[a-z]*$)//;
|
||||
|
||||
$multi_line_mode = ($pattern =~ /m[a-z]*$/) ? 't' : 'nil';
|
||||
$single_line_mode = ($pattern =~ /s[a-z]*$/) ? 't' : 'nil';
|
||||
$extended_mode = ($pattern =~ /x[a-z]*$/) ? 't' : 'nil';
|
||||
$case_insensitive_mode = ($pattern =~ /i[a-z]*$/) ? 't' : 'nil';
|
||||
$pattern =~ s/^(.*)g([a-z]*)$/\1\2/;
|
||||
|
||||
$pattern_for_lisp = $pattern;
|
||||
$pattern_for_lisp =~ s/[a-z]*$//;
|
||||
$pattern_for_lisp =~ s/^\s*(.)(.*)\1/$2/s;
|
||||
$pattern_for_lisp =~ s/\\/\\\\/g;
|
||||
$pattern_for_lisp =~ s/"/\\"/g;
|
||||
|
||||
$pattern = "/(?#)/$2"
|
||||
if ($pattern =~ /^(.)\1(.*)$/);
|
||||
|
||||
while (1) {
|
||||
last NEXT_RE
|
||||
if !($_ = <>);
|
||||
|
||||
chomp;
|
||||
|
||||
s/\s+$//;
|
||||
s/^\s+//;
|
||||
|
||||
last
|
||||
if ($_ eq "");
|
||||
|
||||
$info_string = string_for_lisp "\"$_\" =~ $pattern";
|
||||
$x = eval "\"$_\"";
|
||||
|
||||
@subs = ();
|
||||
|
||||
eval <<"END";
|
||||
if (\$x =~ ${pattern}) {
|
||||
push \@subs,\$&;
|
||||
push \@subs,\$1;
|
||||
push \@subs,\$2;
|
||||
push \@subs,\$3;
|
||||
push \@subs,\$4;
|
||||
push \@subs,\$5;
|
||||
push \@subs,\$6;
|
||||
push \@subs,\$7;
|
||||
push \@subs,\$8;
|
||||
push \@subs,\$9;
|
||||
push \@subs,\$10;
|
||||
push \@subs,\$11;
|
||||
push \@subs,\$12;
|
||||
push \@subs,\$13;
|
||||
push \@subs,\$14;
|
||||
push \@subs,\$15;
|
||||
push \@subs,\$16;
|
||||
}
|
||||
|
||||
\$test = sub {
|
||||
my \$times = shift;
|
||||
|
||||
my \$start = time;
|
||||
for (my \$i = 0; \$i < \$times; \$i++) {
|
||||
\$x =~ ${pattern};
|
||||
}
|
||||
return time - \$start;
|
||||
};
|
||||
END
|
||||
|
||||
$counter++;
|
||||
print STDERR "$counter\n";
|
||||
|
||||
if ($@) {
|
||||
$error = 't';
|
||||
} else {
|
||||
$error = 'nil';
|
||||
}
|
||||
|
||||
print "($counter $info_string \"$pattern_for_lisp\" $case_insensitive_mode $multi_line_mode $single_line_mode $extended_mode " . string_for_lisp($x) . " $error ";
|
||||
if (!@subs) {
|
||||
print 'nil nil';
|
||||
} else {
|
||||
print string_for_lisp($subs[0]) . ' (';
|
||||
undef $not_first;
|
||||
for ($i = 1; $i <= 16; $i++) {
|
||||
print ' '
|
||||
unless $i == 1;
|
||||
if (defined $subs[$i]) {
|
||||
print string_for_lisp $subs[$i];
|
||||
} else {
|
||||
print 'nil';
|
||||
}
|
||||
}
|
||||
print ')';
|
||||
}
|
||||
print ")\n";
|
||||
}
|
||||
}
|
||||
14288
test/perltestdata
Normal file
14288
test/perltestdata
Normal file
File diff suppressed because one or more lines are too long
3948
test/perltestinput
Normal file
3948
test/perltestinput
Normal file
File diff suppressed because it is too large
Load Diff
349
test/simple
Normal file
349
test/simple
Normal file
@ -0,0 +1,349 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/simple,v 1.9 2008/07/23 00:48:00 edi Exp $
|
||||
|
||||
;;; some simple tests for CL-PPCRE - entered manually and to be read
|
||||
;;; in the CL-PPCRE-TEST package; all forms are expected to return a
|
||||
;;; true value on success when EVALuated
|
||||
|
||||
(equalp (multiple-value-list (scan "(a)*b" "xaaabd"))
|
||||
(list 1 5 #(3) #(4)))
|
||||
|
||||
(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 1))
|
||||
(list 1 5 #(3) #(4)))
|
||||
|
||||
(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 2))
|
||||
(list 2 5 #(3) #(4)))
|
||||
|
||||
(null (scan "(a)*b" "xaaabd" :end 4))
|
||||
|
||||
(equalp (multiple-value-list (scan '(:greedy-repetition 0 nil #\b) "bbbc"))
|
||||
(list 0 3 #() #()))
|
||||
|
||||
(null (scan '(:greedy-repetition 4 6 #\b) "bbbc"))
|
||||
|
||||
(let ((s (create-scanner "(([a-c])+)x")))
|
||||
(equalp (multiple-value-list (scan s "abcxy"))
|
||||
(list 0 4 #(0 2) #(3 3))))
|
||||
|
||||
(equalp (multiple-value-list (scan-to-strings "[^b]*b" "aaabd"))
|
||||
(list "aaab" #()))
|
||||
|
||||
(equalp (multiple-value-list (scan-to-strings "([^b])*b" "aaabd"))
|
||||
(list "aaab" #("a")))
|
||||
|
||||
(equalp (multiple-value-list (scan-to-strings "(([^b])*)b" "aaabd"))
|
||||
(list "aaab" #("aaa" "a")))
|
||||
|
||||
(equalp (register-groups-bind (first second third fourth)
|
||||
("((a)|(b)|(c))+" "abababc" :sharedp t)
|
||||
(list first second third fourth))
|
||||
(list "c" "a" "b" "c"))
|
||||
|
||||
(equalp (register-groups-bind (nil second third fourth)
|
||||
("((a)|(b)|(c))()+" "abababc" :start 6)
|
||||
(list second third fourth))
|
||||
(list nil nil "c"))
|
||||
|
||||
(null (register-groups-bind (first)
|
||||
("(a|b)+" "accc" :start 1)
|
||||
first))
|
||||
|
||||
(equalp (register-groups-bind (fname lname (#'parse-integer date month year))
|
||||
("(\\w+)\\s+(\\w+)\\s+(\\d{1,2})\\.(\\d{1,2})\\.(\\d{4})" "Frank Zappa 21.12.1940")
|
||||
(list fname lname (encode-universal-time 0 0 0 date month year 0)))
|
||||
(list "Frank" "Zappa" 1292889600))
|
||||
|
||||
(flet ((foo (regex target-string &key (start 0) (end (length target-string)))
|
||||
(let ((sum 0))
|
||||
(do-matches (s e regex target-string nil :start start :end end)
|
||||
(incf sum (- e s)))
|
||||
(/ sum (- end start)))))
|
||||
(and (= 1/3 (foo "a" "abcabcabc"))
|
||||
(= 5/9 (foo "aa|b" "aacabcbbc"))))
|
||||
|
||||
(labels ((crossfoot (target-string &key (start 0) (end (length target-string)))
|
||||
(let ((sum 0))
|
||||
(do-matches-as-strings (m :digit-class
|
||||
target-string nil
|
||||
:start start :end end)
|
||||
(incf sum (parse-integer m)))
|
||||
(if (< sum 10)
|
||||
sum
|
||||
(crossfoot (format nil "~A" sum))))))
|
||||
(and (zerop (crossfoot "bar"))
|
||||
(= 3 (crossfoot "a3x"))
|
||||
(= 6 (crossfoot "12345"))))
|
||||
|
||||
(let (result)
|
||||
(do-register-groups (first second third fourth)
|
||||
("((a)|(b)|(c))" "abababc" nil :start 2 :sharedp t)
|
||||
(push (list first second third fourth) result))
|
||||
(equal (nreverse result)
|
||||
'(("a" "a" nil nil)
|
||||
("b" nil "b" nil)
|
||||
("a" "a" nil nil)
|
||||
("b" nil "b" nil)
|
||||
("c" nil nil "c"))))
|
||||
|
||||
(let (result)
|
||||
(do-register-groups ((#'parse-integer n) (#'intern sign) whitespace)
|
||||
("(\\d+)|(\\+|-|\\*|/)|(\\s+)" "12*15 - 42/3")
|
||||
(unless whitespace
|
||||
(push (or n sign) result)))
|
||||
(equal (nreverse result)
|
||||
'(12 * 15 - 42 / 3)))
|
||||
|
||||
(equal (all-matches "a" "foo bar baz")
|
||||
(list 5 6 9 10))
|
||||
|
||||
(equal (all-matches "\\w*" "foo bar baz")
|
||||
(list 0 3 3 3 4 7 7 7 8 11 11 11))
|
||||
|
||||
(equal (all-matches-as-strings "a" "foo bar baz")
|
||||
(list "a" "a"))
|
||||
|
||||
(equal (all-matches-as-strings "\\w*" "foo bar baz")
|
||||
(list "foo" "" "bar" "" "baz" ""))
|
||||
|
||||
(equal (split "\\s+" "foo bar baz
|
||||
frob")
|
||||
'("foo" "bar" "baz" "frob"))
|
||||
|
||||
(equal (split "\\s*" "foo bar baz")
|
||||
'("f" "o" "o" "b" "a" "r" "b" "a" "z"))
|
||||
|
||||
(equal (split "(\\s+)" "foo bar baz")
|
||||
'("foo" "bar" "baz"))
|
||||
|
||||
(equal (split "(\\s+)" "foo bar baz" :with-registers-p t)
|
||||
'("foo" " " "bar" " " "baz"))
|
||||
|
||||
(equal (split "(\\s)(\\s*)" "foo bar baz" :with-registers-p t)
|
||||
'("foo" " " "" "bar" " " " " "baz"))
|
||||
|
||||
(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t)
|
||||
'("foo" "," nil "bar" nil ";" "baz"))
|
||||
|
||||
(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t :omit-unmatched-p t)
|
||||
'("foo" "," "bar" ";" "baz"))
|
||||
|
||||
(equal (split ":" "a:b:c:d:e:f:g::")
|
||||
'("a" "b" "c" "d" "e" "f" "g"))
|
||||
|
||||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 1)
|
||||
'("a:b:c:d:e:f:g::"))
|
||||
|
||||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 2)
|
||||
'("a" "b:c:d:e:f:g::"))
|
||||
|
||||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 3)
|
||||
'("a" "b" "c:d:e:f:g::"))
|
||||
|
||||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 1000)
|
||||
'("a" "b" "c" "d" "e" "f" "g" "" ""))
|
||||
|
||||
(equal (multiple-value-list (regex-replace "fo+" "foo bar" "frob"))
|
||||
(list "frob bar" t))
|
||||
|
||||
(equal (multiple-value-list (regex-replace "fo+" "FOO bar" "frob"))
|
||||
(list "FOO bar" nil))
|
||||
|
||||
(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob"))
|
||||
(list "frob bar" t))
|
||||
|
||||
(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob" :preserve-case t))
|
||||
(list "FROB bar" t))
|
||||
|
||||
(equal (multiple-value-list (regex-replace "(?i)fo+" "Foo bar" "frob" :preserve-case t))
|
||||
(list "Frob bar" t))
|
||||
|
||||
(equal (multiple-value-list (regex-replace "bar" "foo bar baz" "[frob (was '\\&' between '\\`' and '\\'')]"))
|
||||
(list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
|
||||
|
||||
(equal (multiple-value-list
|
||||
(regex-replace "bar" "foo bar baz"
|
||||
'("[frob (was '" :match "' between '" :before-match "' and '" :after-match "')]")))
|
||||
(list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
|
||||
|
||||
(equal (multiple-value-list (regex-replace "(be)(nev)(o)(lent)"
|
||||
"benevolent: adj. generous, kind"
|
||||
(lambda (match &rest registers)
|
||||
(format nil "~A [~{~A~^.~}]" match registers))
|
||||
:simple-calls t))
|
||||
(list "benevolent [be.nev.o.lent]: adj. generous, kind" t))
|
||||
|
||||
(equal (multiple-value-list (regex-replace-all "(?i)fo+" "foo Fooo FOOOO bar" "frob" :preserve-case t))
|
||||
(list "frob Frob FROB bar" t))
|
||||
|
||||
(string= (regex-replace-all "(?i)f(o+)" "foo Fooo FOOOO bar" "fr\\1b" :preserve-case t)
|
||||
"froob Frooob FROOOOB bar")
|
||||
|
||||
(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
|
||||
(flet ((encode-quoted-printable (string)
|
||||
"Converts 8-bit string to quoted-printable representation."
|
||||
;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
|
||||
(flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
|
||||
(declare (ignore start end match-end reg-starts reg-ends))
|
||||
(format nil "=~2,'0x" (char-code (char target-string match-start)))))
|
||||
(regex-replace-all qp-regex string #'convert))))
|
||||
(string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
|
||||
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
|
||||
|
||||
(let ((url-regex (create-scanner "[^a-zA-Z0-9_\\-.]")))
|
||||
(flet ((url-encode (string)
|
||||
"URL-encodes a string."
|
||||
;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
|
||||
(flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
|
||||
(declare (ignore start end match-end reg-starts reg-ends))
|
||||
(format nil "%~2,'0x" (char-code (char target-string match-start)))))
|
||||
(regex-replace-all url-regex string #'convert))))
|
||||
(string= (url-encode "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
|
||||
"F%EAte%20S%F8rensen%20na%EFve%20H%FChner%20Stra%DFe")))
|
||||
|
||||
(flet ((how-many (target-string start end match-start match-end reg-starts reg-ends)
|
||||
(declare (ignore target-string start end match-start match-end))
|
||||
(format nil "~A" (- (svref reg-ends 0)
|
||||
(svref reg-starts 0)))))
|
||||
(string= (regex-replace-all "{(.+?)}"
|
||||
"foo{...}bar{.....}{..}baz{....}frob"
|
||||
(list "[" #'how-many " dots]"))
|
||||
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
|
||||
|
||||
(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
|
||||
(flet ((encode-quoted-printable (string)
|
||||
"Converts 8-bit string to quoted-printable representation.
|
||||
Version using SIMPLE-CALLS keyword argument."
|
||||
;; ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
|
||||
(flet ((convert (match)
|
||||
(format nil "=~2,'0x" (char-code (char match 0)))))
|
||||
(regex-replace-all qp-regex string #'convert
|
||||
:simple-calls t))))
|
||||
(string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
|
||||
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
|
||||
|
||||
(flet ((how-many (match first-register)
|
||||
(declare (ignore match))
|
||||
(format nil "~A" (length first-register))))
|
||||
(string= (regex-replace-all "{(.+?)}"
|
||||
"foo{...}bar{.....}{..}baz{....}frob"
|
||||
(list "[" #'how-many " dots]")
|
||||
:simple-calls t)
|
||||
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
|
||||
|
||||
(flet ((my-repetition (char min)
|
||||
`(:greedy-repetition ,min nil ,char)))
|
||||
(setf (parse-tree-synonym 'a*) (my-repetition #\a 0)
|
||||
(parse-tree-synonym 'b+) (my-repetition #\b 1))
|
||||
(unwind-protect
|
||||
(let ((scanner (create-scanner '(:sequence a* b+))))
|
||||
(equal (mapcar (lambda (target)
|
||||
(scan scanner target))
|
||||
'("ab" "b" "aab" "a" "x"))
|
||||
(list 0 0 0 nil nil)))
|
||||
(setf (parse-tree-synonym 'a*) nil
|
||||
(parse-tree-synonym 'b+) nil)))
|
||||
|
||||
(null (scan "^a+$" "a+"))
|
||||
|
||||
(let ((*allow-quoting* t))
|
||||
;;we use CREATE-SCANNER because of Lisps like SBCL that don't have an interpreter
|
||||
(equalp (multiple-value-list (scan (create-scanner "^\\Qa+\\E$") "a+"))
|
||||
(list 0 2 #() #())))
|
||||
|
||||
(string= (parse-string "\\k<reg>") "k<reg>")
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
(equal (nth-value 1 (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))"))
|
||||
(list nil "small" "big")))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
(equal (nth-value 1 (create-scanner '(:register
|
||||
(:sequence
|
||||
(:named-register "small"
|
||||
(:greedy-repetition 0 nil (:char-class (:range #\a #\z))))
|
||||
(:named-register "big"
|
||||
(:greedy-repetition 0 nil (:char-class (:range #\a #\z))))))))
|
||||
(list nil "small" "big")))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
(equalp (multiple-value-list (scan (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))") "aaaBBB"))
|
||||
(list 0 6 #(0 0 3) #(6 3 6))))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
;; multiple-choice back-reference
|
||||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a1aa"))
|
||||
(list 0 4 #(0 1) #(1 2))))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a22a"))
|
||||
(list 0 4 #(0 1) #(1 2))))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
;; demonstrating most-recently-seen-register-first property of back-reference;
|
||||
;; "greedy" regex (analogous to "aa?")
|
||||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "a"))
|
||||
(list 0 1 #(0 0 1) #(0 1 1))))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "aa"))
|
||||
(list 0 2 #(0 0 1) #(0 1 2))))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
;; switched groups
|
||||
;; "lazy" regex (analogous to "aa??")
|
||||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "a"))
|
||||
(list 0 1 #(0 1 1) #(1 1 1))))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
;; scanner ignores the second "a"
|
||||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "aa"))
|
||||
(list 0 1 #(0 1 1) #(1 1 1))))
|
||||
|
||||
(let ((*allow-named-registers* t))
|
||||
;; "aa" will be matched only when forced by adding "$" at the end
|
||||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)$") "aa"))
|
||||
(list 0 2 #(0 1 1) #(1 1 2))))
|
||||
|
||||
(string= (quote-meta-chars "[a-z]*") "\\[a\\-z\\]\\*")
|
||||
|
||||
(string= (handler-case
|
||||
(create-scanner "foo**x")
|
||||
(ppcre-syntax-error (condition)
|
||||
(format nil "Houston, we've got a problem with the string ~S: Looks like something went wrong at position ~A. The last message we received was \"~?\"."
|
||||
(ppcre-syntax-error-string condition)
|
||||
(ppcre-syntax-error-pos condition)
|
||||
(simple-condition-format-control condition)
|
||||
(simple-condition-format-arguments condition))))
|
||||
"Houston, we've got a problem with the string \"foo**x\": Looks like something went wrong at position 4. The last message we received was \"Quantifier '*' not allowed.\".")
|
||||
|
||||
(flet ((my-weird-filter (pos)
|
||||
"Only match at this point if either pos is odd and the
|
||||
character we're looking at is lowercase or if pos is even and the next
|
||||
two characters we're looking at are uppercase. Consume these
|
||||
characters if there's a match."
|
||||
(cond ((and (oddp pos)
|
||||
(< pos cl-ppcre::*end-pos*)
|
||||
(lower-case-p (char cl-ppcre::*string* pos)))
|
||||
(1+ pos))
|
||||
((and (evenp pos)
|
||||
(< (1+ pos) cl-ppcre::*end-pos*)
|
||||
(upper-case-p (char cl-ppcre::*string* pos))
|
||||
(upper-case-p (char cl-ppcre::*string* (1+ pos))))
|
||||
(+ pos 2))
|
||||
(t nil))))
|
||||
(let ((weird-regex `(:sequence "+" (:filter ,#'my-weird-filter) "+")))
|
||||
(equalp (multiple-value-list (scan weird-regex "+A++a+AA+"))
|
||||
(list 5 9 #() #()))))
|
||||
|
||||
(let ((a "\\E*"))
|
||||
(equalp (multiple-value-list (scan (concatenate 'string "(?:" (quote-meta-chars a) "){2}") "\\E*\\E*"))
|
||||
(list 0 6 #() #())))
|
||||
|
||||
(let ((a "\\E*"))
|
||||
(equalp (multiple-value-list (scan `(:greedy-repetition 2 2 ,a) "\\E*\\E*"))
|
||||
(list 0 6 #() #())))
|
||||
|
||||
(loop for *optimize-char-classes* in '(:hash-table :hash-table* :charset :charset* :charmap)
|
||||
for s = (create-scanner "(([a-c])+)x")
|
||||
always (equalp (multiple-value-list (scan s "abcxy"))
|
||||
(list 0 4 #(0 2) #(3 3))))
|
||||
159
test/tests.lisp
Normal file
159
test/tests.lisp
Normal file
@ -0,0 +1,159 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/tests.lisp,v 1.12 2008/07/23 00:48:00 edi Exp $
|
||||
|
||||
;;; The tests in this file test CL-PPCRE against testdata generated by
|
||||
;;; the Perl program `perltest.pl' from the input file `testinput' in
|
||||
;;; order to check compatibility with Perl and correctness of the
|
||||
;;; regex engine.
|
||||
|
||||
;;; Copyright (c) 2002-2008, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
|
||||
;;; * Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
|
||||
;;; * Redistributions in binary form must reproduce the above
|
||||
;;; copyright notice, this list of conditions and the following
|
||||
;;; disclaimer in the documentation and/or other materials
|
||||
;;; provided with the distribution.
|
||||
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
||||
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package :cl-ppcre-test)
|
||||
|
||||
(defvar *this-file* (load-time-value
|
||||
(or #.*compile-file-pathname* *load-pathname*))
|
||||
"The location of this source file.")
|
||||
|
||||
(defmacro do-tests ((name &optional show-progress-p) &body body)
|
||||
"Helper macro which repeatedly executes BODY until the code in body
|
||||
calls the function DONE. It is assumed that each invocation of BODY
|
||||
will be the execution of one test which returns NIL in case of success
|
||||
and list of string describing errors otherwise.
|
||||
|
||||
The macro prints a simple progress indicator \(one dots for ten tests)
|
||||
to *STANDARD-OUTPUT* unless SHOW-PROGRESS-P is NIL and returns a true
|
||||
value iff all tests succeeded. Errors in BODY are caught and reported
|
||||
\(and counted as failures)."
|
||||
`(let ((successp t)
|
||||
(testcount 1))
|
||||
(block test-block
|
||||
(flet ((done ()
|
||||
(return-from test-block successp)))
|
||||
(format t "~&Test: ~A~%" ,name)
|
||||
(loop
|
||||
(when (and ,show-progress-p (zerop (mod testcount 10)))
|
||||
(format t ".")
|
||||
(when (zerop (mod testcount 100))
|
||||
(terpri))
|
||||
(force-output))
|
||||
(let ((errors
|
||||
(handler-case
|
||||
(progn ,@body)
|
||||
(error (msg)
|
||||
(list (format nil "~&got an unexpected error: ~A" msg))))))
|
||||
(setq successp (and successp (null errors)))
|
||||
(when errors
|
||||
(format t "~&~4@A:~{~& ~A~}~%" testcount errors))
|
||||
(incf testcount)))))
|
||||
successp))
|
||||
|
||||
(defun simple-tests (&key (file-name
|
||||
(make-pathname :name "simple"
|
||||
:type nil :version nil
|
||||
:defaults *this-file*))
|
||||
(external-format '(:latin-1 :eol-style :lf))
|
||||
verbose)
|
||||
"Loops through all the forms in the file FILE-NAME and executes each
|
||||
of them using EVAL. It is assumed that each FORM specifies a test
|
||||
which returns a true value iff it succeeds. Prints each test form to
|
||||
*STANDARD-OUTPUT* if VERBOSE is true and shows a simple progress
|
||||
indicator otherwise. EXTERNAL-FORMAT is the FLEXI-STREAMS external
|
||||
format which is used to read the file. Returns a true value iff all
|
||||
tests succeeded."
|
||||
(with-open-file (binary-stream file-name :element-type 'flex:octet)
|
||||
(let ((stream (flex:make-flexi-stream binary-stream :external-format external-format))
|
||||
(*package* (find-package :cl-ppcre-test)))
|
||||
(do-tests ((format nil "Simple tests from file ~S" (file-namestring file-name))
|
||||
(not verbose))
|
||||
(let ((form (or (read stream nil) (done))))
|
||||
(when verbose
|
||||
(format t "~&~S" form))
|
||||
(cond ((eval form) nil)
|
||||
(t (list (format nil "~S returned NIL" form)))))))))
|
||||
|
||||
(defun random-test-function (probability)
|
||||
"Returns a random character test function which contains each
|
||||
character with probability PROBABILITY."
|
||||
(let ((hash-table (make-hash-table)))
|
||||
(dotimes (code char-code-limit)
|
||||
(let ((char (code-char code)))
|
||||
(when (and char (< (random 1.0d0) probability))
|
||||
(setf (gethash (code-char code) hash-table) t))))
|
||||
(lambda (char)
|
||||
(gethash char hash-table))))
|
||||
|
||||
(defun test-optimized-test-functions% (probability)
|
||||
"Creates a random test function with probability PROBABILITY and six
|
||||
\(one for each possible \"kind\") corresponding optimized test
|
||||
functions, then checks for each character in turn that all functions
|
||||
agree on it."
|
||||
(let* ((test-function (random-test-function probability))
|
||||
(optimized-functions (loop for kind in '(nil
|
||||
:hash-table
|
||||
:hash-table*
|
||||
:charset
|
||||
:charset*
|
||||
:charmap)
|
||||
collect (create-optimized-test-function test-function :kind kind))))
|
||||
(loop for code below char-code-limit
|
||||
for char = (code-char code)
|
||||
for expected-result = (and char (funcall test-function char))
|
||||
always (or (null char)
|
||||
(loop for optimized-function in optimized-functions
|
||||
always (eq (not (funcall optimized-function char))
|
||||
(not expected-result)))))))
|
||||
|
||||
(defun test-optimized-test-functions (&key verbose)
|
||||
"Runs TEST-OPTIMIZED-TEST-FUNCTIONS% with different probabilities."
|
||||
(let ((probabilities '(0 .001 .01 .1 1)))
|
||||
(do-tests ("Optimized test functions - this might take some time..." (not verbose))
|
||||
(let ((probability (or (pop probabilities) (done))))
|
||||
(when verbose
|
||||
(format t "~&Probability is ~A" probability))
|
||||
(not (test-optimized-test-functions% probability))))))
|
||||
|
||||
(defun run-all-tests (&key more-tests verbose)
|
||||
"Runs all tests for CL-PPCRE and returns a true value iff all tests
|
||||
succeeded. VERBOSE is interpreted by the individual test suites.
|
||||
MORE-TESTS can be a list of function designators designating
|
||||
additional tests to run. This facility is used by the tests for
|
||||
CL-PPCRE-UNICODE."
|
||||
(let ((successp t))
|
||||
(macrolet ((run-test-suite (&body body)
|
||||
`(unless (progn ,@body)
|
||||
(setq successp nil))))
|
||||
;; run the automatically generated Perl tests
|
||||
(run-test-suite (perl-test :verbose verbose))
|
||||
(run-test-suite (test-optimized-test-functions :verbose verbose))
|
||||
(run-test-suite (simple-tests :verbose verbose))
|
||||
(when more-tests
|
||||
(unless (listp more-tests)
|
||||
(setq more-tests (list more-tests))
|
||||
(dolist (test more-tests)
|
||||
(run-test-suite (funcall test :verbose verbose))))))
|
||||
(format t "~2&~:[Some tests failed~;All tests passed~]." successp)
|
||||
successp))
|
||||
80
test/unicode-tests.lisp
Normal file
80
test/unicode-tests.lisp
Normal file
@ -0,0 +1,80 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/unicode-tests.lisp,v 1.8 2008/07/23 00:17:53 edi Exp $
|
||||
|
||||
;;; Copyright (c) 2008, Dr. Edmund Weitz. All rights reserved.
|
||||
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
|
||||
;;; * Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
|
||||
;;; * Redistributions in binary form must reproduce the above
|
||||
;;; copyright notice, this list of conditions and the following
|
||||
;;; disclaimer in the documentation and/or other materials
|
||||
;;; provided with the distribution.
|
||||
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
||||
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(in-package :cl-ppcre-test)
|
||||
|
||||
(defun unicode-test (&key (file-name
|
||||
(make-pathname :name "unicodetestdata"
|
||||
:type nil :version nil
|
||||
:defaults *this-file*)
|
||||
file-name-provided-p)
|
||||
verbose)
|
||||
"Loops through all test cases in FILE-NAME and prints a report if
|
||||
VERBOSE is true. Returns a true value if all tests succeeded.
|
||||
|
||||
For the syntax of the tests in FILE-NAME refer to CL-UNICODE."
|
||||
(with-open-file (stream file-name)
|
||||
(let ((*regex-char-code-limit* (if file-name-provided-p *regex-char-code-limit* char-code-limit))
|
||||
(*optimize-char-classes* (if file-name-provided-p *optimize-char-classes* nil))
|
||||
;; we only check for correctness and don't care about speed
|
||||
;; that match (but rather about space constraints of the
|
||||
;; trial versions)
|
||||
(*use-bmh-matchers* (if file-name-provided-p *use-bmh-matchers* nil)))
|
||||
(do-tests ((format nil "Running Unicode tests in file ~S" (file-namestring file-name))
|
||||
(not verbose))
|
||||
(let ((input-line (or (read stream nil) (done)))
|
||||
errors)
|
||||
(destructuring-bind (char-code property-name expected-result)
|
||||
input-line
|
||||
(let ((char (and (< char-code char-code-limit) (code-char char-code))))
|
||||
(when char
|
||||
(when verbose
|
||||
(format t "~&~A: #x~X" property-name char-code))
|
||||
(let* ((string (string char))
|
||||
(result-1 (scan (format nil "\\p{~A}" property-name) string))
|
||||
(result-2 (scan (format nil "[\\p{~A}]" property-name) string))
|
||||
(inverted-result-1 (scan (format nil "\\P{~A}" property-name) string))
|
||||
(inverted-result-2 (scan (format nil "[\\P{~A}]" property-name) string)))
|
||||
(unless (eq expected-result (not (not result-1)))
|
||||
(push (format nil "\(code-char #x~X) should ~:[not ~;~]have matched \"\\p{~A}\""
|
||||
char-code expected-result property-name)
|
||||
errors))
|
||||
(unless (eq expected-result (not (not result-2)))
|
||||
(push (format nil "\(code-char #x~X) should ~:[not ~;~]have matched \"[\\p{~A}]\""
|
||||
char-code expected-result property-name)
|
||||
errors))
|
||||
(unless (eq expected-result (not inverted-result-1))
|
||||
(push (format nil "\(code-char #x~X) should ~:[~;not ~]have matched \"\\P{~A}\""
|
||||
char-code expected-result property-name)
|
||||
errors))
|
||||
(unless (eq expected-result (not inverted-result-2))
|
||||
(push (format nil "\(code-char #x~X) should ~:[~;not ~]have matched \"[\\P{~A}]\""
|
||||
char-code expected-result property-name)
|
||||
errors)))
|
||||
errors))))))))
|
||||
107
test/unicodetestdata
Normal file
107
test/unicodetestdata
Normal file
@ -0,0 +1,107 @@
|
||||
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
|
||||
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/unicodetestdata,v 1.10 2008/07/22 14:00:35 edi Exp $
|
||||
|
||||
;;; some arbitrary test data for Unicode properties - stolen from CL-UNICODE
|
||||
|
||||
(#x0001 "ASCII" t)
|
||||
(#x0100 "ASCII" nil)
|
||||
(#x000A "Alphabetic" nil)
|
||||
(#x0061 "Alphabetic" t)
|
||||
(#x0061 "Ll" t)
|
||||
(#x0041 "Alphabetic" t)
|
||||
(#x0041 "alphabetic" t)
|
||||
(#x0041 "IsAlphabetic" t)
|
||||
(#x02E4 "Alphabetic" t)
|
||||
(#x0970 "Alphabetic" nil)
|
||||
(#x030D "BidiClass:NonspacingMark" t)
|
||||
(#x030D "NonspacingMark" t)
|
||||
(#x030D "nonspacing mark" t)
|
||||
(#xE0146 "BidiClass:NonspacingMark" t)
|
||||
(#x000D "BidiClass:WhiteSpace" nil)
|
||||
(#x0020 "BidiClass:WhiteSpace" t)
|
||||
(#x2006 "BidiClass:WhiteSpace" t)
|
||||
(#x12470 "Cuneiform" t)
|
||||
(#x12470 "IsCuneiform" t)
|
||||
(#x12470 "CuneiformNumbersAndPunctuation" t)
|
||||
(#x12470 "Block:CuneiformNumbersAndPunctuation" t)
|
||||
(#x12470 "InCuneiformNumbersAndPunctuation" t)
|
||||
(#x12470 "Script:Cuneiform" t)
|
||||
(#x0041 "Block:Hebrew" nil)
|
||||
(#x0593 "Block:Hebrew" t)
|
||||
(#x0593 "InHebrew" t)
|
||||
(#x040D "Block:Cyrillic" t)
|
||||
(#x040D "InCyrillic" t)
|
||||
(#x0042 "Block:Cyrillic" nil)
|
||||
(#x2011 "Dash" t)
|
||||
(#x2011 "IsDash" t)
|
||||
(#xFF0D "Dash" t)
|
||||
(#x003D "Dash" nil)
|
||||
(#x00F0 "Lowercase" t)
|
||||
(#x00F0 "IsLowercase" t)
|
||||
(#x00F0 "lowercase" t)
|
||||
(#x00F0 "Ll" t)
|
||||
(#x0067 "Lowercase" t)
|
||||
(#x010A "Lowercase" nil)
|
||||
(#x1D6C1 "Lowercase" nil)
|
||||
(#x0023 "CurrencySymbol" nil)
|
||||
(#x0024 "CurrencySymbol" t)
|
||||
(#x0024 "IsCurrencySymbol" t)
|
||||
(#x0024 "currency symbol" t)
|
||||
(#x20AC "CurrencySymbol" t)
|
||||
(#xFFE6 "CurrencySymbol" t)
|
||||
(#x002B "Sm" t)
|
||||
(#x002B "Math" t)
|
||||
(#x002B "IsMath" t)
|
||||
(#x002B "math" t)
|
||||
(#x211C "Math" t)
|
||||
(#x1D7D2 "Math" t)
|
||||
(#x002A "Math" nil)
|
||||
(#x25C9 "Math" nil)
|
||||
(#x0000 "NonCharacterCodePoint" nil)
|
||||
(#xFDD0 "NonCharacterCodePoint" t)
|
||||
(#xFDD0 "Non-Character-Code-Point" t)
|
||||
(#xFDD0 "non-character-code-point" t)
|
||||
(#xFFFFF "NonCharacterCodePoint" t)
|
||||
(#x0043 "Arabic" nil)
|
||||
(#x0606 "Arabic" t)
|
||||
(#x0606 "arabic" t)
|
||||
(#x0606 "IsArabic" t)
|
||||
(#x0606 "Script:Arabic" t)
|
||||
(#x0044 "IsVariationSelector" nil)
|
||||
(#x0044 "VariationSelector" nil)
|
||||
(#x180B "VariationSelector" t)
|
||||
(#x180B "Variation_Selector" t)
|
||||
(#x180B "Variation-Selector" t)
|
||||
(#x180B "variationselector" t)
|
||||
(#x180B "variation selector" t)
|
||||
(#x180B "IsVariationSelector" t)
|
||||
(#x00B5 "XIDContinue" t)
|
||||
(#x00B5 "IsXIDContinue" t)
|
||||
(#x00B5 "IsXID_Continue" t)
|
||||
(#x00B5 "Is_XID_Continue" t)
|
||||
(#x00B5 "XID_Continue" t)
|
||||
(#x33FF "Unified_Ideograph" nil)
|
||||
(#x33FF "Ideographic" nil)
|
||||
(#x3400 "Unified_Ideograph" t)
|
||||
(#x3400 "Ideographic" t)
|
||||
(#x3400 "Han" t)
|
||||
(#x3400 "OtherLetter" t)
|
||||
(#x3400 "Alphabetic" t)
|
||||
(#x3400 "Common" nil)
|
||||
(#x3400 "Assigned" t)
|
||||
(#x3400 "Any" t)
|
||||
(#x0378 "Cn" t)
|
||||
(#x0378 "Unassigned" t)
|
||||
(#x0377 "Cn" nil)
|
||||
(#x0377 "Unassigned" nil)
|
||||
(#x2800 "Braille" t)
|
||||
(#x2800 "Script:Braille" t)
|
||||
(#x2800 "OtherSymbol" t)
|
||||
(#x0027 "QuotationMark" t)
|
||||
(#x201C "QuotationMark" t)
|
||||
(#x201C "OtherNeutral" t)
|
||||
(#x201C "PatternSyntax" t)
|
||||
(#x0028 "Bidi_Mirrored" t)
|
||||
(#x0028 "BidiMirrored" t)
|
||||
(#x0028 "IsBidiMirrored" t)
|
||||
(#x0027 "Bidi_Mirrored" nil)
|
||||
Reference in New Issue
Block a user