366 lines
15 KiB
Common Lisp
366 lines
15 KiB
Common Lisp
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE-TEST; Base: 10 -*-
|
||
;;; $Header: /usr/local/cvsrep/cl-ppcre/test/simple,v 1.9 2008/07/23 00:48:00 edi Exp $
|
||
|
||
;;; some simple tests for CL-PPCRE - entered manually and to be read
|
||
;;; in the CL-PPCRE-TEST package; all forms are expected to return a
|
||
;;; true value on success when EVALuated
|
||
|
||
(equalp (multiple-value-list (scan "(a)*b" "xaaabd"))
|
||
(list 1 5 #(3) #(4)))
|
||
|
||
(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 1))
|
||
(list 1 5 #(3) #(4)))
|
||
|
||
(equalp (multiple-value-list (scan "(a)*b" "xaaabd" :start 2))
|
||
(list 2 5 #(3) #(4)))
|
||
|
||
(null (scan "(a)*b" "xaaabd" :end 4))
|
||
|
||
(equalp (multiple-value-list (scan '(:greedy-repetition 0 nil #\b) "bbbc"))
|
||
(list 0 3 #() #()))
|
||
|
||
(null (scan '(:greedy-repetition 4 6 #\b) "bbbc"))
|
||
|
||
(let ((s (create-scanner "(([a-c])+)x")))
|
||
(equalp (multiple-value-list (scan s "abcxy"))
|
||
(list 0 4 #(0 2) #(3 3))))
|
||
|
||
(equalp (multiple-value-list (scan-to-strings "[^b]*b" "aaabd"))
|
||
(list "aaab" #()))
|
||
|
||
(equalp (multiple-value-list (scan-to-strings "([^b])*b" "aaabd"))
|
||
(list "aaab" #("a")))
|
||
|
||
(equalp (multiple-value-list (scan-to-strings "(([^b])*)b" "aaabd"))
|
||
(list "aaab" #("aaa" "a")))
|
||
|
||
(equalp (register-groups-bind (first second third fourth)
|
||
("((a)|(b)|(c))+" "abababc" :sharedp t)
|
||
(list first second third fourth))
|
||
(list "c" "a" "b" "c"))
|
||
|
||
(equalp (register-groups-bind (nil second third fourth)
|
||
("((a)|(b)|(c))()+" "abababc" :start 6)
|
||
(list second third fourth))
|
||
(list nil nil "c"))
|
||
|
||
(null (register-groups-bind (first)
|
||
("(a|b)+" "accc" :start 1)
|
||
first))
|
||
|
||
(equalp (register-groups-bind (fname lname (#'parse-integer date month year))
|
||
("(\\w+)\\s+(\\w+)\\s+(\\d{1,2})\\.(\\d{1,2})\\.(\\d{4})" "Frank Zappa 21.12.1940")
|
||
(list fname lname (encode-universal-time 0 0 0 date month year 0)))
|
||
(list "Frank" "Zappa" 1292889600))
|
||
|
||
(flet ((foo (regex target-string &key (start 0) (end (length target-string)))
|
||
(let ((sum 0))
|
||
(do-matches (s e regex target-string nil :start start :end end)
|
||
(incf sum (- e s)))
|
||
(/ sum (- end start)))))
|
||
(and (= 1/3 (foo "a" "abcabcabc"))
|
||
(= 5/9 (foo "aa|b" "aacabcbbc"))))
|
||
|
||
(labels ((crossfoot (target-string &key (start 0) (end (length target-string)))
|
||
(let ((sum 0))
|
||
(do-matches-as-strings (m :digit-class
|
||
target-string nil
|
||
:start start :end end)
|
||
(incf sum (parse-integer m)))
|
||
(if (< sum 10)
|
||
sum
|
||
(crossfoot (format nil "~A" sum))))))
|
||
(and (zerop (crossfoot "bar"))
|
||
(= 3 (crossfoot "a3x"))
|
||
(= 6 (crossfoot "12345"))))
|
||
|
||
(let (result)
|
||
(do-register-groups (first second third fourth)
|
||
("((a)|(b)|(c))" "abababc" nil :start 2 :sharedp t)
|
||
(push (list first second third fourth) result))
|
||
(equal (nreverse result)
|
||
'(("a" "a" nil nil)
|
||
("b" nil "b" nil)
|
||
("a" "a" nil nil)
|
||
("b" nil "b" nil)
|
||
("c" nil nil "c"))))
|
||
|
||
(let (result)
|
||
(do-register-groups ((#'parse-integer n) (#'intern sign) whitespace)
|
||
("(\\d+)|(\\+|-|\\*|/)|(\\s+)" "12*15 - 42/3")
|
||
(unless whitespace
|
||
(push (or n sign) result)))
|
||
(equal (nreverse result)
|
||
'(12 * 15 - 42 / 3)))
|
||
|
||
(equal (all-matches "a" "foo bar baz")
|
||
(list 5 6 9 10))
|
||
|
||
(equal (all-matches "\\w*" "foo bar baz")
|
||
(list 0 3 3 3 4 7 7 7 8 11 11 11))
|
||
|
||
(equal (all-matches-as-strings "a" "foo bar baz")
|
||
(list "a" "a"))
|
||
|
||
(equal (all-matches-as-strings "\\w*" "foo bar baz")
|
||
(list "foo" "" "bar" "" "baz" ""))
|
||
|
||
(equal (split "\\s+" "foo bar baz
|
||
frob")
|
||
'("foo" "bar" "baz" "frob"))
|
||
|
||
(equal (split "\\s*" "foo bar baz")
|
||
'("f" "o" "o" "b" "a" "r" "b" "a" "z"))
|
||
|
||
(equal (split "(\\s+)" "foo bar baz")
|
||
'("foo" "bar" "baz"))
|
||
|
||
(equal (split "(\\s+)" "foo bar baz" :with-registers-p t)
|
||
'("foo" " " "bar" " " "baz"))
|
||
|
||
(equal (split "(\\s)(\\s*)" "foo bar baz" :with-registers-p t)
|
||
'("foo" " " "" "bar" " " " " "baz"))
|
||
|
||
(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t)
|
||
'("foo" "," nil "bar" nil ";" "baz"))
|
||
|
||
(equal (split "(,)|(;)" "foo,bar;baz" :with-registers-p t :omit-unmatched-p t)
|
||
'("foo" "," "bar" ";" "baz"))
|
||
|
||
(equal (split ":" "a:b:c:d:e:f:g::")
|
||
'("a" "b" "c" "d" "e" "f" "g"))
|
||
|
||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 1)
|
||
'("a:b:c:d:e:f:g::"))
|
||
|
||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 2)
|
||
'("a" "b:c:d:e:f:g::"))
|
||
|
||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 3)
|
||
'("a" "b" "c:d:e:f:g::"))
|
||
|
||
(equal (split ":" "a:b:c:d:e:f:g::" :limit 1000)
|
||
'("a" "b" "c" "d" "e" "f" "g" "" ""))
|
||
|
||
(equal (multiple-value-list (regex-replace "fo+" "foo bar" "frob"))
|
||
(list "frob bar" t))
|
||
|
||
(equal (multiple-value-list (regex-replace "fo+" "FOO bar" "frob"))
|
||
(list "FOO bar" nil))
|
||
|
||
(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob"))
|
||
(list "frob bar" t))
|
||
|
||
(equal (multiple-value-list (regex-replace "(?i)fo+" "FOO bar" "frob" :preserve-case t))
|
||
(list "FROB bar" t))
|
||
|
||
(equal (multiple-value-list (regex-replace "(?i)fo+" "Foo bar" "frob" :preserve-case t))
|
||
(list "Frob bar" t))
|
||
|
||
(equal (multiple-value-list (regex-replace "bar" "foo bar baz" "[frob (was '\\&' between '\\`' and '\\'')]"))
|
||
(list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
|
||
|
||
(equal (multiple-value-list
|
||
(regex-replace "bar" "foo bar baz"
|
||
'("[frob (was '" :match "' between '" :before-match "' and '" :after-match "')]")))
|
||
(list "foo [frob (was 'bar' between 'foo ' and ' baz')] baz" t))
|
||
|
||
(equal (multiple-value-list (regex-replace "(be)(nev)(o)(lent)"
|
||
"benevolent: adj. generous, kind"
|
||
(lambda (match &rest registers)
|
||
(format nil "~A [~{~A~^.~}]" match registers))
|
||
:simple-calls t))
|
||
(list "benevolent [be.nev.o.lent]: adj. generous, kind" t))
|
||
|
||
(equal (multiple-value-list (regex-replace-all "(?i)fo+" "foo Fooo FOOOO bar" "frob" :preserve-case t))
|
||
(list "frob Frob FROB bar" t))
|
||
|
||
(string= (regex-replace-all "(?i)f(o+)" "foo Fooo FOOOO bar" "fr\\1b" :preserve-case t)
|
||
"froob Frooob FROOOOB bar")
|
||
|
||
(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
|
||
(flet ((encode-quoted-printable (string)
|
||
"Converts 8-bit string to quoted-printable representation."
|
||
;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
|
||
(flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
|
||
(declare (ignore start end match-end reg-starts reg-ends))
|
||
(format nil "=~2,'0x" (char-code (char target-string match-start)))))
|
||
(regex-replace-all qp-regex string #'convert))))
|
||
(string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
|
||
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
|
||
|
||
(let ((url-regex (create-scanner "[^a-zA-Z0-9_\\-.]")))
|
||
(flet ((url-encode (string)
|
||
"URL-encodes a string."
|
||
;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
|
||
(flet ((convert (target-string start end match-start match-end reg-starts reg-ends)
|
||
(declare (ignore start end match-end reg-starts reg-ends))
|
||
(format nil "%~2,'0x" (char-code (char target-string match-start)))))
|
||
(regex-replace-all url-regex string #'convert))))
|
||
(string= (url-encode "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
|
||
"F%EAte%20S%F8rensen%20na%EFve%20H%FChner%20Stra%DFe")))
|
||
|
||
(flet ((how-many (target-string start end match-start match-end reg-starts reg-ends)
|
||
(declare (ignore target-string start end match-start match-end))
|
||
(format nil "~A" (- (svref reg-ends 0)
|
||
(svref reg-starts 0)))))
|
||
(string= (regex-replace-all "{(.+?)}"
|
||
"foo{...}bar{.....}{..}baz{....}frob"
|
||
(list "[" #'how-many " dots]"))
|
||
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
|
||
|
||
(let ((qp-regex (create-scanner "[\\x80-\\xff]")))
|
||
(flet ((encode-quoted-printable (string)
|
||
"Converts 8-bit string to quoted-printable representation.
|
||
Version using SIMPLE-CALLS keyword argument."
|
||
;; ;; won't work for Corman Lisp because non-ASCII characters aren't 8-bit there
|
||
(flet ((convert (match)
|
||
(format nil "=~2,'0x" (char-code (char match 0)))))
|
||
(regex-replace-all qp-regex string #'convert
|
||
:simple-calls t))))
|
||
(string= (encode-quoted-printable "F<>te S<>rensen na<6E>ve H<>hner Stra<72>e")
|
||
"F=EAte S=F8rensen na=EFve H=FChner Stra=DFe")))
|
||
|
||
(flet ((how-many (match first-register)
|
||
(declare (ignore match))
|
||
(format nil "~A" (length first-register))))
|
||
(string= (regex-replace-all "{(.+?)}"
|
||
"foo{...}bar{.....}{..}baz{....}frob"
|
||
(list "[" #'how-many " dots]")
|
||
:simple-calls t)
|
||
"foo[3 dots]bar[5 dots][2 dots]baz[4 dots]frob"))
|
||
|
||
(flet ((my-repetition (char min)
|
||
`(:greedy-repetition ,min nil ,char)))
|
||
(setf (parse-tree-synonym 'a*) (my-repetition #\a 0)
|
||
(parse-tree-synonym 'b+) (my-repetition #\b 1))
|
||
(unwind-protect
|
||
(let ((scanner (create-scanner '(:sequence a* b+))))
|
||
(equal (mapcar (lambda (target)
|
||
(scan scanner target))
|
||
'("ab" "b" "aab" "a" "x"))
|
||
(list 0 0 0 nil nil)))
|
||
(setf (parse-tree-synonym 'a*) nil
|
||
(parse-tree-synonym 'b+) nil)))
|
||
|
||
(null (scan "^a+$" "a+"))
|
||
|
||
(let ((*allow-quoting* t))
|
||
;;we use CREATE-SCANNER because of Lisps like SBCL that don't have an interpreter
|
||
(equalp (multiple-value-list (scan (create-scanner "^\\Qa+\\E$") "a+"))
|
||
(list 0 2 #() #())))
|
||
|
||
(string= (parse-string "\\k<reg>") "k<reg>")
|
||
|
||
(let ((*allow-named-registers* t))
|
||
(equal (nth-value 1 (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))"))
|
||
(list nil "small" "big")))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
(equal (nth-value 1 (create-scanner '(:register
|
||
(:sequence
|
||
(:named-register "small"
|
||
(:greedy-repetition 0 nil (:char-class (:range #\a #\z))))
|
||
(:named-register "big"
|
||
(:greedy-repetition 0 nil (:char-class (:range #\a #\z))))))))
|
||
(list nil "small" "big")))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
(equalp (multiple-value-list (scan (create-scanner "((?<small>[a-z]*)(?<big>[A-Z]*))") "aaaBBB"))
|
||
(list 0 6 #(0 0 3) #(6 3 6))))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
;; multiple-choice back-reference
|
||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a1aa"))
|
||
(list 0 4 #(0 1) #(1 2))))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>[ab])(?<reg>[12])\\k<reg>\\k<reg>$") "a22a"))
|
||
(list 0 4 #(0 1) #(1 2))))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
;; demonstrating most-recently-seen-register-first property of back-reference;
|
||
;; "greedy" regex (analogous to "aa?")
|
||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "a"))
|
||
(list 0 1 #(0 0 1) #(0 1 1))))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>)(?<reg>a)(\\k<reg>)") "aa"))
|
||
(list 0 2 #(0 0 1) #(0 1 2))))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
;; switched groups
|
||
;; "lazy" regex (analogous to "aa??")
|
||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "a"))
|
||
(list 0 1 #(0 1 1) #(1 1 1))))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
;; scanner ignores the second "a"
|
||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)") "aa"))
|
||
(list 0 1 #(0 1 1) #(1 1 1))))
|
||
|
||
(let ((*allow-named-registers* t))
|
||
;; "aa" will be matched only when forced by adding "$" at the end
|
||
(equalp (multiple-value-list (scan (create-scanner "^(?<reg>a)(?<reg>)(\\k<reg>)$") "aa"))
|
||
(list 0 2 #(0 1 1) #(1 1 2))))
|
||
|
||
(string= (quote-meta-chars "[a-z]*") "\\[a\\-z\\]\\*")
|
||
|
||
(string= (handler-case
|
||
(create-scanner "foo**x")
|
||
(ppcre-syntax-error (condition)
|
||
(format nil "Houston, we've got a problem with the string ~S: Looks like something went wrong at position ~A. The last message we received was \"~?\"."
|
||
(ppcre-syntax-error-string condition)
|
||
(ppcre-syntax-error-pos condition)
|
||
(simple-condition-format-control condition)
|
||
(simple-condition-format-arguments condition))))
|
||
"Houston, we've got a problem with the string \"foo**x\": Looks like something went wrong at position 4. The last message we received was \"Quantifier '*' not allowed.\".")
|
||
|
||
(flet ((my-weird-filter (pos)
|
||
"Only match at this point if either pos is odd and the
|
||
character we're looking at is lowercase or if pos is even and the next
|
||
two characters we're looking at are uppercase. Consume these
|
||
characters if there's a match."
|
||
(cond ((and (oddp pos)
|
||
(< pos cl-ppcre::*end-pos*)
|
||
(lower-case-p (char cl-ppcre::*string* pos)))
|
||
(1+ pos))
|
||
((and (evenp pos)
|
||
(< (1+ pos) cl-ppcre::*end-pos*)
|
||
(upper-case-p (char cl-ppcre::*string* pos))
|
||
(upper-case-p (char cl-ppcre::*string* (1+ pos))))
|
||
(+ pos 2))
|
||
(t nil))))
|
||
(let ((weird-regex `(:sequence "+" (:filter ,#'my-weird-filter) "+")))
|
||
(equalp (multiple-value-list (scan weird-regex "+A++a+AA+"))
|
||
(list 5 9 #() #()))))
|
||
|
||
(let ((a "\\E*"))
|
||
(equalp (multiple-value-list (scan (concatenate 'string "(?:" (quote-meta-chars a) "){2}") "\\E*\\E*"))
|
||
(list 0 6 #() #())))
|
||
|
||
(let ((a "\\E*"))
|
||
(equalp (multiple-value-list (scan `(:greedy-repetition 2 2 ,a) "\\E*\\E*"))
|
||
(list 0 6 #() #())))
|
||
|
||
(loop for *optimize-char-classes* in '(:hash-table :hash-table* :charset :charset* :charmap)
|
||
for s = (create-scanner "(([a-c])+)x")
|
||
always (equalp (multiple-value-list (scan s "abcxy"))
|
||
(list 0 4 #(0 2) #(3 3))))
|
||
|
||
(labels ((char-code-odd-p (char)
|
||
(oddp (char-code char)))
|
||
(char-code-even-p (char)
|
||
(evenp (char-code char)))
|
||
(resolver (name)
|
||
(cond ((string= name "odd") #'char-code-odd-p)
|
||
((string= name "even") #'char-code-even-p)
|
||
((string= name "true") (constantly t))
|
||
(t (error "Can't resolve ~S." name)))))
|
||
(equalp
|
||
(let ((*property-resolver* #'resolver))
|
||
(list (regex-replace-all (create-scanner "\\p{odd}") "abcd" "+")
|
||
(regex-replace-all (create-scanner "\\p{even}") "abcd" "+")
|
||
(regex-replace-all (create-scanner "\\p{true}") "abcd" "+")))
|
||
'("+b+d" "a+c+" "++++")))
|