git-svn-id: svn://bknr.net/svn/trunk/thirdparty/cl-ppcre@4461 4281704c-cde7-0310-8518-8e2dc76b1ff0
201 lines
8.2 KiB
Common Lisp
201 lines
8.2 KiB
Common Lisp
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
|
|
;;; $Header: /usr/local/cvsrep/cl-ppcre/util.lisp,v 1.48 2009/10/28 07:36:15 edi Exp $
|
|
|
|
;;; Utility functions and constants dealing with the character sets we
|
|
;;; use to encode character classes
|
|
|
|
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
|
|
|
|
;;; Redistribution and use in source and binary forms, with or without
|
|
;;; modification, are permitted provided that the following conditions
|
|
;;; are met:
|
|
|
|
;;; * Redistributions of source code must retain the above copyright
|
|
;;; notice, this list of conditions and the following disclaimer.
|
|
|
|
;;; * Redistributions in binary form must reproduce the above
|
|
;;; copyright notice, this list of conditions and the following
|
|
;;; disclaimer in the documentation and/or other materials
|
|
;;; provided with the distribution.
|
|
|
|
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
|
|
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
|
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
(in-package :cl-ppcre)
|
|
|
|
(defmacro defconstant (name value &optional doc)
|
|
"Make sure VALUE is evaluated only once \(to appease SBCL)."
|
|
`(cl:defconstant ,name (if (boundp ',name) (symbol-value ',name) ,value)
|
|
,@(when doc (list doc))))
|
|
|
|
#+:lispworks
|
|
(eval-when (:compile-toplevel :load-toplevel :execute)
|
|
(import 'lw:with-unique-names))
|
|
|
|
#-:lispworks
|
|
(defmacro with-unique-names ((&rest bindings) &body body)
|
|
"Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form*
|
|
|
|
Executes a series of forms with each VAR bound to a fresh,
|
|
uninterned symbol. The uninterned symbol is as if returned by a call
|
|
to GENSYM with the string denoted by X - or, if X is not supplied, the
|
|
string denoted by VAR - as argument.
|
|
|
|
The variable bindings created are lexical unless special declarations
|
|
are specified. The scopes of the name bindings and declarations do not
|
|
include the Xs.
|
|
|
|
The forms are evaluated in order, and the values of all but the last
|
|
are discarded \(that is, the body is an implicit PROGN)."
|
|
;; reference implementation posted to comp.lang.lisp as
|
|
;; <cy3bshuf30f.fsf@ljosa.com> by Vebjorn Ljosa - see also
|
|
;; <http://www.cliki.net/Common%20Lisp%20Utilities>
|
|
`(let ,(mapcar #'(lambda (binding)
|
|
(check-type binding (or cons symbol))
|
|
(if (consp binding)
|
|
(destructuring-bind (var x) binding
|
|
(check-type var symbol)
|
|
`(,var (gensym ,(etypecase x
|
|
(symbol (symbol-name x))
|
|
(character (string x))
|
|
(string x)))))
|
|
`(,binding (gensym ,(symbol-name binding)))))
|
|
bindings)
|
|
,@body))
|
|
|
|
#+:lispworks
|
|
(eval-when (:compile-toplevel :load-toplevel :execute)
|
|
(setf (macro-function 'with-rebinding)
|
|
(macro-function 'lw:rebinding)))
|
|
|
|
#-:lispworks
|
|
(defmacro with-rebinding (bindings &body body)
|
|
"WITH-REBINDING ( { var | (var prefix) }* ) form*
|
|
|
|
Evaluates a series of forms in the lexical environment that is
|
|
formed by adding the binding of each VAR to a fresh, uninterned
|
|
symbol, and the binding of that fresh, uninterned symbol to VAR's
|
|
original value, i.e., its value in the current lexical environment.
|
|
|
|
The uninterned symbol is created as if by a call to GENSYM with the
|
|
string denoted by PREFIX - or, if PREFIX is not supplied, the string
|
|
denoted by VAR - as argument.
|
|
|
|
The forms are evaluated in order, and the values of all but the last
|
|
are discarded \(that is, the body is an implicit PROGN)."
|
|
;; reference implementation posted to comp.lang.lisp as
|
|
;; <cy3wv0fya0p.fsf@ljosa.com> by Vebjorn Ljosa - see also
|
|
;; <http://www.cliki.net/Common%20Lisp%20Utilities>
|
|
(loop for binding in bindings
|
|
for var = (if (consp binding) (car binding) binding)
|
|
for name = (gensym)
|
|
collect `(,name ,var) into renames
|
|
collect ``(,,var ,,name) into temps
|
|
finally (return `(let ,renames
|
|
(with-unique-names ,bindings
|
|
`(let (,,@temps)
|
|
,,@body))))))
|
|
|
|
(declaim (inline digit-char-p))
|
|
(defun digit-char-p (chr)
|
|
(declare #.*standard-optimize-settings*)
|
|
"Tests whether a character is a decimal digit, i.e. the same as
|
|
Perl's [\\d]. Note that this function shadows the standard Common
|
|
Lisp function CL:DIGIT-CHAR-P."
|
|
(char<= #\0 chr #\9))
|
|
|
|
(declaim (inline word-char-p))
|
|
(defun word-char-p (chr)
|
|
(declare #.*standard-optimize-settings*)
|
|
"Tests whether a character is a \"word\" character. In the ASCII
|
|
charset this is equivalent to a-z, A-Z, 0-9, or _, i.e. the same as
|
|
Perl's [\\w]."
|
|
(or (alphanumericp chr)
|
|
(char= chr #\_)))
|
|
|
|
(defconstant +whitespace-char-string+
|
|
(coerce '(#\Space #\Tab #\Linefeed #\Return #\Page) 'string)
|
|
"A string of all characters which are considered to be whitespace.
|
|
Same as Perl's [\\s].")
|
|
|
|
(defun whitespacep (chr)
|
|
(declare #.*special-optimize-settings*)
|
|
"Tests whether a character is whitespace, i.e. whether it would
|
|
match [\\s] in Perl."
|
|
(find chr +whitespace-char-string+ :test #'char=))
|
|
|
|
(defmacro maybe-coerce-to-simple-string (string)
|
|
"Coerces STRING to a simple STRING unless it already is one."
|
|
(with-unique-names (=string=)
|
|
`(let ((,=string= ,string))
|
|
(cond (#+:lispworks
|
|
(lw:simple-text-string-p ,=string=)
|
|
#-:lispworks
|
|
(simple-string-p ,=string=)
|
|
,=string=)
|
|
(t
|
|
(coerce ,=string=
|
|
#+:lispworks 'lw:simple-text-string
|
|
#-:lispworks 'simple-string))))))
|
|
|
|
(declaim (inline nsubseq))
|
|
(defun nsubseq (sequence start &optional (end (length sequence)))
|
|
"Returns a subsequence by pointing to location in original sequence."
|
|
(make-array (- end start)
|
|
:element-type (array-element-type sequence)
|
|
:displaced-to sequence
|
|
:displaced-index-offset start))
|
|
|
|
(defun normalize-var-list (var-list)
|
|
"Utility function for REGISTER-GROUPS-BIND and DO-REGISTER-GROUPS.
|
|
Creates the long form \(a list of \(FUNCTION VAR) entries) out of the
|
|
short form of VAR-LIST."
|
|
(loop for element in var-list
|
|
if (consp element)
|
|
nconc (loop for var in (rest element)
|
|
collect (list (first element) var))
|
|
else
|
|
collect (list '(function identity) element)))
|
|
|
|
(defun string-list-to-simple-string (string-list)
|
|
"Concatenates a list of strings to one simple-string."
|
|
(declare #.*standard-optimize-settings*)
|
|
;; this function provided by JP Massar; note that we can't use APPLY
|
|
;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
|
|
(let ((total-size 0))
|
|
(declare (fixnum total-size))
|
|
(dolist (string string-list)
|
|
#-:genera (declare (string string))
|
|
(incf total-size (length string)))
|
|
(let ((result-string (make-sequence #-:lispworks 'simple-string
|
|
#+:lispworks 'lw:simple-text-string
|
|
total-size))
|
|
(curr-pos 0))
|
|
(declare (fixnum curr-pos))
|
|
(dolist (string string-list)
|
|
#-:genera (declare (string string))
|
|
(replace result-string string :start1 curr-pos)
|
|
(incf curr-pos (length string)))
|
|
result-string)))
|
|
|
|
(defun complement* (test-function)
|
|
"Like COMPLEMENT but optimized for unary functions."
|
|
(declare #.*standard-optimize-settings*)
|
|
(typecase test-function
|
|
(function
|
|
(lambda (char)
|
|
(declare (character char))
|
|
(not (funcall (the function test-function) char))))
|
|
(otherwise
|
|
(lambda (char)
|
|
(declare (character char))
|
|
(not (funcall test-function char)))))) |