Files
CXML/glisp/match.lisp
2005-03-13 18:02:10 +00:00

208 lines
9.3 KiB
Common Lisp

;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*-
;;; ---------------------------------------------------------------------------
;;; Title: Very simple (non-deterministic) regular expression matching
;;; Created: 1999-01-21
;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
;;; License: LGPL (See file COPYING for details).
;;; ---------------------------------------------------------------------------
;;; (c) copyright 1999 by Gilbert Baumann
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Library General Public License for more details.
;;;
;;; You should have received a copy of the GNU Library General Public
;;; License along with this library; if not, write to the
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;;; Boston, MA 02111-1307 USA.
(in-package :GLISP)
;; Syntax
;; ------
;; atom -- match the atom
;; (p predicate) -- match, iff (funcall p elt) is non-NIL
;; (& a0 .. an) -- match a0a1..an
;; (/ a0 .. an) -- match a0 or a1 ... or an
;; (* a0 .. an) -- iteration, match any number of (& a0 ... an)
;; (+ . rest) == (/ (& . rest) (* . rest))
;; (? . rest) == (/ (& . rest) (&))
;; (= var subexpr) == assign the subexpr to the match variable 'var'
;;
;; not implemented:
;; (- a b) -- match a, but not b
;; (and a b) -- matches if a and b matches
;; (or a b) == (/ a b)
;; (not x) == matches if x does not match
;;
;; This syntax has to be merged with clex as well.
(defvar *match-macros* (make-hash-table :test #'eq))
(defmacro define-match-macro (name args &body body)
`(eval-when (compile load eval)
(setf (gethash ',name *match-macros*)
#'(lambda (whole)
(destructuring-bind ,args (cdr whole)
,@body)))
',name))
(defun symcat (&rest syms)
(let ((pack (dolist (k syms nil)
(when (symbolp k)
(return (symbol-package k))))))
(cond ((null pack)
(error "No package for ~S of ~S." 'symcat syms))
(t
(intern (apply #'concatenate 'string (mapcar #'string syms))
pack)))))
(defun sym-equal (a b)
(string= (symbol-name a) (symbol-name b)))
(defun bau-funcall (fun &rest args)
(cond ((and (consp fun) (eq (car fun) 'lambda))
(cons fun args))
((and (consp fun) (eq (car fun) 'function))
(cons (cadr fun) args))
(t
(list* 'funcall fun args))))
(defun compile-srx (srx action &key (string-type 'vector) (test '#'eql))
(let ((vars nil))
(labels ((cmp (x cont-expr)
(cond
((atom x)
(with-unique-names (string start end)
`(lambda (,string ,start ,end)
(declare (type fixnum ,start ,end)
(type ,string-type ,string))
(if (and (< ,start ,end)
,(bau-funcall test `(aref ,string ,start) `',x))
,(bau-funcall cont-expr string `(the fixnum (1+ ,start)) end)))))
((sym-equal (car x) 'p)
(destructuring-bind (p) (cdr x)
(with-unique-names (string start end)
`(lambda (,string ,start ,end)
(declare (type fixnum ,start ,end)
(type ,string-type ,string))
(if (and (< ,start ,end)
,(bau-funcall p `(aref ,string ,start)))
,(bau-funcall cont-expr string `(the fixnum (1+ ,start)) end))))))
((sym-equal (car x) '/)
(with-unique-names (ccfn string string2 start end end2 j)
`(lambda (,string ,start ,end)
(declare (type fixnum ,start ,end)
(type ,string-type ,string))
(labels ((,ccfn (,string2 ,j ,end2)
(declare (type fixnum ,j ,end2)
(type ,string-type ,string2))
,(bau-funcall cont-expr string2 j end2)))
,@(mapcar (lambda (a)
`(,(cmp a `#',ccfn) ,string ,start ,end))
(cdr x))))))
((sym-equal (car x) '*)
(with-unique-names (ccfn string string2 start end end2 j)
(let ((subexpr (cons '& (cdr x))))
`(lambda (,string ,start ,end)
(declare (type fixnum ,start ,end)
(type ,string-type ,string))
(labels ((,ccfn (,string2 ,j ,end2)
(declare (type fixnum ,j ,end2)
(type ,string-type ,string2))
(,(cmp subexpr `#',ccfn) ,string2 ,j ,end2)
,(bau-funcall cont-expr string j end)))
(,ccfn ,string ,start ,end))))))
((sym-equal (car x) '&)
(case (length x)
(1 (with-unique-names (string start end)
`(lambda (,string ,start ,end)
(declare (type fixnum ,start ,end)
(type ,string-type ,string))
,(bau-funcall cont-expr string start end))))
(2 (cmp (cadr x) cont-expr))
(otherwise
(with-unique-names (string start end)
`(lambda (,string ,start ,end)
(declare (type fixnum ,start ,end)
(type ,string-type ,string))
(,(cmp (cadr x)
(with-unique-names (string j end)
`#'(lambda (,string ,j ,end)
(declare (type fixnum ,j ,end)
(type ,string-type ,string))
(,(cmp (cons '& (cddr x)) cont-expr) ,string ,j ,end))))
,string ,start ,end))))))
((sym-equal (car x) '=)
(destructuring-bind (var subexpr) (cdr x)
(pushnew var vars)
(with-unique-names (string i0 end)
`(lambda (,string ,i0 ,end)
(declare (type fixnum ,i0 ,end)
(type ,string-type ,string))
(,(cmp subexpr
(with-unique-names (string i1 end)
`#'(lambda (,string ,i1 ,end)
(declare (type fixnum ,i1 ,end)
(type ,string-type ,string))
(setf ,(symcat var "-START") ,i0
,(symcat var "-END") ,i1)
,(bau-funcall cont-expr string i1 end))))
,string ,i0 ,end)))))
((sym-equal (car x) '+)
(cmp `(& ,@(cdr x) (* ,@(cdr x))) cont-expr))
((sym-equal (car x) '?)
(cmp `(/ (&) (& ,@(cdr x))) cont-expr))
(t
(let ((mmf (gethash (car x) *match-macros*)))
(cond (mmf
(cmp (funcall mmf x) cont-expr))
(t
(error "Unknown symbolic regular expression: ~S." x))))) )))
(with-unique-names (string start end continuation match)
(let ((cf (cmp srx `#',continuation)))
`(lambda (,string ,start ,end)
(declare ;;#.cl-user:+optimize-very-fast+
(type fixnum ,start ,end)
(type ,string-type ,string))
(block ,match
(let ,(mapcan (lambda (var) (list (symcat var "-START") (symcat var "-END"))) vars)
(labels (,(with-unique-names (string j end)
`(,continuation (,string ,j ,end)
(declare (type fixnum ,j ,end)
(type ,string-type ,string))
(declare (ignore ,string))
(if (= ,j ,end)
(let ()
(return-from ,match ,action))))))
(,cf ,string ,start ,end)))
nil)))))))
(defmacro if-match ((string &key start end type (test '#'eql)) srx &body actions)
(let ((str (gensym "str")))
`(let ((,str ,string))
(,(compile-srx srx `(progn .,actions)
:string-type (or type 'vector)
:test test)
,str
,(if start start 0)
,(if end end `(length ,str))))))