162 lines
6.7 KiB
Common Lisp
162 lines
6.7 KiB
Common Lisp
;;;; PMSF-Lib --- PMSF Common Lisp Utility Library
|
|
;;;; This is copyrighted software. See documentation for terms.
|
|
;;;;
|
|
;;;; parsing-utilities.lisp --- Parser writing support machinery
|
|
;;;;
|
|
;;;; $Id$
|
|
|
|
(cl:in-package #:pmsf-lib)
|
|
|
|
;;;; %File Description:
|
|
;;;;
|
|
;;;; This file contains various utility functions and macros for use
|
|
;;;; when writing lexers and parsers.
|
|
;;;;
|
|
|
|
;;;
|
|
;;; Lexer Facility
|
|
;;;
|
|
|
|
(define-condition lexer-error (simple-error)
|
|
((lexer :initarg :lexer :reader lexer-error-lexer)
|
|
(string :initarg :string :reader lexer-error-string)
|
|
(position :initarg :position :reader lexer-error-position))
|
|
(:report (lambda (c s)
|
|
(format s "Lexer ~A:~D:~S: ~?"
|
|
(lexer-error-lexer c)
|
|
(lexer-error-position c)
|
|
(lexer-error-string c)
|
|
(simple-condition-format-control c)
|
|
(simple-condition-format-arguments c)))))
|
|
|
|
(defmacro define-lexer (name (&rest options) &rest clauses)
|
|
(with-unique-names (string start end)
|
|
`(defun ,name (,string &key ((:start ,start) 0) ((:end ,end)))
|
|
(let ((,end (or ,end (length ,string))))
|
|
(lambda ()
|
|
,(generate-lexer-body name string start end clauses options))))))
|
|
|
|
(defmacro with-lexer ((name (&rest options) &rest clauses) string &body body)
|
|
(with-unique-names (string-var start end)
|
|
`(let* ((,string-var ,string) (,start 0) (,end (length ,string-var)))
|
|
(flet ((,name ()
|
|
,(generate-lexer-body name string-var start end
|
|
clauses options)))
|
|
,@body))))
|
|
|
|
(defun generate-lexer-body (name string start end clauses options)
|
|
(destructuring-bind (&key (regex-code-limit cl-ppcre:*regex-char-code-limit*)
|
|
(use-bmh-matchers cl-ppcre:*use-bmh-matchers*))
|
|
options
|
|
(with-unique-names (match-start match-end reg-starts reg-ends)
|
|
`(do ()
|
|
((>= ,start ,end) nil)
|
|
,@(loop for (pattern varlist . body) in clauses
|
|
for real-pattern = `(:sequence :start-anchor
|
|
,(if (stringp pattern)
|
|
`(:regex ,pattern)
|
|
pattern))
|
|
collect
|
|
`(multiple-value-bind (,match-start
|
|
,match-end
|
|
,reg-starts ,reg-ends)
|
|
(cl-ppcre:scan
|
|
(load-time-value
|
|
(let ((cl-ppcre:*regex-char-code-limit*
|
|
,regex-code-limit)
|
|
(cl-ppcre:*use-bmh-matchers* ,use-bmh-matchers))
|
|
(cl-ppcre:create-scanner ',real-pattern)))
|
|
,string
|
|
:start ,start :end ,end)
|
|
(declare (ignorable ,match-end ,reg-starts ,reg-ends))
|
|
(when (and ,match-start (= ,match-start ,start))
|
|
(flet ((succeed (id &optional (value id))
|
|
(setq ,start ,match-end)
|
|
(return (values id value)))
|
|
(skip ()
|
|
(setq ,start ,match-end)
|
|
(go restart))
|
|
(fail (&optional (reason "No match!")
|
|
&rest args)
|
|
(error 'lexer-error
|
|
:lexer ',name
|
|
:position ,start
|
|
:string ,string
|
|
:format-control reason
|
|
:format-arguments args)))
|
|
(declare (ignorable #'succeed #'skip #'fail))
|
|
(let ,(loop for var in varlist
|
|
for index upfrom 0
|
|
collect
|
|
`(,var
|
|
(when (aref ,reg-starts ,index)
|
|
(subseq ,string
|
|
(aref ,reg-starts ,index)
|
|
(aref ,reg-ends ,index)))))
|
|
,@body)))))
|
|
(error 'lexer-error
|
|
:lexer ',name
|
|
:position ,start
|
|
:string ,string
|
|
:format-control "No match!"
|
|
:format-arguments nil)
|
|
restart))))
|
|
|
|
;;;
|
|
;;; Parsing Helpers
|
|
;;;
|
|
|
|
(defun infix-to-prefix (a b c) (list b a c))
|
|
|
|
;;;
|
|
;;; Regexp replacement helpers
|
|
;;;
|
|
|
|
(defun generate-replacement-template (replacement)
|
|
(let ((result nil))
|
|
(dolist (token (cl-ppcre::build-replacement-template replacement)
|
|
(nreverse result))
|
|
(let ((actual (if (eq token :backslash) "\\" token)))
|
|
(if (and (stringp actual) (first result) (stringp (first result)))
|
|
(push (concatenate 'string (pop result) actual) result)
|
|
(push actual result))))))
|
|
|
|
(defun derive-new-pattern (target-string match-start match-end reg-starts reg-ends replacement-template)
|
|
(loop with reg-bound = (if reg-starts (array-dimension reg-starts 0) 0)
|
|
for token in replacement-template
|
|
collect
|
|
(typecase token
|
|
(string token)
|
|
(integer
|
|
;; replace numbers with the corresponding registers
|
|
(when (>= token reg-bound)
|
|
;; but only if the register was referenced in the
|
|
;; regular expression
|
|
(cl-ppcre::signal-invocation-error
|
|
"Reference to non-existent register ~A in replacement string."
|
|
(1+ token)))
|
|
(when (svref reg-starts token)
|
|
;; and only if it matched, i.e. no match results
|
|
;; in an empty string
|
|
(cl-ppcre:quote-meta-chars
|
|
(subseq target-string
|
|
(svref reg-starts token)
|
|
(svref reg-ends token)))))
|
|
(symbol
|
|
(case token
|
|
((:match)
|
|
;; the whole match
|
|
(cl-ppcre:quote-meta-chars
|
|
(subseq target-string match-start match-end)))
|
|
((:before-match)
|
|
;; the part of the target string before the match
|
|
(cl-ppcre:quote-meta-chars
|
|
(subseq target-string 0 match-start)))
|
|
((:after-match)
|
|
;; the part of the target string after the match
|
|
(cl-ppcre:quote-meta-chars
|
|
(subseq target-string match-end))))))
|
|
into result
|
|
finally (return (reduce (lambda (x y) (concatenate 'string x y)) result
|
|
:initial-value ""))))
|