Files
OSNCL/lib/parsing-utilities.lisp

162 lines
6.7 KiB
Common Lisp

;;;; PMSF-Lib --- PMSF Common Lisp Utility Library
;;;; This is copyrighted software. See documentation for terms.
;;;;
;;;; parsing-utilities.lisp --- Parser writing support machinery
;;;;
;;;; $Id$
(cl:in-package #:pmsf-lib)
;;;; %File Description:
;;;;
;;;; This file contains various utility functions and macros for use
;;;; when writing lexers and parsers.
;;;;
;;;
;;; Lexer Facility
;;;
(define-condition lexer-error (simple-error)
((lexer :initarg :lexer :reader lexer-error-lexer)
(string :initarg :string :reader lexer-error-string)
(position :initarg :position :reader lexer-error-position))
(:report (lambda (c s)
(format s "Lexer ~A:~D:~S: ~?"
(lexer-error-lexer c)
(lexer-error-position c)
(lexer-error-string c)
(simple-condition-format-control c)
(simple-condition-format-arguments c)))))
(defmacro define-lexer (name (&rest options) &rest clauses)
(with-unique-names (string start end)
`(defun ,name (,string &key ((:start ,start) 0) ((:end ,end)))
(let ((,end (or ,end (length ,string))))
(lambda ()
,(generate-lexer-body name string start end clauses options))))))
(defmacro with-lexer ((name (&rest options) &rest clauses) string &body body)
(with-unique-names (string-var start end)
`(let* ((,string-var ,string) (,start 0) (,end (length ,string-var)))
(flet ((,name ()
,(generate-lexer-body name string-var start end
clauses options)))
,@body))))
(defun generate-lexer-body (name string start end clauses options)
(destructuring-bind (&key (regex-code-limit cl-ppcre:*regex-char-code-limit*)
(use-bmh-matchers cl-ppcre:*use-bmh-matchers*))
options
(with-unique-names (match-start match-end reg-starts reg-ends)
`(do ()
((>= ,start ,end) nil)
,@(loop for (pattern varlist . body) in clauses
for real-pattern = `(:sequence :start-anchor
,(if (stringp pattern)
`(:regex ,pattern)
pattern))
collect
`(multiple-value-bind (,match-start
,match-end
,reg-starts ,reg-ends)
(cl-ppcre:scan
(load-time-value
(let ((cl-ppcre:*regex-char-code-limit*
,regex-code-limit)
(cl-ppcre:*use-bmh-matchers* ,use-bmh-matchers))
(cl-ppcre:create-scanner ',real-pattern)))
,string
:start ,start :end ,end)
(declare (ignorable ,match-end ,reg-starts ,reg-ends))
(when (and ,match-start (= ,match-start ,start))
(flet ((succeed (id &optional (value id))
(setq ,start ,match-end)
(return (values id value)))
(skip ()
(setq ,start ,match-end)
(go restart))
(fail (&optional (reason "No match!")
&rest args)
(error 'lexer-error
:lexer ',name
:position ,start
:string ,string
:format-control reason
:format-arguments args)))
(declare (ignorable #'succeed #'skip #'fail))
(let ,(loop for var in varlist
for index upfrom 0
collect
`(,var
(when (aref ,reg-starts ,index)
(subseq ,string
(aref ,reg-starts ,index)
(aref ,reg-ends ,index)))))
,@body)))))
(error 'lexer-error
:lexer ',name
:position ,start
:string ,string
:format-control "No match!"
:format-arguments nil)
restart))))
;;;
;;; Parsing Helpers
;;;
(defun infix-to-prefix (a b c) (list b a c))
;;;
;;; Regexp replacement helpers
;;;
(defun generate-replacement-template (replacement)
(let ((result nil))
(dolist (token (cl-ppcre::build-replacement-template replacement)
(nreverse result))
(let ((actual (if (eq token :backslash) "\\" token)))
(if (and (stringp actual) (first result) (stringp (first result)))
(push (concatenate 'string (pop result) actual) result)
(push actual result))))))
(defun derive-new-pattern (target-string match-start match-end reg-starts reg-ends replacement-template)
(loop with reg-bound = (if reg-starts (array-dimension reg-starts 0) 0)
for token in replacement-template
collect
(typecase token
(string token)
(integer
;; replace numbers with the corresponding registers
(when (>= token reg-bound)
;; but only if the register was referenced in the
;; regular expression
(cl-ppcre::signal-invocation-error
"Reference to non-existent register ~A in replacement string."
(1+ token)))
(when (svref reg-starts token)
;; and only if it matched, i.e. no match results
;; in an empty string
(cl-ppcre:quote-meta-chars
(subseq target-string
(svref reg-starts token)
(svref reg-ends token)))))
(symbol
(case token
((:match)
;; the whole match
(cl-ppcre:quote-meta-chars
(subseq target-string match-start match-end)))
((:before-match)
;; the part of the target string before the match
(cl-ppcre:quote-meta-chars
(subseq target-string 0 match-start)))
((:after-match)
;; the part of the target string after the match
(cl-ppcre:quote-meta-chars
(subseq target-string match-end))))))
into result
finally (return (reduce (lambda (x y) (concatenate 'string x y)) result
:initial-value ""))))