Initial CL parser/generator implementation
This commit is contained in:
161
lib/parsing-utilities.lisp
Normal file
161
lib/parsing-utilities.lisp
Normal file
@ -0,0 +1,161 @@
|
||||
;;;; PMSF-Lib --- PMSF Common Lisp Utility Library
|
||||
;;;; This is copyrighted software. See documentation for terms.
|
||||
;;;;
|
||||
;;;; parsing-utilities.lisp --- Parser writing support machinery
|
||||
;;;;
|
||||
;;;; $Id$
|
||||
|
||||
(cl:in-package #:pmsf-lib)
|
||||
|
||||
;;;; %File Description:
|
||||
;;;;
|
||||
;;;; This file contains various utility functions and macros for use
|
||||
;;;; when writing lexers and parsers.
|
||||
;;;;
|
||||
|
||||
;;;
|
||||
;;; Lexer Facility
|
||||
;;;
|
||||
|
||||
(define-condition lexer-error (simple-error)
|
||||
((lexer :initarg :lexer :reader lexer-error-lexer)
|
||||
(string :initarg :string :reader lexer-error-string)
|
||||
(position :initarg :position :reader lexer-error-position))
|
||||
(:report (lambda (c s)
|
||||
(format s "Lexer ~A:~D:~S: ~?"
|
||||
(lexer-error-lexer c)
|
||||
(lexer-error-position c)
|
||||
(lexer-error-string c)
|
||||
(simple-condition-format-control c)
|
||||
(simple-condition-format-arguments c)))))
|
||||
|
||||
(defmacro define-lexer (name (&rest options) &rest clauses)
|
||||
(with-unique-names (string start end)
|
||||
`(defun ,name (,string &key ((:start ,start) 0) ((:end ,end)))
|
||||
(let ((,end (or ,end (length ,string))))
|
||||
(lambda ()
|
||||
,(generate-lexer-body name string start end clauses options))))))
|
||||
|
||||
(defmacro with-lexer ((name (&rest options) &rest clauses) string &body body)
|
||||
(with-unique-names (string-var start end)
|
||||
`(let* ((,string-var ,string) (,start 0) (,end (length ,string-var)))
|
||||
(flet ((,name ()
|
||||
,(generate-lexer-body name string-var start end
|
||||
clauses options)))
|
||||
,@body))))
|
||||
|
||||
(defun generate-lexer-body (name string start end clauses options)
|
||||
(destructuring-bind (&key (regex-code-limit cl-ppcre:*regex-char-code-limit*)
|
||||
(use-bmh-matchers cl-ppcre:*use-bmh-matchers*))
|
||||
options
|
||||
(with-unique-names (match-start match-end reg-starts reg-ends)
|
||||
`(do ()
|
||||
((>= ,start ,end) nil)
|
||||
,@(loop for (pattern varlist . body) in clauses
|
||||
for real-pattern = `(:sequence :start-anchor
|
||||
,(if (stringp pattern)
|
||||
`(:regex ,pattern)
|
||||
pattern))
|
||||
collect
|
||||
`(multiple-value-bind (,match-start
|
||||
,match-end
|
||||
,reg-starts ,reg-ends)
|
||||
(cl-ppcre:scan
|
||||
(load-time-value
|
||||
(let ((cl-ppcre:*regex-char-code-limit*
|
||||
,regex-code-limit)
|
||||
(cl-ppcre:*use-bmh-matchers* ,use-bmh-matchers))
|
||||
(cl-ppcre:create-scanner ',real-pattern)))
|
||||
,string
|
||||
:start ,start :end ,end)
|
||||
(declare (ignorable ,match-end ,reg-starts ,reg-ends))
|
||||
(when (and ,match-start (= ,match-start ,start))
|
||||
(flet ((succeed (id &optional (value id))
|
||||
(setq ,start ,match-end)
|
||||
(return (values id value)))
|
||||
(skip ()
|
||||
(setq ,start ,match-end)
|
||||
(go restart))
|
||||
(fail (&optional (reason "No match!")
|
||||
&rest args)
|
||||
(error 'lexer-error
|
||||
:lexer ',name
|
||||
:position ,start
|
||||
:string ,string
|
||||
:format-control reason
|
||||
:format-arguments args)))
|
||||
(declare (ignorable #'succeed #'skip #'fail))
|
||||
(let ,(loop for var in varlist
|
||||
for index upfrom 0
|
||||
collect
|
||||
`(,var
|
||||
(when (aref ,reg-starts ,index)
|
||||
(subseq ,string
|
||||
(aref ,reg-starts ,index)
|
||||
(aref ,reg-ends ,index)))))
|
||||
,@body)))))
|
||||
(error 'lexer-error
|
||||
:lexer ',name
|
||||
:position ,start
|
||||
:string ,string
|
||||
:format-control "No match!"
|
||||
:format-arguments nil)
|
||||
restart))))
|
||||
|
||||
;;;
|
||||
;;; Parsing Helpers
|
||||
;;;
|
||||
|
||||
(defun infix-to-prefix (a b c) (list b a c))
|
||||
|
||||
;;;
|
||||
;;; Regexp replacement helpers
|
||||
;;;
|
||||
|
||||
(defun generate-replacement-template (replacement)
|
||||
(let ((result nil))
|
||||
(dolist (token (cl-ppcre::build-replacement-template replacement)
|
||||
(nreverse result))
|
||||
(let ((actual (if (eq token :backslash) "\\" token)))
|
||||
(if (and (stringp actual) (first result) (stringp (first result)))
|
||||
(push (concatenate 'string (pop result) actual) result)
|
||||
(push actual result))))))
|
||||
|
||||
(defun derive-new-pattern (target-string match-start match-end reg-starts reg-ends replacement-template)
|
||||
(loop with reg-bound = (if reg-starts (array-dimension reg-starts 0) 0)
|
||||
for token in replacement-template
|
||||
collect
|
||||
(typecase token
|
||||
(string token)
|
||||
(integer
|
||||
;; replace numbers with the corresponding registers
|
||||
(when (>= token reg-bound)
|
||||
;; but only if the register was referenced in the
|
||||
;; regular expression
|
||||
(cl-ppcre::signal-invocation-error
|
||||
"Reference to non-existent register ~A in replacement string."
|
||||
(1+ token)))
|
||||
(when (svref reg-starts token)
|
||||
;; and only if it matched, i.e. no match results
|
||||
;; in an empty string
|
||||
(cl-ppcre:quote-meta-chars
|
||||
(subseq target-string
|
||||
(svref reg-starts token)
|
||||
(svref reg-ends token)))))
|
||||
(symbol
|
||||
(case token
|
||||
((:match)
|
||||
;; the whole match
|
||||
(cl-ppcre:quote-meta-chars
|
||||
(subseq target-string match-start match-end)))
|
||||
((:before-match)
|
||||
;; the part of the target string before the match
|
||||
(cl-ppcre:quote-meta-chars
|
||||
(subseq target-string 0 match-start)))
|
||||
((:after-match)
|
||||
;; the part of the target string after the match
|
||||
(cl-ppcre:quote-meta-chars
|
||||
(subseq target-string match-end))))))
|
||||
into result
|
||||
finally (return (reduce (lambda (x y) (concatenate 'string x y)) result
|
||||
:initial-value ""))))
|
||||
Reference in New Issue
Block a user