;;;; PMSF-Lib --- PMSF Common Lisp Utility Library ;;;; This is copyrighted software. See documentation for terms. ;;;; ;;;; parsing-utilities.lisp --- Parser writing support machinery ;;;; ;;;; $Id$ (cl:in-package #:pmsf-lib) ;;;; %File Description: ;;;; ;;;; This file contains various utility functions and macros for use ;;;; when writing lexers and parsers. ;;;; ;;; ;;; Lexer Facility ;;; (define-condition lexer-error (simple-error) ((lexer :initarg :lexer :reader lexer-error-lexer) (string :initarg :string :reader lexer-error-string) (position :initarg :position :reader lexer-error-position)) (:report (lambda (c s) (format s "Lexer ~A:~D:~S: ~?" (lexer-error-lexer c) (lexer-error-position c) (lexer-error-string c) (simple-condition-format-control c) (simple-condition-format-arguments c))))) (defmacro define-lexer (name (&rest options) &rest clauses) (with-unique-names (string start end) `(defun ,name (,string &key ((:start ,start) 0) ((:end ,end))) (let ((,end (or ,end (length ,string)))) (lambda () ,(generate-lexer-body name string start end clauses options)))))) (defmacro with-lexer ((name (&rest options) &rest clauses) string &body body) (with-unique-names (string-var start end) `(let* ((,string-var ,string) (,start 0) (,end (length ,string-var))) (flet ((,name () ,(generate-lexer-body name string-var start end clauses options))) ,@body)))) (defun generate-lexer-body (name string start end clauses options) (destructuring-bind (&key (regex-code-limit cl-ppcre:*regex-char-code-limit*) (use-bmh-matchers cl-ppcre:*use-bmh-matchers*)) options (with-unique-names (match-start match-end reg-starts reg-ends) `(do () ((>= ,start ,end) nil) ,@(loop for (pattern varlist . body) in clauses for real-pattern = `(:sequence :start-anchor ,(if (stringp pattern) `(:regex ,pattern) pattern)) collect `(multiple-value-bind (,match-start ,match-end ,reg-starts ,reg-ends) (cl-ppcre:scan (load-time-value (let ((cl-ppcre:*regex-char-code-limit* ,regex-code-limit) (cl-ppcre:*use-bmh-matchers* ,use-bmh-matchers)) (cl-ppcre:create-scanner ',real-pattern))) ,string :start ,start :end ,end) (declare (ignorable ,match-end ,reg-starts ,reg-ends)) (when (and ,match-start (= ,match-start ,start)) (flet ((succeed (id &optional (value id)) (setq ,start ,match-end) (return (values id value))) (skip () (setq ,start ,match-end) (go restart)) (fail (&optional (reason "No match!") &rest args) (error 'lexer-error :lexer ',name :position ,start :string ,string :format-control reason :format-arguments args))) (declare (ignorable #'succeed #'skip #'fail)) (let ,(loop for var in varlist for index upfrom 0 collect `(,var (when (aref ,reg-starts ,index) (subseq ,string (aref ,reg-starts ,index) (aref ,reg-ends ,index))))) ,@body))))) (error 'lexer-error :lexer ',name :position ,start :string ,string :format-control "No match!" :format-arguments nil) restart)))) ;;; ;;; Parsing Helpers ;;; (defun infix-to-prefix (a b c) (list b a c)) ;;; ;;; Regexp replacement helpers ;;; (defun generate-replacement-template (replacement) (let ((result nil)) (dolist (token (cl-ppcre::build-replacement-template replacement) (nreverse result)) (let ((actual (if (eq token :backslash) "\\" token))) (if (and (stringp actual) (first result) (stringp (first result))) (push (concatenate 'string (pop result) actual) result) (push actual result)))))) (defun derive-new-pattern (target-string match-start match-end reg-starts reg-ends replacement-template) (loop with reg-bound = (if reg-starts (array-dimension reg-starts 0) 0) for token in replacement-template collect (typecase token (string token) (integer ;; replace numbers with the corresponding registers (when (>= token reg-bound) ;; but only if the register was referenced in the ;; regular expression (cl-ppcre::signal-invocation-error "Reference to non-existent register ~A in replacement string." (1+ token))) (when (svref reg-starts token) ;; and only if it matched, i.e. no match results ;; in an empty string (cl-ppcre:quote-meta-chars (subseq target-string (svref reg-starts token) (svref reg-ends token))))) (symbol (case token ((:match) ;; the whole match (cl-ppcre:quote-meta-chars (subseq target-string match-start match-end))) ((:before-match) ;; the part of the target string before the match (cl-ppcre:quote-meta-chars (subseq target-string 0 match-start))) ((:after-match) ;; the part of the target string after the match (cl-ppcre:quote-meta-chars (subseq target-string match-end)))))) into result finally (return (reduce (lambda (x y) (concatenate 'string x y)) result :initial-value ""))))