Files
CLASH/src/main/parsing.lisp

491 lines
17 KiB
Common Lisp

;;;; CLASH --- The Common Lisp Adaptable Simple HTTP server
;;;; This is copyrighted software. See documentation for terms.
;;;;
;;;; parsing.cl --- General HTTP parsing routines
;;;;
;;;; Checkout Tag: $Name$
;;;; $Id$
(in-package :CLASH)
;;;; %File Description:
;;;;
;;;; This file implements various parsing routines useful in dealing
;;;; with HTTP component constructs, based on the HTTP grammar.
;;;;
(eval-when (:compile-toplevel :load-toplevel :execute)
(defconstant +HTTP-LWS-Character-Bag+ '(#\Space #\Tab)
"HTTP LWS characters.")
(defconstant +HTTP-Quote-Character+ #\"
"HTTP quote character.")
(defconstant +HTTP-Default-List-Delimiter+ #\,
"HTTP default list delimiter character.")
(defconstant +HTTP-Pair-Delimiter+ #\=
"HTTP key-value pair delimiter character.")
)
(defun char-lws-p (char)
(declare (type character char))
(member char +HTTP-LWS-Character-Bag+ :test #'char=))
(define-compiler-macro char-lws-p (char)
(let ((char-var (gensym)))
`(let ((,char-var ,char))
(declare (type character ,char-var))
(or ,@(loop for lws-char in +HTTP-LWS-Character-Bag+
collect `(char= ,char-var ,lws-char))))))
(declaim (inline char-quote-p))
(defun char-quote-p (char)
(declare (type character char))
(char= char +HTTP-Quote-Character+))
(defun string-trim-lws (string)
"Trim HTTP LWS from string."
(string-trim +HTTP-LWS-Character-Bag+ string))
(declaim (inline skip-forward-lws))
(defun skip-forward-lws (string start end)
(do ((pos start (1+ pos)))
((or (>= pos end) (not (char-lws-p (char string pos))))
pos)))
(declaim (inline skip-backward-lws))
(defun skip-backward-lws (string start end)
(do ((pos end (1- pos)))
((or (<= pos start) (not (char-lws-p (char string pos))))
pos)))
(declaim (inline skip-forward-to-lws))
(defun skip-forward-to-lws (string start end)
(do ((pos start (1+ pos)))
((or (>= pos end) (char-lws-p (char string pos)))
pos)))
(declaim (inline skip-backward-to-lws))
(defun skip-backward-to-lws (string start end)
(do ((pos end (1- pos)))
((or (<= pos start) (char-lws-p (char string pos)))
pos)))
(defmacro with-lws-trimmed-bounds ((string-var start-var end-var) &body body)
`(let ((,end-var (or ,end-var (length ,string-var))))
(let ((,start-var (skip-forward-lws ,string-var ,start-var ,end-var))
(,end-var
(1+ (skip-backward-lws ,string-var ,start-var (1- ,end-var)))))
,@body)))
(defun dequote-string (string)
"Dequote an HTTP string."
(delete +HTTP-Quote-Character+ string))
(defun parse-http-list (string &key (delimiter +HTTP-Default-List-Delimiter+))
(declare (type string string) (character delimiter))
(loop with state = :seek-element
with start = nil
with stop = nil
for char across string
for pos from 0
when
(case state
(:seek-element
(cond
((char-lws-p char)
nil)
((char= char delimiter)
nil)
((char-quote-p char)
(setq start pos
state :in-quoted-string)
nil)
(t
(setq start pos
state :in-token)
nil)))
(:in-token
(cond
((char-lws-p char)
(setq stop pos
state :seek-delimiter)
t)
((char-quote-p char)
(setq state :in-quoted-string)
nil)
((char= char delimiter)
(setq stop pos
state :seek-element)
t)
(t
nil)))
(:in-quoted-string
(when (char-quote-p char)
(setq state :in-token))
nil)
(:seek-delimiter
(cond
((char= char delimiter)
(setq state :seek-element)
nil)
((char-lws-p char)
nil)
(t
(error 'clash-syntax-error :fragment string
:reason "Whitespace in token")))))
collect (subseq string start stop)
when
(and
(= pos (1- (length string)))
(case state
(:in-token
(setq stop nil)
t)
(:in-quoted-string
(error 'clash-syntax-error :fragment string
:reason "End of line in quoted-string"))
((:seek-element :seek-delimiter)
nil)))
collect (subseq string start stop)))
(defun parse-simple-list (string &key
(delimiter +HTTP-Default-List-Delimiter+))
"Parse a simple HTTP list from the string, returning a list of strings."
(mapcar #'(lambda (entry) (dequote-string (string-trim-lws entry)))
(parse-http-list string :delimiter delimiter)))
(defun parse-key-value-pair (string &key (delimiter +HTTP-Pair-Delimiter+))
(declare (type character delimiter))
(or
(loop with in-quote = nil
for char across string
for pos from 0
do
(cond
((and in-quote (char-quote-p char))
(setq in-quote nil))
((char-quote-p char)
(setq in-quote t))
((char= char delimiter)
(return (cons (dequote-string (subseq string 0 pos))
(dequote-string (subseq string (1+ pos))))))
(t
nil)))
(error 'clash-syntax-error :fragment string
:reason (format nil "Missing ~A in key-value pair." delimiter))))
(defun parse-key-value-list
(string &key (list-delimiter +HTTP-Default-List-Delimiter+)
(pair-delimiter +HTTP-Pair-Delimiter+))
"Parse a list of key-value pairs from the string, returning an alist."
(mapcar #'(lambda (elem)
(parse-key-value-pair elem :delimiter pair-delimiter))
(parse-http-list string :delimiter list-delimiter)))
(defconstant +Base64-Code-Array+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
(defconstant +Base64-Padding-Character+ #\=)
(defun decode-base64-string (string &key (start 0) end)
(declare (fixnum start) (string string))
(let ((end (or end (length string))))
(declare (fixnum end))
(with-output-to-string (stream)
(loop with count = 4
with accumulator = 0
with padding = 0
for pos from start below end
for char = (char string pos)
for value = (position char +Base64-Code-Array+ :test #'char=)
do
(when (and (null value) (char= char +Base64-Padding-Character+))
(setq value 0)
(incf padding))
(when value
(decf count)
(setf (ldb (byte 6 (* count 6)) accumulator) value))
(when (zerop count)
(loop for byte from 2 downto padding
for item-value = (ldb (byte 8 (* byte 8)) accumulator)
for item-char = (code-char item-value)
do
(write-char item-char stream))
(setq accumulator 0 count 4 padding 0))
finally
(when (/= count 4)
(error 'clash-syntax-error :fragment string
:reason
"Base64-Encoding ended on non-block-boundary"))))))
(defun encode-base64-string
(string &key (start 0) end use-crlf final-crlf (line-length 76))
(declare (fixnum start) (string string))
(let ((end (or end (length string))))
(declare (fixnum end))
(with-output-to-string (stream)
(loop with count = 3
with accumulator = 0
with line-count = 0
for pos from start below end
for char = (char string pos)
for value = (char-code char)
do
;; Snarf and stash byte
(decf count)
(setf (ldb (byte 8 (* count 8)) accumulator) value)
;; If we've got 24 bits, then output encoded 4-tuple
(when (zerop count)
;; Check line-length first
(when (> (+ line-count 4) line-length)
(when use-crlf (write-char #\Return stream))
(write-char #\Newline stream)
(setq line-count 0))
;; Output 4-tuple
(loop for offset from 3 downto 0
for index = (ldb (byte 6 (* offset 6)) accumulator)
do
(incf line-count)
(write-char (aref +Base64-Code-Array+ index) stream))
;; Reset stuff
(setq accumulator 0 count 3))
finally
;; If we didn't end on a 24 bit boundary, output rest with padding
(when (< count 3)
;; Check line-length first
(when (> (+ line-count 4) line-length)
(when use-crlf (write-char #\Return stream))
(write-char #\Newline stream))
(loop for offset from 3 downto count
for index = (ldb (byte 6 (* offset 6)) accumulator)
do
(write-char (aref +Base64-Code-Array+ index) stream))
(dotimes (i count)
(write-char +Base64-Padding-Character+ stream)))
(when final-crlf
(when use-crlf (write-char #\Return stream))
(write-char #\Newline stream))))))
(defun parse-authority-string (string)
(let* ((scheme-end (position #\Space string))
(scheme (subseq string 0 scheme-end)))
(cond
((string-equal scheme "Basic")
(let* ((userid-password
(decode-base64-string string :start (1+ scheme-end)))
(userid-end (position #\: userid-password))
(userid (subseq userid-password 0 userid-end))
(password (subseq userid-password (1+ userid-end))))
(list scheme
(cons "userid" userid)
(cons "password" password))))
(t
(list* scheme
(parse-key-value-list (subseq string scheme-end)))))))
(defun rfc1123-format-time (universal-time)
(multiple-value-bind (second minute hour date month year day daylight-p zone)
(decode-universal-time universal-time 0)
(declare (ignore daylight-p zone))
(let ((wkday (aref #("Mon" "Tue" "Wed" "Thu" "Fri" "Sat" "Sun") day))
(full-month (aref #("Jan" "Feb" "Mar" "Apr" "May" "Jun"
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
(1- month))))
(format nil "~A, ~2,'0D ~A ~4,'0D ~2,'0D:~2,'0D:~2,'0D GMT"
wkday date full-month year hour minute second))))
(defun parse-http-time (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(unless (= 8 (- end start))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid length for http-time field (too short or too long)"))
(handler-case
(let ((hour (parse-integer string :start start :end (+ start 2)))
(min (parse-integer string :start (+ start 3) :end (+ start 5)))
(sec (parse-integer string :start (+ start 6) :end (+ start 8))))
(assert (and (<= 0 hour 23) (<= 0 min 59) (<= 0 sec 59)
(char= (char string (+ start 2)) #\:)
(char= (char string (+ start 5)) #\:)))
(values hour min sec))
(error ()
(error 'clash-syntax-error :fragment (subseq string start end)
:readson "Invalid http-time")))))
(defun parse-http-day (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(unless (<= 1 (- end start) 2)
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid length for day field (too short or too long)"))
(handler-case
(let ((day (parse-integer string :start start :end end)))
(assert (<= 1 day 31))
day)
(error ()
(error 'clash-syntax-error :fragment (subseq string start end)
:readson "Invalid http-date day")))))
(defconstant +http-date-month-names+
'("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"))
(defun lookup-month-by-name (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(unless (= 3 (- end start))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid length for month field (too short or too long)"))
(or
(loop for month-name in +http-date-month-names+
for month from 1
thereis (when (string= string month-name :start1 start :end1 end)
month))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason "Unknown month name in http-date"))))
(defun parse-http-year (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(case (- end start)
(4
(handler-case (parse-integer string :start start :end end)
(error ()
(error 'clash-syntax-error :fragment (subseq string start end)
:readson "Invalid http-date year"))))
(2
(handler-case
(let ((year (parse-integer string :start start :end end)))
(+ year (if (< year 50)
2000
1900)))
(error ()
(error 'clash-syntax-error :fragment (subseq string start end)
:readson "Invalid http-date year"))))
(t
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid length for year field (too short or too long)")))))
(defun parse-rfc1123-date (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(unless (<= 29 (- end start))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid length for rfc1123-date (too short)"))
(let ((day (parse-http-day string (+ start 5) (+ start 7)))
(month (lookup-month-by-name string (+ start 8) (+ start 11)))
(year (parse-http-year string (+ start 12) (+ start 16))))
(multiple-value-bind (hour min sec)
(parse-http-time string (+ start 17) (+ start 25))
(unless (string= string " GMT" :start1 (+ start 25) :end1 (+ start 29))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid time-zone (must be GMT)"))
(let ((result (encode-universal-time sec min hour day month year 0)))
(multiple-value-bind (sec2 min2 hour2 day2 month2 year2)
(decode-universal-time result 0)
(unless (and (= sec sec2) (= min min2) (= hour hour2)
(= day day2) (= month month2) (= year year2))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Ambiguous rfc1123-date specification")))
result)))))
(defun parse-rfc850-date (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(let ((day-end (position #\, string :start start :end end)))
(unless (and day-end (<= 24 (- end day-end))
(<= 6 (- day-end start) 9))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid length for rfc850-date (too short or too long)"))
(let ((day (parse-http-day string (+ day-end 2) (+ day-end 4)))
(month (lookup-month-by-name string (+ day-end 5) (+ day-end 8)))
(year (parse-http-year string (+ day-end 9) (+ day-end 11))))
(multiple-value-bind (hour min sec)
(parse-http-time string (+ day-end 12) (+ day-end 20))
(unless (string= string " GMT" :start1 (+ day-end 20)
:end1 (+ day-end 24))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid time-zone (must be GMT)"))
(let ((result (encode-universal-time sec min hour day month year 0)))
(multiple-value-bind (sec2 min2 hour2 day2 month2 year2)
(decode-universal-time result 0)
(unless (and (= sec sec2) (= min min2) (= hour hour2)
(= day day2) (= month month2) (= year year2))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Ambiguous rfc850-date specification")))
result))))))
(defun parse-asctime-date (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(unless (<= 24 (- end start))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Invalid length for asctime-date (too short)"))
(let ((day (parse-http-day string (+ start 8) (+ start 10)))
(month (lookup-month-by-name string (+ start 4) (+ start 7)))
(year (parse-http-year string (+ start 20) (+ start 24))))
(multiple-value-bind (hour min sec)
(parse-http-time string (+ start 11) (+ start 19))
(let ((result (encode-universal-time sec min hour day month year 0)))
(multiple-value-bind (sec2 min2 hour2 day2 month2 year2)
(decode-universal-time result 0)
(unless (and (= sec sec2) (= min min2) (= hour hour2)
(= day day2) (= month month2) (= year year2))
(error 'clash-syntax-error :fragment (subseq string start end)
:reason
"Ambiguous asctime-date specification")))
result)))))
(defun parse-http-date (string &optional (start 0) (end (length string)))
(with-lws-trimmed-bounds (string start end)
(unless (> (- end start) 3)
(error 'clash-syntax-error :fragment (subseq string start end)
:reason "Invalid length for HTTP-date (too short)"))
(case (char string (+ start 3)) ; see RFC for grammar
(#\, ; rfc1123-date
(parse-rfc1123-date string start end))
(#\Space ; asctime-date
(parse-asctime-date string start end))
(t ; Must be an rfc850-date
(parse-rfc850-date string start end)))))
(defun merge-multiple-keys (a-list &key (test #'eql))
"Merge multiple entries in an a-list into one entry which contains
the list of values. All other entries are kept."
(do* ((result nil)
(rest a-list (rest rest))
(key (caar rest) (caar rest))
(entry (assoc key result :test test) (assoc key result :test test))
(value (cdar rest) (cdar rest)))
((null rest) result)
(if entry
(setf (cdr entry)
(if (consp (cdr entry))
(cons value (cdr entry))
(list value (cdr entry))))
(push (cons key value) result))))
;;; HTML escaping for error messages.
;;; This is especially important to avoid cross-scripting client
;;; attacks through our server.
(defun escape-text-for-html (text)
(declare (type simple-string text))
(with-output-to-string (stream)
(loop for char of-type character across text
do
(case char
(#\< (write-string "&lt;" stream))
(#\> (write-string "&gt;" stream))
(#\& (write-string "&amp;" stream))
(#\" (write-string "&quot;" stream))
(t (write-char char stream))))))