491 lines
17 KiB
Common Lisp
491 lines
17 KiB
Common Lisp
;;;; CLASH --- The Common Lisp Adaptable Simple HTTP server
|
|
;;;; This is copyrighted software. See documentation for terms.
|
|
;;;;
|
|
;;;; parsing.cl --- General HTTP parsing routines
|
|
;;;;
|
|
;;;; Checkout Tag: $Name$
|
|
;;;; $Id$
|
|
|
|
(in-package :CLASH)
|
|
|
|
;;;; %File Description:
|
|
;;;;
|
|
;;;; This file implements various parsing routines useful in dealing
|
|
;;;; with HTTP component constructs, based on the HTTP grammar.
|
|
;;;;
|
|
|
|
(eval-when (:compile-toplevel :load-toplevel :execute)
|
|
|
|
(defconstant +HTTP-LWS-Character-Bag+ '(#\Space #\Tab)
|
|
"HTTP LWS characters.")
|
|
|
|
(defconstant +HTTP-Quote-Character+ #\"
|
|
"HTTP quote character.")
|
|
|
|
(defconstant +HTTP-Default-List-Delimiter+ #\,
|
|
"HTTP default list delimiter character.")
|
|
|
|
(defconstant +HTTP-Pair-Delimiter+ #\=
|
|
"HTTP key-value pair delimiter character.")
|
|
|
|
)
|
|
|
|
(defun char-lws-p (char)
|
|
(declare (type character char))
|
|
(member char +HTTP-LWS-Character-Bag+ :test #'char=))
|
|
|
|
(define-compiler-macro char-lws-p (char)
|
|
(let ((char-var (gensym)))
|
|
`(let ((,char-var ,char))
|
|
(declare (type character ,char-var))
|
|
(or ,@(loop for lws-char in +HTTP-LWS-Character-Bag+
|
|
collect `(char= ,char-var ,lws-char))))))
|
|
|
|
(declaim (inline char-quote-p))
|
|
(defun char-quote-p (char)
|
|
(declare (type character char))
|
|
(char= char +HTTP-Quote-Character+))
|
|
|
|
(defun string-trim-lws (string)
|
|
"Trim HTTP LWS from string."
|
|
(string-trim +HTTP-LWS-Character-Bag+ string))
|
|
|
|
(declaim (inline skip-forward-lws))
|
|
(defun skip-forward-lws (string start end)
|
|
(do ((pos start (1+ pos)))
|
|
((or (>= pos end) (not (char-lws-p (char string pos))))
|
|
pos)))
|
|
|
|
(declaim (inline skip-backward-lws))
|
|
(defun skip-backward-lws (string start end)
|
|
(do ((pos end (1- pos)))
|
|
((or (<= pos start) (not (char-lws-p (char string pos))))
|
|
pos)))
|
|
|
|
(declaim (inline skip-forward-to-lws))
|
|
(defun skip-forward-to-lws (string start end)
|
|
(do ((pos start (1+ pos)))
|
|
((or (>= pos end) (char-lws-p (char string pos)))
|
|
pos)))
|
|
|
|
(declaim (inline skip-backward-to-lws))
|
|
(defun skip-backward-to-lws (string start end)
|
|
(do ((pos end (1- pos)))
|
|
((or (<= pos start) (char-lws-p (char string pos)))
|
|
pos)))
|
|
|
|
(defmacro with-lws-trimmed-bounds ((string-var start-var end-var) &body body)
|
|
`(let ((,end-var (or ,end-var (length ,string-var))))
|
|
(let ((,start-var (skip-forward-lws ,string-var ,start-var ,end-var))
|
|
(,end-var
|
|
(1+ (skip-backward-lws ,string-var ,start-var (1- ,end-var)))))
|
|
,@body)))
|
|
|
|
(defun dequote-string (string)
|
|
"Dequote an HTTP string."
|
|
(delete +HTTP-Quote-Character+ string))
|
|
|
|
(defun parse-http-list (string &key (delimiter +HTTP-Default-List-Delimiter+))
|
|
(declare (type string string) (character delimiter))
|
|
(loop with state = :seek-element
|
|
with start = nil
|
|
with stop = nil
|
|
for char across string
|
|
for pos from 0
|
|
when
|
|
(case state
|
|
(:seek-element
|
|
(cond
|
|
((char-lws-p char)
|
|
nil)
|
|
((char= char delimiter)
|
|
nil)
|
|
((char-quote-p char)
|
|
(setq start pos
|
|
state :in-quoted-string)
|
|
nil)
|
|
(t
|
|
(setq start pos
|
|
state :in-token)
|
|
nil)))
|
|
(:in-token
|
|
(cond
|
|
((char-lws-p char)
|
|
(setq stop pos
|
|
state :seek-delimiter)
|
|
t)
|
|
((char-quote-p char)
|
|
(setq state :in-quoted-string)
|
|
nil)
|
|
((char= char delimiter)
|
|
(setq stop pos
|
|
state :seek-element)
|
|
t)
|
|
(t
|
|
nil)))
|
|
(:in-quoted-string
|
|
(when (char-quote-p char)
|
|
(setq state :in-token))
|
|
nil)
|
|
(:seek-delimiter
|
|
(cond
|
|
((char= char delimiter)
|
|
(setq state :seek-element)
|
|
nil)
|
|
((char-lws-p char)
|
|
nil)
|
|
(t
|
|
(error 'clash-syntax-error :fragment string
|
|
:reason "Whitespace in token")))))
|
|
collect (subseq string start stop)
|
|
when
|
|
(and
|
|
(= pos (1- (length string)))
|
|
(case state
|
|
(:in-token
|
|
(setq stop nil)
|
|
t)
|
|
(:in-quoted-string
|
|
(error 'clash-syntax-error :fragment string
|
|
:reason "End of line in quoted-string"))
|
|
((:seek-element :seek-delimiter)
|
|
nil)))
|
|
collect (subseq string start stop)))
|
|
|
|
(defun parse-simple-list (string &key
|
|
(delimiter +HTTP-Default-List-Delimiter+))
|
|
"Parse a simple HTTP list from the string, returning a list of strings."
|
|
(mapcar #'(lambda (entry) (dequote-string (string-trim-lws entry)))
|
|
(parse-http-list string :delimiter delimiter)))
|
|
|
|
(defun parse-key-value-pair (string &key (delimiter +HTTP-Pair-Delimiter+))
|
|
(declare (type character delimiter))
|
|
(or
|
|
(loop with in-quote = nil
|
|
for char across string
|
|
for pos from 0
|
|
do
|
|
(cond
|
|
((and in-quote (char-quote-p char))
|
|
(setq in-quote nil))
|
|
((char-quote-p char)
|
|
(setq in-quote t))
|
|
((char= char delimiter)
|
|
(return (cons (dequote-string (subseq string 0 pos))
|
|
(dequote-string (subseq string (1+ pos))))))
|
|
(t
|
|
nil)))
|
|
(error 'clash-syntax-error :fragment string
|
|
:reason (format nil "Missing ~A in key-value pair." delimiter))))
|
|
|
|
(defun parse-key-value-list
|
|
(string &key (list-delimiter +HTTP-Default-List-Delimiter+)
|
|
(pair-delimiter +HTTP-Pair-Delimiter+))
|
|
"Parse a list of key-value pairs from the string, returning an alist."
|
|
(mapcar #'(lambda (elem)
|
|
(parse-key-value-pair elem :delimiter pair-delimiter))
|
|
(parse-http-list string :delimiter list-delimiter)))
|
|
|
|
(defconstant +Base64-Code-Array+
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
|
|
|
|
(defconstant +Base64-Padding-Character+ #\=)
|
|
|
|
(defun decode-base64-string (string &key (start 0) end)
|
|
(declare (fixnum start) (string string))
|
|
(let ((end (or end (length string))))
|
|
(declare (fixnum end))
|
|
(with-output-to-string (stream)
|
|
(loop with count = 4
|
|
with accumulator = 0
|
|
with padding = 0
|
|
for pos from start below end
|
|
for char = (char string pos)
|
|
for value = (position char +Base64-Code-Array+ :test #'char=)
|
|
do
|
|
(when (and (null value) (char= char +Base64-Padding-Character+))
|
|
(setq value 0)
|
|
(incf padding))
|
|
(when value
|
|
(decf count)
|
|
(setf (ldb (byte 6 (* count 6)) accumulator) value))
|
|
(when (zerop count)
|
|
(loop for byte from 2 downto padding
|
|
for item-value = (ldb (byte 8 (* byte 8)) accumulator)
|
|
for item-char = (code-char item-value)
|
|
do
|
|
(write-char item-char stream))
|
|
(setq accumulator 0 count 4 padding 0))
|
|
finally
|
|
(when (/= count 4)
|
|
(error 'clash-syntax-error :fragment string
|
|
:reason
|
|
"Base64-Encoding ended on non-block-boundary"))))))
|
|
|
|
(defun encode-base64-string
|
|
(string &key (start 0) end use-crlf final-crlf (line-length 76))
|
|
(declare (fixnum start) (string string))
|
|
(let ((end (or end (length string))))
|
|
(declare (fixnum end))
|
|
(with-output-to-string (stream)
|
|
(loop with count = 3
|
|
with accumulator = 0
|
|
with line-count = 0
|
|
for pos from start below end
|
|
for char = (char string pos)
|
|
for value = (char-code char)
|
|
do
|
|
;; Snarf and stash byte
|
|
(decf count)
|
|
(setf (ldb (byte 8 (* count 8)) accumulator) value)
|
|
;; If we've got 24 bits, then output encoded 4-tuple
|
|
(when (zerop count)
|
|
;; Check line-length first
|
|
(when (> (+ line-count 4) line-length)
|
|
(when use-crlf (write-char #\Return stream))
|
|
(write-char #\Newline stream)
|
|
(setq line-count 0))
|
|
;; Output 4-tuple
|
|
(loop for offset from 3 downto 0
|
|
for index = (ldb (byte 6 (* offset 6)) accumulator)
|
|
do
|
|
(incf line-count)
|
|
(write-char (aref +Base64-Code-Array+ index) stream))
|
|
;; Reset stuff
|
|
(setq accumulator 0 count 3))
|
|
finally
|
|
;; If we didn't end on a 24 bit boundary, output rest with padding
|
|
(when (< count 3)
|
|
;; Check line-length first
|
|
(when (> (+ line-count 4) line-length)
|
|
(when use-crlf (write-char #\Return stream))
|
|
(write-char #\Newline stream))
|
|
(loop for offset from 3 downto count
|
|
for index = (ldb (byte 6 (* offset 6)) accumulator)
|
|
do
|
|
(write-char (aref +Base64-Code-Array+ index) stream))
|
|
(dotimes (i count)
|
|
(write-char +Base64-Padding-Character+ stream)))
|
|
(when final-crlf
|
|
(when use-crlf (write-char #\Return stream))
|
|
(write-char #\Newline stream))))))
|
|
|
|
(defun parse-authority-string (string)
|
|
(let* ((scheme-end (position #\Space string))
|
|
(scheme (subseq string 0 scheme-end)))
|
|
(cond
|
|
((string-equal scheme "Basic")
|
|
(let* ((userid-password
|
|
(decode-base64-string string :start (1+ scheme-end)))
|
|
(userid-end (position #\: userid-password))
|
|
(userid (subseq userid-password 0 userid-end))
|
|
(password (subseq userid-password (1+ userid-end))))
|
|
(list scheme
|
|
(cons "userid" userid)
|
|
(cons "password" password))))
|
|
(t
|
|
(list* scheme
|
|
(parse-key-value-list (subseq string scheme-end)))))))
|
|
|
|
(defun rfc1123-format-time (universal-time)
|
|
(multiple-value-bind (second minute hour date month year day daylight-p zone)
|
|
(decode-universal-time universal-time 0)
|
|
(declare (ignore daylight-p zone))
|
|
(let ((wkday (aref #("Mon" "Tue" "Wed" "Thu" "Fri" "Sat" "Sun") day))
|
|
(full-month (aref #("Jan" "Feb" "Mar" "Apr" "May" "Jun"
|
|
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
|
|
(1- month))))
|
|
(format nil "~A, ~2,'0D ~A ~4,'0D ~2,'0D:~2,'0D:~2,'0D GMT"
|
|
wkday date full-month year hour minute second))))
|
|
|
|
(defun parse-http-time (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(unless (= 8 (- end start))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid length for http-time field (too short or too long)"))
|
|
(handler-case
|
|
(let ((hour (parse-integer string :start start :end (+ start 2)))
|
|
(min (parse-integer string :start (+ start 3) :end (+ start 5)))
|
|
(sec (parse-integer string :start (+ start 6) :end (+ start 8))))
|
|
(assert (and (<= 0 hour 23) (<= 0 min 59) (<= 0 sec 59)
|
|
(char= (char string (+ start 2)) #\:)
|
|
(char= (char string (+ start 5)) #\:)))
|
|
(values hour min sec))
|
|
(error ()
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:readson "Invalid http-time")))))
|
|
|
|
(defun parse-http-day (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(unless (<= 1 (- end start) 2)
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid length for day field (too short or too long)"))
|
|
(handler-case
|
|
(let ((day (parse-integer string :start start :end end)))
|
|
(assert (<= 1 day 31))
|
|
day)
|
|
(error ()
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:readson "Invalid http-date day")))))
|
|
|
|
(defconstant +http-date-month-names+
|
|
'("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"))
|
|
|
|
(defun lookup-month-by-name (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(unless (= 3 (- end start))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid length for month field (too short or too long)"))
|
|
(or
|
|
(loop for month-name in +http-date-month-names+
|
|
for month from 1
|
|
thereis (when (string= string month-name :start1 start :end1 end)
|
|
month))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason "Unknown month name in http-date"))))
|
|
|
|
(defun parse-http-year (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(case (- end start)
|
|
(4
|
|
(handler-case (parse-integer string :start start :end end)
|
|
(error ()
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:readson "Invalid http-date year"))))
|
|
(2
|
|
(handler-case
|
|
(let ((year (parse-integer string :start start :end end)))
|
|
(+ year (if (< year 50)
|
|
2000
|
|
1900)))
|
|
(error ()
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:readson "Invalid http-date year"))))
|
|
(t
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid length for year field (too short or too long)")))))
|
|
|
|
(defun parse-rfc1123-date (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(unless (<= 29 (- end start))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid length for rfc1123-date (too short)"))
|
|
(let ((day (parse-http-day string (+ start 5) (+ start 7)))
|
|
(month (lookup-month-by-name string (+ start 8) (+ start 11)))
|
|
(year (parse-http-year string (+ start 12) (+ start 16))))
|
|
(multiple-value-bind (hour min sec)
|
|
(parse-http-time string (+ start 17) (+ start 25))
|
|
(unless (string= string " GMT" :start1 (+ start 25) :end1 (+ start 29))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid time-zone (must be GMT)"))
|
|
(let ((result (encode-universal-time sec min hour day month year 0)))
|
|
(multiple-value-bind (sec2 min2 hour2 day2 month2 year2)
|
|
(decode-universal-time result 0)
|
|
(unless (and (= sec sec2) (= min min2) (= hour hour2)
|
|
(= day day2) (= month month2) (= year year2))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Ambiguous rfc1123-date specification")))
|
|
result)))))
|
|
|
|
(defun parse-rfc850-date (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(let ((day-end (position #\, string :start start :end end)))
|
|
(unless (and day-end (<= 24 (- end day-end))
|
|
(<= 6 (- day-end start) 9))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid length for rfc850-date (too short or too long)"))
|
|
(let ((day (parse-http-day string (+ day-end 2) (+ day-end 4)))
|
|
(month (lookup-month-by-name string (+ day-end 5) (+ day-end 8)))
|
|
(year (parse-http-year string (+ day-end 9) (+ day-end 11))))
|
|
(multiple-value-bind (hour min sec)
|
|
(parse-http-time string (+ day-end 12) (+ day-end 20))
|
|
(unless (string= string " GMT" :start1 (+ day-end 20)
|
|
:end1 (+ day-end 24))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid time-zone (must be GMT)"))
|
|
(let ((result (encode-universal-time sec min hour day month year 0)))
|
|
(multiple-value-bind (sec2 min2 hour2 day2 month2 year2)
|
|
(decode-universal-time result 0)
|
|
(unless (and (= sec sec2) (= min min2) (= hour hour2)
|
|
(= day day2) (= month month2) (= year year2))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Ambiguous rfc850-date specification")))
|
|
result))))))
|
|
|
|
(defun parse-asctime-date (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(unless (<= 24 (- end start))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Invalid length for asctime-date (too short)"))
|
|
(let ((day (parse-http-day string (+ start 8) (+ start 10)))
|
|
(month (lookup-month-by-name string (+ start 4) (+ start 7)))
|
|
(year (parse-http-year string (+ start 20) (+ start 24))))
|
|
(multiple-value-bind (hour min sec)
|
|
(parse-http-time string (+ start 11) (+ start 19))
|
|
(let ((result (encode-universal-time sec min hour day month year 0)))
|
|
(multiple-value-bind (sec2 min2 hour2 day2 month2 year2)
|
|
(decode-universal-time result 0)
|
|
(unless (and (= sec sec2) (= min min2) (= hour hour2)
|
|
(= day day2) (= month month2) (= year year2))
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason
|
|
"Ambiguous asctime-date specification")))
|
|
result)))))
|
|
|
|
(defun parse-http-date (string &optional (start 0) (end (length string)))
|
|
(with-lws-trimmed-bounds (string start end)
|
|
(unless (> (- end start) 3)
|
|
(error 'clash-syntax-error :fragment (subseq string start end)
|
|
:reason "Invalid length for HTTP-date (too short)"))
|
|
(case (char string (+ start 3)) ; see RFC for grammar
|
|
(#\, ; rfc1123-date
|
|
(parse-rfc1123-date string start end))
|
|
(#\Space ; asctime-date
|
|
(parse-asctime-date string start end))
|
|
(t ; Must be an rfc850-date
|
|
(parse-rfc850-date string start end)))))
|
|
|
|
(defun merge-multiple-keys (a-list &key (test #'eql))
|
|
"Merge multiple entries in an a-list into one entry which contains
|
|
the list of values. All other entries are kept."
|
|
(do* ((result nil)
|
|
(rest a-list (rest rest))
|
|
(key (caar rest) (caar rest))
|
|
(entry (assoc key result :test test) (assoc key result :test test))
|
|
(value (cdar rest) (cdar rest)))
|
|
((null rest) result)
|
|
(if entry
|
|
(setf (cdr entry)
|
|
(if (consp (cdr entry))
|
|
(cons value (cdr entry))
|
|
(list value (cdr entry))))
|
|
(push (cons key value) result))))
|
|
|
|
;;; HTML escaping for error messages.
|
|
|
|
;;; This is especially important to avoid cross-scripting client
|
|
;;; attacks through our server.
|
|
|
|
(defun escape-text-for-html (text)
|
|
(declare (type simple-string text))
|
|
(with-output-to-string (stream)
|
|
(loop for char of-type character across text
|
|
do
|
|
(case char
|
|
(#\< (write-string "<" stream))
|
|
(#\> (write-string ">" stream))
|
|
(#\& (write-string "&" stream))
|
|
(#\" (write-string """ stream))
|
|
(t (write-char char stream))))))
|