Nearly complete rewrite to clean up URL/URI parsing, merging and
handling, in the face of the much saner RFC 2396. This implementation provides all major capabilities, including low-consing operation, representation of relative and absolute URIs, useful merging and read-write consistency of URIs. Some clean-up work is still needed to purge the implementation from all references to URLs, to support the handling of all URIs. Other minor cleanups and additions are still needed, but after that the handling of URIs should be nearly as robust as the handling of pathnames in CL.
This commit is contained in:
629
src/main/url.cl
629
src/main/url.cl
@ -10,15 +10,19 @@
|
||||
|
||||
;;;; %File Description:
|
||||
;;;;
|
||||
;;;; This file provides all the facilities for the parsing of URLs
|
||||
;;;; into their component parts, as well as the mapping mechanism from
|
||||
;;;; URLs to resources.
|
||||
;;;; This file provides all the facilities for parsing and unparsing
|
||||
;;;; of URIs into/from their component parts, as well as features to
|
||||
;;;; manipulate and handle parsed URIs. This file tries to conform to
|
||||
;;;; RFC 2396, which differs in important aspects from earlier RFCs
|
||||
;;;; that defined URLs, URNs and URIs, like RFC 1738 and RFC 1808.
|
||||
;;;; All in all the new definition seems much more consistent and
|
||||
;;;; useful, and is therefore preferred.
|
||||
;;;;
|
||||
|
||||
;;; Conditions
|
||||
(define-condition url-error (clash-error)
|
||||
((url-string :initarg :url-string :reader url-error-url-string))
|
||||
(:default-initargs :code +HTTP-Code-Not-Found+)
|
||||
(:default-initargs :code +HTTP-Code-Bad-Request+)
|
||||
(:report (lambda (condition stream)
|
||||
(report-clash-error condition stream
|
||||
"Unspecified URL parsing error on URL ~A"
|
||||
@ -33,7 +37,34 @@
|
||||
(url-unknown-scheme-error-scheme condition)
|
||||
(url-error-url-string condition))))))
|
||||
|
||||
;;; Constants
|
||||
(define-condition url-syntax-error (url-error)
|
||||
()
|
||||
(:report (lambda (condition stream)
|
||||
(report-clash-error condition stream
|
||||
"URL syntax error while parsing URL ~A"
|
||||
(list (url-error-url-string condition))))))
|
||||
|
||||
(define-condition url-illegal-escape-error (url-syntax-error)
|
||||
((escape-seq :initarg :escape-seq
|
||||
:reader url-illegal-escape-error-escape-seq))
|
||||
(:report (lambda (condition stream)
|
||||
(report-clash-error condition stream
|
||||
"URL syntax error (Illegal URL escape sequence ~A) while parsing URL ~A"
|
||||
(list
|
||||
(url-illegal-escape-error-escape-seq
|
||||
condition)
|
||||
(url-error-url-string condition))))))
|
||||
|
||||
(define-condition url-simple-syntax-error (url-syntax-error)
|
||||
((reason :initarg :reason :reader url-simple-syntax-error-reason))
|
||||
(:report (lambda (condition stream)
|
||||
(report-clash-error condition stream
|
||||
"URL syntax error (~A) while parsing URL ~A"
|
||||
(list
|
||||
(url-simple-syntax-error-reason condition)
|
||||
(url-error-url-string condition))))))
|
||||
|
||||
;;; Constants and predicates on URI component characters
|
||||
|
||||
(defconstant +url-scheme-separator+ #\:
|
||||
"Separator Character for URL schemes.")
|
||||
@ -41,49 +72,305 @@
|
||||
(defconstant +url-fragment-separator+ #\#
|
||||
"Separator Character for URL fragments, which are not part of URLs.")
|
||||
|
||||
(defconstant +url-escape-character+ #\%
|
||||
"Character that indicates an escaped character.")
|
||||
|
||||
(defconstant +url-host-port-separator+ #\:
|
||||
"Host:Port Separator Character for some URL schemes.")
|
||||
|
||||
(defconstant +url-path-separator+ #\/
|
||||
"Path Separator Character for some URL schemes.")
|
||||
|
||||
(defconstant +url-param-separator+ #\;
|
||||
"Parameter Separator Character for some URL schemes.")
|
||||
|
||||
(defconstant +url-query-separator+ #\?
|
||||
"Query Separator Character for some URL schemes.")
|
||||
|
||||
(defconstant +url-port-host-indicator+ "//"
|
||||
(defconstant +url-query-key-separator+ #\=
|
||||
"Separator of query keywords from query values.")
|
||||
|
||||
(defconstant +url-query-pair-separator+ #\&
|
||||
"Separator of query pairs.")
|
||||
|
||||
(defconstant +url-authority-indicator+ "//"
|
||||
"String that when present at the beginning of some scheme-specific parts
|
||||
indicates the presence of host and/or port information.")
|
||||
indicates the presence of authority information.")
|
||||
|
||||
;;; URI character class predicates
|
||||
|
||||
(defmacro def-uri-char-p (name char &body body)
|
||||
"Declares an inlined predicate function on characters that is named
|
||||
`URI-name-CHAR-P', has a lambda list of (char) and the body body."
|
||||
(let ((realname (intern (format nil "URI-~A-CHAR-P" (symbol-name name))
|
||||
(symbol-package name))))
|
||||
`(progn
|
||||
(declaim (inline ,realname))
|
||||
(defun ,realname (,char)
|
||||
(declare (optimize (speed 3)) (type character ,char))
|
||||
,@body))))
|
||||
|
||||
(def-uri-char-p lowalpha char
|
||||
(char<= #\a char #\z))
|
||||
|
||||
(def-uri-char-p upalpha char
|
||||
(char<= #\A char #\Z))
|
||||
|
||||
(def-uri-char-p alpha char
|
||||
(or (uri-lowalpha-char-p char)
|
||||
(uri-upalpha-char-p char)))
|
||||
|
||||
(def-uri-char-p digit char
|
||||
(char<= #\0 char #\9))
|
||||
|
||||
(def-uri-char-p alphanum char
|
||||
(or (uri-alpha-char-p char)
|
||||
(uri-digit-char-p char)))
|
||||
|
||||
(def-uri-char-p mark char
|
||||
(or (char= char #\-)
|
||||
(char= char #\_)
|
||||
(char= char #\.)
|
||||
(char= char #\!)
|
||||
(char= char #\~)
|
||||
(char= char #\*)
|
||||
(char= char #\')
|
||||
(char= char #\()
|
||||
(char= char #\))))
|
||||
|
||||
(def-uri-char-p unreserved char
|
||||
(or (uri-alphanum-char-p char)
|
||||
(uri-mark-char-p char)))
|
||||
|
||||
(def-uri-char-p reserved char
|
||||
(or (char= char #\;)
|
||||
(char= char #\/)
|
||||
(char= char #\?)
|
||||
(char= char #\:)
|
||||
(char= char #\@)
|
||||
(char= char #\&)
|
||||
(char= char #\=)
|
||||
(char= char #\+)
|
||||
(char= char #\$)
|
||||
(char= char #\,)))
|
||||
|
||||
(def-uri-char-p scheme char
|
||||
(or (uri-alphanum-char-p char)
|
||||
(char= char #\+)
|
||||
(char= char #\-)
|
||||
(char= char #\.)))
|
||||
|
||||
(def-uri-char-p host char
|
||||
(or (uri-alphanum-char-p char)
|
||||
(char= char #\-)
|
||||
(char= char #\.)))
|
||||
|
||||
(def-uri-char-p port char
|
||||
(uri-digit-char-p char))
|
||||
|
||||
;;; URLs
|
||||
|
||||
(defclass url ()
|
||||
((source-string :initarg :source-string :reader url-source-string
|
||||
:initform nil)))
|
||||
:initform nil)
|
||||
(scheme :initarg :scheme :reader url-scheme)))
|
||||
|
||||
(defgeneric url-string (url)
|
||||
(:documentation
|
||||
"Renders the given URL object into a new string and returns that."))
|
||||
|
||||
(defgeneric url-scheme (url)
|
||||
(:documentation "Return a string with the URLs canonical scheme."))
|
||||
|
||||
(defgeneric url-specific-part (url)
|
||||
(defgeneric url-specific-string (url)
|
||||
(:documentation "Return a string with the URLs scheme-specific part."))
|
||||
|
||||
(defgeneric url-significant-string (url)
|
||||
(defgeneric url-significant-string (url))
|
||||
|
||||
(defgeneric merge-urls (url-a url-b)
|
||||
(:documentation
|
||||
"Return a string which contains those parts of the URL, that are necessary
|
||||
to identify a resource for the HTTP server to look-up the provider of that
|
||||
resource. For example with HTTP-URLs this excludes the search-key part."))
|
||||
"Merges two URLs into a new URL which is returned. Merging is done
|
||||
in a component-wise fashion, with url-a taking precedence over url-b.
|
||||
Merging is only defined on usefully similar URLs, i.e. URLs that are
|
||||
part of the same scheme."))
|
||||
|
||||
(defmethod url-string ((url url))
|
||||
(format nil "~A:~A"
|
||||
(format nil "~@[~A:~]~A"
|
||||
(url-scheme url)
|
||||
(url-specific-part url)))
|
||||
(url-specific-string url)))
|
||||
|
||||
(defmethod print-object ((obj url) stream)
|
||||
(print-unreadable-object (obj stream :type t :identity nil)
|
||||
(format stream "~S" (url-specific-part obj))))
|
||||
(write-string (url-string obj) stream)))
|
||||
|
||||
;;; Schemes and Parsers
|
||||
|
||||
(defvar *scheme-parser-map* nil
|
||||
"Mapping from schemes to parsers.")
|
||||
|
||||
(defun register-scheme (scheme parser-function)
|
||||
"Registers a new URL scheme."
|
||||
(let ((scheme (string-downcase scheme)))
|
||||
(let ((present (assoc scheme *scheme-parser-map* :test #'string=)))
|
||||
(if present
|
||||
(setf (cdr present) parser-function)
|
||||
(push (cons scheme parser-function) *scheme-parser-map*)))
|
||||
scheme))
|
||||
|
||||
(defun unregister-scheme (scheme)
|
||||
"Unregisters an URL scheme."
|
||||
(let ((scheme (string-downcase scheme)))
|
||||
(let ((present (assoc scheme *scheme-parser-map* :test #'string=)))
|
||||
(if present
|
||||
(setf *scheme-parser-map*
|
||||
(delete present *scheme-parser-map*))
|
||||
(error "URL scheme ~A is not defined." scheme))
|
||||
present)))
|
||||
|
||||
(defun lookup-scheme-parser (string)
|
||||
(cdr
|
||||
(assoc string *scheme-parser-map* :test #'string-equal)))
|
||||
|
||||
(defun reset-schemes ()
|
||||
"Clear the list of recognized schemes."
|
||||
(setq *scheme-parser-map* nil))
|
||||
|
||||
;;; Parsing of URLs
|
||||
|
||||
(defparameter *parse-url-default-base* "http"
|
||||
"This parameter supplies the default value to the `base' parameter
|
||||
of `parse-url-from-string'. Set this to nil to prevent parsing of
|
||||
relative URIs.")
|
||||
|
||||
(defun parse-url-from-string (string &key (base *parse-url-default-base*)
|
||||
(clear-fragment t))
|
||||
"Parse a URI from the simple string `string' and return the URL
|
||||
object that represents the parsed URI. If `base' is supplied, it must
|
||||
either be a string indicating an url-scheme or a valid Absolute-URI.
|
||||
If a string is passed, then a relative URI for this scheme will be
|
||||
parsed and returned. If a URI is passed, then a relative URI for the
|
||||
absolute URI's scheme will be parsed and merged with this base
|
||||
URL. Otherwise trying to parse a relative URI will result in an error.
|
||||
|
||||
If `clear-fragment' is true, then any fragment present in the URI
|
||||
string will be cleared before parsing begins and returned as a second
|
||||
value. Otherwise it will be handled by the scheme-specific parsing
|
||||
routine (usually by including it in the relevant scheme-specific part
|
||||
of the URL, e.g. the path or query component).
|
||||
|
||||
This function will raise conditions of type `url-error' if parsing
|
||||
fails. The raised condition will be of type `url-syntax-error' if the
|
||||
failure results from a syntactic error in the URI string passed."
|
||||
(let* ((base-url (and (not (stringp base)) base))
|
||||
(base-scheme (and base (if (stringp base) base (url-scheme base))))
|
||||
(end (or (and clear-fragment
|
||||
(position +url-fragment-separator+
|
||||
string :from-end t))
|
||||
(length string)))
|
||||
(fragment (and clear-fragment
|
||||
(< end (length string))
|
||||
(subseq string (1+ end)))))
|
||||
(let ((scheme-end (position-if-not #'uri-scheme-char-p string :end end)))
|
||||
(cond
|
||||
;; Absolute URI, i.e. scheme is included
|
||||
((and scheme-end
|
||||
(uri-alpha-char-p (schar string 0))
|
||||
(char= +url-scheme-separator+ (schar string scheme-end)))
|
||||
(let* ((scheme (nstring-downcase (subseq string 0 scheme-end)))
|
||||
(parser (lookup-scheme-parser scheme)))
|
||||
(unless parser
|
||||
(error 'url-unknown-scheme-error
|
||||
:scheme scheme :url-string string))
|
||||
(values (funcall parser scheme string (1+ scheme-end) end nil)
|
||||
fragment)))
|
||||
(t
|
||||
;; A relative URI, check if we are allowed to parse relative-URIs
|
||||
(unless base-scheme
|
||||
(error 'url-error :url-string string))
|
||||
(let ((parser (lookup-scheme-parser base-scheme)))
|
||||
(unless parser
|
||||
(error 'url-unknown-scheme-error
|
||||
:scheme base-scheme :url-string string))
|
||||
(values (funcall parser nil string 0 end base-url)
|
||||
fragment)))))))
|
||||
|
||||
(defun escape-url-string (string &optional (allowed ""))
|
||||
(declare (type simple-string string allowed)
|
||||
(optimize (speed 3)))
|
||||
(flet ((escape-p (char)
|
||||
(declare (type character char))
|
||||
(not
|
||||
(or
|
||||
(char<= #\0 char #\9)
|
||||
(char<= #\A char #\Z)
|
||||
(char<= #\a char #\z)
|
||||
(char= char #\-)
|
||||
(char= char #\_)
|
||||
(char= char #\.)
|
||||
(char= char #\!)
|
||||
(char= char #\~)
|
||||
(char= char #\*)
|
||||
(char= char #\')
|
||||
(find char allowed))))
|
||||
(escape-char (char string pos)
|
||||
(declare (type character char) (type simple-string string)
|
||||
(type (string-inner-index 3) pos))
|
||||
(setf (schar string pos) #\%)
|
||||
(let* ((value (char-code char))
|
||||
(msn-char (digit-char (ldb (byte 4 4) value) 16))
|
||||
(lsn-char (digit-char (ldb (byte 4 0) value) 16)))
|
||||
(setf (schar string (1+ pos)) msn-char
|
||||
(schar string (+ pos 2)) lsn-char))))
|
||||
(let ((escape-count (count-if #'escape-p string)))
|
||||
(when (zerop escape-count)
|
||||
(return-from escape-url-string string))
|
||||
(do* ((length (length string))
|
||||
(result (make-string (+ length (* escape-count 2))))
|
||||
(result-pos 0 (1+ result-pos))
|
||||
(source-pos 0 (1+ source-pos)))
|
||||
((= source-pos length) result)
|
||||
(declare (type (string-inner-index 3) result-pos source-pos))
|
||||
(let ((char (schar string source-pos)))
|
||||
(cond
|
||||
((escape-p char)
|
||||
(escape-char char result result-pos)
|
||||
(incf result-pos 2))
|
||||
(t
|
||||
(setf (schar result result-pos) char))))))))
|
||||
|
||||
(defun unescape-url-subseq (string start end)
|
||||
"Copies a subsequence of a string, unescaping escaped characters on the way."
|
||||
(declare (type simple-string string)
|
||||
(type string-index start end)
|
||||
(optimize (speed 3)))
|
||||
(let ((first-escaped-pos (position +url-escape-character+ string
|
||||
:start start :end end)))
|
||||
(unless first-escaped-pos
|
||||
(return-from unescape-url-subseq (subseq string start end)))
|
||||
(let ((escape-end (- end 3)))
|
||||
(flet ((unescaped-char (pos)
|
||||
(unless (<= pos escape-end)
|
||||
(error 'url-illegal-escape-error
|
||||
:escape-seq (subseq string pos)
|
||||
:url-string string))
|
||||
(let ((msn-value (digit-char-p (schar string (1+ pos)) 16))
|
||||
(lsn-value (digit-char-p (schar string (+ pos 2)) 16)))
|
||||
(unless (and msn-value lsn-value)
|
||||
(error 'url-illegal-escape-error
|
||||
:escape-seq (subseq string pos (+ pos 3))
|
||||
:url-string string))
|
||||
(code-char (+ (* 16 msn-value) lsn-value)))))
|
||||
(do* ((escape-count (count +url-escape-character+ string
|
||||
:start start :end end))
|
||||
(result-string (subseq string start (- end (* 2 escape-count))))
|
||||
(result-pos (- first-escaped-pos start) (1+ result-pos))
|
||||
(source-pos first-escaped-pos (1+ source-pos)))
|
||||
((>= source-pos end) result-string)
|
||||
(declare (type string-index result-pos source-pos)
|
||||
(type simple-string result-string))
|
||||
(when (char= +url-escape-character+
|
||||
(setf (schar result-string result-pos)
|
||||
(schar string source-pos)))
|
||||
(setf (schar result-string result-pos)
|
||||
(unescaped-char source-pos))
|
||||
(incf source-pos 2)))))))
|
||||
|
||||
;;; The following are some commonly used mix-ins for URLs.
|
||||
|
||||
@ -91,95 +378,253 @@ resource. For example with HTTP-URLs this excludes the search-key part."))
|
||||
((host :initarg :host :initform nil :reader url-host)
|
||||
(port :initarg :port :initform nil :reader url-port)))
|
||||
|
||||
(defgeneric url-authority-p (url)
|
||||
(:documentation
|
||||
"Returns true if the given url supplies an authority component.")
|
||||
(:method (url) (declare (ignorable url)) nil))
|
||||
|
||||
(defmethod url-authority-p ((url url-host-port-mixin))
|
||||
(or (url-host url) (url-port url)))
|
||||
|
||||
(defgeneric url-authority-string (url)
|
||||
(:documentation "Return the url authority part as a formatted string."))
|
||||
|
||||
(defmethod url-authority-string ((url url-host-port-mixin))
|
||||
(format nil "~:[~;//~@[~A~]~@[:~D~]~]"
|
||||
(url-authority-p url)
|
||||
(let ((host (url-host url)))
|
||||
(if host (escape-url-string host) nil))
|
||||
(url-port url)))
|
||||
|
||||
(defun parse-host-port-part (url start end)
|
||||
(let* ((host-end (loop for index from start below end
|
||||
for char = (schar url index)
|
||||
while (uri-host-char-p char)
|
||||
finally
|
||||
(return index)))
|
||||
(port-end (if (and (< host-end end)
|
||||
(char= (schar url host-end)
|
||||
+url-host-port-separator+))
|
||||
(loop for index from (1+ host-end) below end
|
||||
for char = (schar url index)
|
||||
while (uri-port-char-p char)
|
||||
finally (return index))
|
||||
host-end)))
|
||||
(values
|
||||
(subseq url start host-end)
|
||||
(and (> port-end host-end)
|
||||
(parse-integer url :start (1+ host-end) :end port-end))
|
||||
port-end)))
|
||||
|
||||
(defclass url-path-mixin ()
|
||||
((path :initarg :path :initform nil :reader url-path)))
|
||||
((directory :initarg :directory :initform nil :reader url-directory)
|
||||
(params :initarg :params :initform nil :reader url-params)
|
||||
(name :initarg :name :initform nil :reader url-name)
|
||||
(name-param :initarg :name-param :initform nil :reader url-name-param)))
|
||||
|
||||
(defclass url-parameter-mixin ()
|
||||
((parameters :initarg :parameters :initform nil :reader url-parameters)))
|
||||
(defgeneric url-path-p (url)
|
||||
(:documentation
|
||||
"Returns true if the given url supplies a path component.")
|
||||
(:method (url) (declare (ignorable url)) nil))
|
||||
|
||||
(defclass url-search-mixin ()
|
||||
((searchpart :initarg :searchpart :initform nil :reader url-searchpart)))
|
||||
(defmethod url-path-p ((url url-path-mixin))
|
||||
(or (url-directory url) (url-name url)))
|
||||
|
||||
;;; Schemes and Parsers
|
||||
(defgeneric url-path-string (url)
|
||||
(:documentation "Return the url path part as a formatted string."))
|
||||
|
||||
(defvar *default-url-scheme* "http"
|
||||
"This specifies the default scheme to use for parsing of URLs that
|
||||
don't provide a scheme by themselves.")
|
||||
(defmethod url-path-string ((url url-path-mixin))
|
||||
(let ((directory (url-directory url))
|
||||
(name (url-name url))
|
||||
(params (url-params url))
|
||||
(name-param (url-name-param url)))
|
||||
(cond
|
||||
((or params name-param)
|
||||
;; Handle Parameters correctly
|
||||
(error "URL's with path parameters not yet implemented"))
|
||||
(t
|
||||
(format nil "~:[~;~:[/~;~]~{~A/~}~@[~A~]~]"
|
||||
(or directory name)
|
||||
(eq (first directory) :relative)
|
||||
(mapcar #'escape-url-string (rest directory))
|
||||
(and name (escape-url-string name)))))))
|
||||
|
||||
(defvar *scheme-parser-map* (make-hash-table :test #'equal)
|
||||
"Mapping from schemes to parsers.")
|
||||
(defun parse-path-part (url start end &optional allow-params)
|
||||
(cond
|
||||
((zerop (- end start))
|
||||
;; No path supplied
|
||||
(values nil nil nil))
|
||||
(allow-params
|
||||
(loop with absolute-p = (char= (schar url start) +url-path-separator+)
|
||||
with real-start = (if absolute-p (1+ start) start)
|
||||
with name = nil
|
||||
with name-param = nil
|
||||
for segment-start = real-start then (min end (1+ segment-end))
|
||||
for segment-end = (or (position +url-path-separator+ url
|
||||
:start segment-start :end end)
|
||||
end)
|
||||
for param-start = (or (position +url-param-separator+ url
|
||||
:start segment-start
|
||||
:end segment-end)
|
||||
segment-end)
|
||||
while (< segment-start end)
|
||||
if (= segment-end end)
|
||||
do
|
||||
(setq name
|
||||
(unescape-url-subseq url segment-start param-start)
|
||||
name-param
|
||||
(unescape-url-subseq url (1+ param-start) segment-end))
|
||||
else
|
||||
collect
|
||||
(unescape-url-subseq url segment-start param-start)
|
||||
into directory and
|
||||
collect
|
||||
(when (< param-start segment-end)
|
||||
(unescape-url-subseq url (1+ param-start) segment-end))
|
||||
into params
|
||||
end
|
||||
finally
|
||||
(return (values (cons (if absolute-p :absolute :relative) directory)
|
||||
name
|
||||
(cons nil params)
|
||||
name-param))))
|
||||
(t
|
||||
(loop with absolute-p = (char= (schar url start) +url-path-separator+)
|
||||
with real-start = (if absolute-p (1+ start) start)
|
||||
with name = nil
|
||||
for segment-start = real-start then (min end (1+ segment-end))
|
||||
for segment-end = (or (position +url-path-separator+ url
|
||||
:start segment-start :end end)
|
||||
end)
|
||||
while (< segment-start end)
|
||||
if (= segment-end end)
|
||||
do
|
||||
(setq name
|
||||
(unescape-url-subseq url segment-start segment-end))
|
||||
else
|
||||
collect
|
||||
(unescape-url-subseq url segment-start segment-end)
|
||||
into directory
|
||||
end
|
||||
finally
|
||||
(return (values (cons (if absolute-p :absolute :relative) directory)
|
||||
name))))))
|
||||
|
||||
(defun register-scheme (scheme parser-function)
|
||||
"Registers a new URL scheme."
|
||||
(let ((scheme (string-downcase scheme)))
|
||||
(setf (gethash scheme *scheme-parser-map*) parser-function)))
|
||||
(defclass url-query-mixin ()
|
||||
((query-arguments :initarg :query-arguments :initform nil
|
||||
:reader url-query-arguments)))
|
||||
|
||||
(defun unregister-scheme (scheme)
|
||||
"Unregisters an URL scheme."
|
||||
(let ((scheme (string-downcase scheme)))
|
||||
(remhash scheme *scheme-parser-map*)))
|
||||
(defgeneric url-query-string (url))
|
||||
|
||||
(defun reset-schemes ()
|
||||
"Clear the list of recognized schemes."
|
||||
(clrhash *scheme-parser-map*))
|
||||
(defmethod url-query-string ((url url-query-mixin))
|
||||
(format nil "~@[?~{~A=~A~^&~}~]"
|
||||
(loop for (key . value) in (url-query-arguments url)
|
||||
collect (escape-url-string key)
|
||||
collect (escape-url-string value))))
|
||||
|
||||
;;; Parsing of URLs
|
||||
|
||||
(defun parse-url-from-string (string &optional clear-fragment)
|
||||
(let* ((string (if (and clear-fragment
|
||||
(position +url-fragment-separator+
|
||||
string))
|
||||
(subseq string 0
|
||||
(position +url-fragment-separator+
|
||||
string))
|
||||
string))
|
||||
(scheme-end (position +url-scheme-separator+ string))
|
||||
(scheme (if scheme-end
|
||||
(string-downcase
|
||||
(subseq string 0 scheme-end))
|
||||
*default-url-scheme*))
|
||||
(parser (gethash scheme *scheme-parser-map* nil)))
|
||||
(unless parser
|
||||
(error 'url-unknown-scheme-error :scheme scheme :url-string string))
|
||||
(funcall parser scheme string (if scheme-end (1+ scheme-end) 0))))
|
||||
(defun parse-query-part (url start end)
|
||||
(loop for key-start = start then (min end (1+ pair-end))
|
||||
for pair-end = (or (position +url-query-pair-separator+ url
|
||||
:start key-start :end end)
|
||||
end)
|
||||
for key-end = (position +url-query-key-separator+ url
|
||||
:start key-start :end pair-end)
|
||||
while (< key-start end)
|
||||
do
|
||||
(unless key-end
|
||||
(error 'url-simple-syntax-error :url-string url
|
||||
:reason "Malformed query key-value pair"))
|
||||
collect
|
||||
(cons (unescape-url-subseq url key-start key-end)
|
||||
(unescape-url-subseq url (1+ key-end) pair-end))))
|
||||
|
||||
;;; The specific URL schemes:
|
||||
|
||||
(defclass url-http (url url-host-port-mixin url-path-mixin url-search-mixin)
|
||||
((scheme :allocation :class :initform "http" :reader url-scheme)))
|
||||
(defclass url-http (url url-host-port-mixin url-path-mixin url-query-mixin)
|
||||
((scheme :initform "http")))
|
||||
|
||||
(defmethod url-specific-part ((url url-http))
|
||||
(format nil "~A~@[?~A~]"
|
||||
(url-significant-string url)
|
||||
(url-searchpart url)))
|
||||
(defmethod url-specific-string ((url url-http))
|
||||
(concatenate 'string
|
||||
(url-authority-string url)
|
||||
(url-path-string url)
|
||||
(url-query-string url)))
|
||||
|
||||
(defmethod url-significant-string ((url url-http))
|
||||
(format nil "~:[~2*~;//~A~@[:~A~]/~]~@[~A~]"
|
||||
(or (url-host url) (url-port url))
|
||||
(url-host url)
|
||||
(url-port url)
|
||||
(url-path url)))
|
||||
(concatenate 'string
|
||||
(url-authority-string url)
|
||||
(url-path-string url)))
|
||||
|
||||
(defun url-http-parser (scheme url start)
|
||||
(declare (ignore scheme))
|
||||
(let ((rest (subseq url start))
|
||||
host port path searchpart)
|
||||
(when (and (> (length rest) 1) (equal "//" (subseq rest 0 2)))
|
||||
(let* ((rest-pos (position +url-path-separator+ rest :start 2))
|
||||
(port-pos (position +url-host-port-separator+ rest :start 2
|
||||
:end rest-pos)))
|
||||
(setq host (subseq rest 2 (or port-pos rest-pos)))
|
||||
(when port-pos
|
||||
(setq port (subseq rest (1+ port-pos) rest-pos)))
|
||||
(setq rest (subseq rest (1+ rest-pos)))))
|
||||
(let ((searchpart-pos (position +url-query-separator+ rest)))
|
||||
(if searchpart-pos
|
||||
(setq searchpart (subseq rest (1+ searchpart-pos))
|
||||
path (subseq rest 0 searchpart-pos))
|
||||
(setq path rest)))
|
||||
(make-instance 'url-http
|
||||
:source-string url
|
||||
:host host :port port :path path
|
||||
:searchpart searchpart)))
|
||||
(defun url-http-parser (scheme url start end base-url)
|
||||
(let ((host nil)
|
||||
(port nil)
|
||||
(directory nil)
|
||||
(name nil)
|
||||
(query nil))
|
||||
;; Check for relpath here, because later on we can't reliably
|
||||
;; discriminate a valid relpath from a mal-formed abs-path
|
||||
(when (and (char/= (schar url start) +url-path-separator+)
|
||||
scheme)
|
||||
(error 'url-simple-syntax-error :url-string url
|
||||
:reason "Relative path component in Absolute-URI"))
|
||||
;; Authority
|
||||
(when (and (> (- end start) 2)
|
||||
(string= url +url-authority-indicator+
|
||||
:start1 start :end1 (+ start 2)))
|
||||
(multiple-value-setq (host port start)
|
||||
(parse-host-port-part url (+ start 2) end)))
|
||||
;; Query
|
||||
(let ((query-pos (position +url-query-separator+ url
|
||||
:start start :end end)))
|
||||
(when query-pos
|
||||
(setq query (parse-query-part url (1+ query-pos) end)
|
||||
end query-pos)))
|
||||
;; Path is the rest.
|
||||
(multiple-value-setq (directory name)
|
||||
(parse-path-part url start end))
|
||||
;; Create URL object, possibly merge with base-url
|
||||
(let ((result
|
||||
(make-instance 'url-http
|
||||
:source-string url
|
||||
:scheme scheme
|
||||
:host host :port port
|
||||
:directory directory
|
||||
:name name
|
||||
:query-arguments query)))
|
||||
(if base-url
|
||||
(merge-urls result base-url)
|
||||
result))))
|
||||
|
||||
(register-scheme "http" #'url-http-parser)
|
||||
|
||||
(defmethod merge-urls ((a url-http) (b url-http))
|
||||
(let* ((new-scheme (or (url-scheme a) (url-scheme b)))
|
||||
(authority-p (url-authority-p a))
|
||||
(new-host (if authority-p (url-host a) (url-host b)))
|
||||
(new-port (if authority-p (url-port a) (url-port b)))
|
||||
(directory-a (url-directory a))
|
||||
(directory-b (url-directory b))
|
||||
(new-name (or (url-name a) (url-name b)))
|
||||
(new-directory
|
||||
(cond
|
||||
((null directory-a)
|
||||
(copy-list directory-b))
|
||||
((eq (first directory-a) :absolute)
|
||||
(copy-list directory-a))
|
||||
(t
|
||||
(loop with new-directory = (reverse (rest directory-b))
|
||||
for segment in (rest directory-a)
|
||||
do
|
||||
(cond
|
||||
((string= segment "..")
|
||||
(pop new-directory))
|
||||
((string= segment ".")
|
||||
t)
|
||||
(t
|
||||
(push segment new-directory)))
|
||||
finally
|
||||
(return (cons :absolute (nreverse new-directory))))))))
|
||||
(make-instance 'url-http :source-string nil
|
||||
:scheme new-scheme
|
||||
:host new-host :port new-port
|
||||
:directory new-directory
|
||||
:name new-name)))
|
||||
|
||||
Reference in New Issue
Block a user