Files
PURI/src.lisp
2003-07-18 21:00:54 +00:00

1372 lines
42 KiB
Common Lisp

;; -*- mode: common-lisp; package: puri -*-
;; Support for URIs in Allegro.
;; For general URI information see RFC2396.
;;
;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved.
;; copyright (c) 2003 Kevin Rosenberg (porting changes)
;;
;; The software, data and information contained herein are proprietary
;; to, and comprise valuable trade secrets of, Franz, Inc. They are
;; given in confidence by Franz, Inc. pursuant to a written license
;; agreement, and may be stored and used only in accordance with the terms
;; of such license.
;;
;; Restricted Rights Legend
;; ------------------------
;; Use, duplication, and disclosure of the software, data and information
;; contained herein by any agency, department or entity of the U.S.
;; Government are subject to restrictions of Restricted Rights for
;; Commercial Software developed at private expense as specified in
;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable.
;;
;; Original version from ACL 6.1:
;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
;;
;; $Id: src.lisp,v 1.3 2003/07/18 21:00:54 kevin Exp $
(defpackage #:puri
(:use #:cl)
(:export
#:uri ; the type and a function
#:uri-p
#:copy-uri
#:uri-scheme ; and slots
#:uri-host #:uri-port
#:uri-path
#:uri-query
#:uri-fragment
#:uri-plist
#:uri-authority ; pseudo-slot accessor
#:urn ; class
#:urn-nid ; pseudo-slot accessor
#:urn-nss ; pseudo-slot accessor
#:*strict-parse*
#:parse-uri
#:merge-uris
#:enough-uri
#:uri-parsed-path
#:render-uri
#:make-uri-space ; interning...
#:uri-space
#:uri=
#:intern-uri
#:unintern-uri
#:do-all-uris))
(in-package #:puri)
(eval-when (compile) (declaim (optimize (speed 3))))
(eval-when (:compile-toplevel :load-toplevel :execute)
(defvar if*-keyword-list '("then" "thenret" "else" "elseif")))
#-allegro
(define-condition parse-error (error) ())
(defun .parse-error (fmt &rest args)
#+allegro (apply #'excl::.parse-error fmt args)
#-allegro (error
(make-condition 'parse-error :format-control fmt
:format-arguments args)))
(defun internal-reader-error (stream fmt &rest args)
#+allegro
(apply #'excl::internal-reader-error stream fmt args)
#-allegro
(apply #'format stream
"#u takes a string or list argument: ~s" args))
#-allegro (defvar *current-case-mode* :case-insensitive-upper)
(defun position-char (char string start max)
(declare (optimize (speed 3) (safety 0) (space 0))
(fixnum start max) (simple-string string))
(do* ((i start (1+ i)))
((= i max) nil)
(declare (fixnum i))
(when (char= char (schar string i)) (return i))))
#+allegro
(defun delimited-string-to-list (string &optional (separator #\space))
(excl:delimited-string-to-list string))
(defun delimited-string-to-list (string &optional (separator #\space)
skip-terminal)
(declare (optimize (speed 3) (safety 0) (space 0)
(compilation-speed 0))
(type string string)
(type character separator))
(do* ((len (length string))
(output '())
(pos 0)
(end (position-char separator string pos len)
(position-char separator string pos len)))
((null end)
(if (< pos len)
(push (subseq string pos) output)
(when (or (not skip-terminal) (zerop len))
(push "" output)))
(nreverse output))
(declare (type fixnum pos len)
(type (or null fixnum) end))
(push (subseq string pos end) output)
(setq pos (1+ end))))
(defmacro if* (&rest args)
(do ((xx (reverse args) (cdr xx))
(state :init)
(elseseen nil)
(totalcol nil)
(lookat nil nil)
(col nil))
((null xx)
(cond ((eq state :compl)
`(cond ,@totalcol))
(t (error "if*: illegal form ~s" args))))
(cond ((and (symbolp (car xx))
(member (symbol-name (car xx))
if*-keyword-list
:test #'string-equal))
(setq lookat (symbol-name (car xx)))))
(cond ((eq state :init)
(cond (lookat (cond ((string-equal lookat "thenret")
(setq col nil
state :then))
(t (error
"if*: bad keyword ~a" lookat))))
(t (setq state :col
col nil)
(push (car xx) col))))
((eq state :col)
(cond (lookat
(cond ((string-equal lookat "else")
(cond (elseseen
(error
"if*: multiples elses")))
(setq elseseen t)
(setq state :init)
(push `(t ,@col) totalcol))
((string-equal lookat "then")
(setq state :then))
(t (error "if*: bad keyword ~s"
lookat))))
(t (push (car xx) col))))
((eq state :then)
(cond (lookat
(error
"if*: keyword ~s at the wrong place " (car xx)))
(t (setq state :compl)
(push `(,(car xx) ,@col) totalcol))))
((eq state :compl)
(cond ((not (string-equal lookat "elseif"))
(error "if*: missing elseif clause ")))
(setq state :init)))))
(defclass uri ()
(
;;;; external:
(scheme :initarg :scheme :initform nil :accessor uri-scheme)
(host :initarg :host :initform nil :accessor uri-host)
(port :initarg :port :initform nil :accessor uri-port)
(path :initarg :path :initform nil :accessor uri-path)
(query :initarg :query :initform nil :accessor uri-query)
(fragment :initarg :fragment :initform nil :accessor uri-fragment)
(plist :initarg :plist :initform nil :accessor uri-plist)
;;;; internal:
(escaped
;; used to prevent unnessary work, looking for chars to escape and
;; unescape.
:initarg :escaped :initform nil :accessor uri-escaped)
(string
;; the cached printable representation of the URI. It *might* be
;; different than the original string, though, because the user might
;; have escaped non-reserved chars--they won't be escaped when the URI
;; is printed.
:initarg :string :initform nil :accessor uri-string)
(parsed-path
;; the cached parsed representation of the URI path.
:initarg :parsed-path
:initform nil
:accessor .uri-parsed-path)
(hashcode
;; cached sxhash, so we don't have to compute it more than once.
:initarg :hashcode :initform nil :accessor uri-hashcode)))
(defclass urn (uri)
((nid :initarg :nid :initform nil :accessor urn-nid)
(nss :initarg :nss :initform nil :accessor urn-nss)))
(eval-when (compile eval)
(defmacro clear-caching-on-slot-change (name)
`(defmethod (setf ,name) :around (new-value (self uri))
(declare (ignore new-value))
(prog1 (call-next-method)
(setf (uri-string self) nil)
,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
(setf (uri-hashcode self) nil))))
)
(clear-caching-on-slot-change uri-scheme)
(clear-caching-on-slot-change uri-host)
(clear-caching-on-slot-change uri-port)
(clear-caching-on-slot-change uri-path)
(clear-caching-on-slot-change uri-query)
(clear-caching-on-slot-change uri-fragment)
(defmethod make-load-form ((self uri) &optional env)
(declare (ignore env))
`(make-instance ',(class-name (class-of self))
:scheme ,(uri-scheme self)
:host ,(uri-host self)
:port ,(uri-port self)
:path ',(uri-path self)
:query ,(uri-query self)
:fragment ,(uri-fragment self)
:plist ',(uri-plist self)
:string ,(uri-string self)
:parsed-path ',(.uri-parsed-path self)))
(defmethod uri-p ((thing uri)) t)
(defmethod uri-p ((thing t)) nil)
(defun copy-uri (uri
&key place
(scheme (when uri (uri-scheme uri)))
(host (when uri (uri-host uri)))
(port (when uri (uri-port uri)))
(path (when uri (uri-path uri)))
(parsed-path
(when uri (copy-list (.uri-parsed-path uri))))
(query (when uri (uri-query uri)))
(fragment (when uri (uri-fragment uri)))
(plist (when uri (copy-list (uri-plist uri))))
(class (when uri (class-of uri)))
&aux (escaped (when uri (uri-escaped uri))))
(if* place
then (setf (uri-scheme place) scheme)
(setf (uri-host place) host)
(setf (uri-port place) port)
(setf (uri-path place) path)
(setf (.uri-parsed-path place) parsed-path)
(setf (uri-query place) query)
(setf (uri-fragment place) fragment)
(setf (uri-plist place) plist)
(setf (uri-escaped place) escaped)
(setf (uri-string place) nil)
(setf (uri-hashcode place) nil)
place
elseif (eq 'uri class)
then ;; allow the compiler to optimize the call to make-instance:
(make-instance 'uri
:scheme scheme :host host :port port :path path
:parsed-path parsed-path
:query query :fragment fragment :plist plist
:escaped escaped :string nil :hashcode nil)
else (make-instance class
:scheme scheme :host host :port port :path path
:parsed-path parsed-path
:query query :fragment fragment :plist plist
:escaped escaped :string nil :hashcode nil)))
(defmethod uri-parsed-path ((uri uri))
(when (uri-path uri)
(when (null (.uri-parsed-path uri))
(setf (.uri-parsed-path uri)
(parse-path (uri-path uri) (uri-escaped uri))))
(.uri-parsed-path uri)))
(defmethod (setf uri-parsed-path) (path-list (uri uri))
(assert (and (consp path-list)
(or (member (car path-list) '(:absolute :relative)
:test #'eq))))
(setf (uri-path uri) (render-parsed-path path-list t))
(setf (.uri-parsed-path uri) path-list)
path-list)
(defun uri-authority (uri)
(when (uri-host uri)
(let ((*print-pretty* nil))
(format nil "~a~@[:~a~]" (uri-host uri) (uri-port uri)))))
(defun uri-nid (uri)
(if* (equalp "urn" (uri-scheme uri))
then (uri-host uri)
else (error "URI is not a URN: ~s." uri)))
(defun uri-nss (uri)
(if* (equalp "urn" (uri-scheme uri))
then (uri-path uri)
else (error "URI is not a URN: ~s." uri)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Parsing
(defparameter *excluded-characters*
'(;; `delims' (except #\%, because it's handled specially):
#\< #\> #\" #\space #\#
;; `unwise':
#\{ #\} #\| #\\ #\^ #\[ #\] #\`))
(defun reserved-char-vector (chars &key except)
(do* ((a (make-array 127 :element-type 'bit :initial-element 0))
(chars chars (cdr chars))
(c (car chars) (car chars)))
((null chars) a)
(if* (and except (member c except :test #'char=))
thenret
else (setf (sbit a (char-int c)) 1))))
(defparameter *reserved-characters*
(reserved-char-vector
(append *excluded-characters*
'(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
(defparameter *reserved-authority-characters*
(reserved-char-vector
(append *excluded-characters* '(#\; #\/ #\? #\: #\@))))
(defparameter *reserved-path-characters*
(reserved-char-vector
(append *excluded-characters*
'(#\;
;;;;The rfc says this should be here, but it doesn't make sense.
;; #\=
#\/ #\?))))
(defparameter *reserved-path-characters2*
;; These are the same characters that are in
;; *reserved-path-characters*, minus #\/. Why? Because the parsed
;; representation of the path can contain the %2f converted into a /.
;; That's the whole point of having the parsed representation, so that
;; lisp programs can deal with the path element data in the most
;; convenient form.
(reserved-char-vector
(append *excluded-characters*
'(#\;
;;;;The rfc says this should be here, but it doesn't make sense.
;; #\=
#\?))))
(defparameter *reserved-fragment-characters*
(reserved-char-vector (remove #\# *excluded-characters*)))
(eval-when (compile eval)
(defun gen-char-range-list (start end)
(do* ((res '())
(endcode (1+ (char-int end)))
(chcode (char-int start)
(1+ chcode))
(hyphen nil))
((= chcode endcode)
;; - has to be first, otherwise it signifies a range!
(if* hyphen
then (setq res (nreverse res))
(push #\- res)
res
else (nreverse res)))
(if* (= #.(char-int #\-) chcode)
then (setq hyphen t)
else (push (code-char chcode) res))))
)
(defparameter *valid-nid-characters*
(reserved-char-vector
'#.(nconc (gen-char-range-list #\a #\z)
(gen-char-range-list #\A #\Z)
(gen-char-range-list #\0 #\9)
'(#\- #\. #\+))))
(defparameter *reserved-nss-characters*
(reserved-char-vector
(append *excluded-characters* '(#\& #\~ #\/ #\?))))
(defparameter *illegal-characters*
(reserved-char-vector (remove #\# *excluded-characters*)))
(defparameter *strict-illegal-query-characters*
(reserved-char-vector (append '(#\?) (remove #\# *excluded-characters*))))
(defparameter *illegal-query-characters*
(reserved-char-vector
*excluded-characters* :except '(#\^ #\| #\#)))
(defun parse-uri (thing &key (class 'uri) &aux escape)
(when (uri-p thing) (return-from parse-uri thing))
(setq escape (escape-p thing))
(multiple-value-bind (scheme host port path query fragment)
(parse-uri-string thing)
(when scheme
(setq scheme
(intern (funcall
(case *current-case-mode*
((:case-insensitive-upper :case-sensitive-upper)
#'string-upcase)
((:case-insensitive-lower :case-sensitive-lower)
#'string-downcase))
(decode-escaped-encoding scheme escape))
(find-package :keyword))))
(when (and scheme (eq :urn scheme))
(return-from parse-uri
(make-instance 'urn :scheme scheme :nid host :nss path)))
(when host (setq host (decode-escaped-encoding host escape)))
(when port
(setq port (read-from-string port))
(when (not (numberp port)) (error "port is not a number: ~s." port))
(when (not (plusp port))
(error "port is not a positive integer: ~d." port))
(when (eql port (case scheme
(:http 80)
(:https 443)
(:ftp 21)
(:telnet 23)))
(setq port nil)))
(when (or (string= "" path)
(and ;; we canonicalize away a reference to just /:
scheme
(member scheme '(:http :https :ftp) :test #'eq)
(string= "/" path)))
(setq path nil))
(when path
(setq path
(decode-escaped-encoding path escape *reserved-path-characters*)))
(when query (setq query (decode-escaped-encoding query escape)))
(when fragment
(setq fragment
(decode-escaped-encoding fragment escape
*reserved-fragment-characters*)))
(if* (eq 'uri class)
then ;; allow the compiler to optimize the make-instance call:
(make-instance 'uri
:scheme scheme
:host host
:port port
:path path
:query query
:fragment fragment
:escaped escape)
else ;; do it the slow way:
(make-instance class
:scheme scheme
:host host
:port port
:path path
:query query
:fragment fragment
:escaped escape))))
(defmethod uri ((thing uri))
thing)
(defmethod uri ((thing string))
(parse-uri thing))
(defmethod uri ((thing t))
(error "Cannot coerce ~s to a uri." thing))
(defvar *strict-parse* t)
(defun parse-uri-string (string &aux (illegal-chars *illegal-characters*))
(declare (optimize (speed 3)))
;; Speed is important, so use a specialized state machine instead of
;; regular expressions for parsing the URI string. The regexp we are
;; simulating:
;; ^(([^:/?#]+):)?
;; (//([^/?#]*))?
;; ([^?#]*)
;; (\?([^#]*))?
;; (#(.*))?
(let* ((state 0)
(start 0)
(end (length string))
(tokval nil)
(scheme nil)
(host nil)
(port nil)
(path-components '())
(query nil)
(fragment nil)
;; namespace identifier, for urn parsing only:
(nid nil))
(declare (fixnum state start end))
(flet ((read-token (kind &optional legal-chars)
(setq tokval nil)
(if* (>= start end)
then :end
else (let ((sindex start)
(res nil)
c)
(declare (fixnum sindex))
(setq res
(loop
(when (>= start end) (return nil))
(setq c (schar string start))
(let ((ci (char-int c)))
(if* legal-chars
then (if* (and (eq :colon kind) (eq c #\:))
then (return :colon)
elseif (= 0 (sbit legal-chars ci))
then (.parse-error
"~
URI ~s contains illegal character ~s at position ~d."
string c start))
elseif (and (< ci 128)
*strict-parse*
(= 1 (sbit illegal-chars ci)))
then (.parse-error "~
URI ~s contains illegal character ~s at position ~d."
string c start)))
(case kind
(:path (case c
(#\? (return :question))
(#\# (return :hash))))
(:query (case c (#\# (return :hash))))
(:rest)
(t (case c
(#\: (return :colon))
(#\? (return :question))
(#\# (return :hash))
(#\/ (return :slash)))))
(incf start)))
(if* (> start sindex)
then ;; we found some chars
;; before we stopped the parse
(setq tokval (subseq string sindex start))
:string
else ;; immediately stopped at a special char
(incf start)
res))))
(failure (&optional why)
(.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
string state why))
(impossible ()
(.parse-error "impossible state: ~d [~s]" state string)))
(loop
(case state
(0 ;; starting to parse
(ecase (read-token t)
(:colon (failure))
(:question (setq state 7))
(:hash (setq state 8))
(:slash (setq state 3))
(:string (setq state 1))
(:end (setq state 9))))
(1 ;; seen <token><special char>
(let ((token tokval))
(ecase (read-token t)
(:colon (setq scheme token)
(if* (equalp "urn" scheme)
then (setq state 15)
else (setq state 2)))
(:question (push token path-components)
(setq state 7))
(:hash (push token path-components)
(setq state 8))
(:slash (push token path-components)
(push "/" path-components)
(setq state 6))
(:string (failure))
(:end (push token path-components)
(setq state 9)))))
(2 ;; seen <scheme>:
(ecase (read-token t)
(:colon (failure))
(:question (setq state 7))
(:hash (setq state 8))
(:slash (setq state 3))
(:string (setq state 10))
(:end (setq state 9))))
(10 ;; seen <scheme>:<token>
(let ((token tokval))
(ecase (read-token t)
(:colon (failure))
(:question (push token path-components)
(setq state 7))
(:hash (push token path-components)
(setq state 8))
(:slash (push token path-components)
(setq state 6))
(:string (failure))
(:end (push token path-components)
(setq state 9)))))
(3 ;; seen / or <scheme>:/
(ecase (read-token t)
(:colon (failure))
(:question (push "/" path-components)
(setq state 7))
(:hash (push "/" path-components)
(setq state 8))
(:slash (setq state 4))
(:string (push "/" path-components)
(push tokval path-components)
(setq state 6))
(:end (push "/" path-components)
(setq state 9))))
(4 ;; seen [<scheme>:]//
(ecase (read-token t)
(:colon (failure))
(:question (failure))
(:hash (failure))
(:slash (failure))
(:string (setq host tokval)
(setq state 11))
(:end (failure))))
(11 ;; seen [<scheme>:]//<host>
(ecase (read-token t)
(:colon (setq state 5))
(:question (setq state 7))
(:hash (setq state 8))
(:slash (push "/" path-components)
(setq state 6))
(:string (impossible))
(:end (setq state 9))))
(5 ;; seen [<scheme>:]//<host>:
(ecase (read-token t)
(:colon (failure))
(:question (failure))
(:hash (failure))
(:slash (push "/" path-components)
(setq state 6))
(:string (setq port tokval)
(setq state 12))
(:end (failure))))
(12 ;; seen [<scheme>:]//<host>:[<port>]
(ecase (read-token t)
(:colon (failure))
(:question (setq state 7))
(:hash (setq state 8))
(:slash (push "/" path-components)
(setq state 6))
(:string (impossible))
(:end (setq state 9))))
(6 ;; seen /
(ecase (read-token :path)
(:question (setq state 7))
(:hash (setq state 8))
(:string (push tokval path-components)
(setq state 13))
(:end (setq state 9))))
(13 ;; seen path
(ecase (read-token :path)
(:question (setq state 7))
(:hash (setq state 8))
(:string (impossible))
(:end (setq state 9))))
(7 ;; seen ?
(setq illegal-chars
(if* *strict-parse*
then *strict-illegal-query-characters*
else *illegal-query-characters*))
(ecase (prog1 (read-token :query)
(setq illegal-chars *illegal-characters*))
(:hash (setq state 8))
(:string (setq query tokval)
(setq state 14))
(:end (setq state 9))))
(14 ;; query
(ecase (read-token :query)
(:hash (setq state 8))
(:string (impossible))
(:end (setq state 9))))
(8 ;; seen #
(ecase (read-token :rest)
(:string (setq fragment tokval)
(setq state 9))
(:end (setq state 9))))
(9 ;; done
(return
(values
scheme host port
(apply #'concatenate 'simple-string (nreverse path-components))
query fragment)))
;; URN parsing:
(15 ;; seen urn:, read nid now
(case (read-token :colon *valid-nid-characters*)
(:string (setq nid tokval)
(setq state 16))
(t (failure "missing namespace identifier"))))
(16 ;; seen urn:<nid>
(case (read-token t)
(:colon (setq state 17))
(t (failure "missing namespace specific string"))))
(17 ;; seen urn:<nid>:, rest is nss
(return (values scheme
nid
nil
(progn
(setq illegal-chars *reserved-nss-characters*)
(read-token :rest)
tokval))))
(t (.parse-error
"internal error in parse engine, wrong state: ~s." state)))))))
(defun escape-p (string)
(declare (optimize (speed 3)))
(do* ((i 0 (1+ i))
(max (the fixnum (length string))))
((= i max) nil)
(declare (fixnum i max))
(when (char= #\% (schar string i))
(return t))))
(defun parse-path (path-string escape)
(do* ((xpath-list (delimited-string-to-list path-string #\/))
(path-list
(progn
(if* (string= "" (car xpath-list))
then (setf (car xpath-list) :absolute)
else (push :relative xpath-list))
xpath-list))
(pl (cdr path-list) (cdr pl))
segments)
((null pl) path-list)
(if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
then ;; there is a param
;;; (setf (car pl) segments)
(setf (car pl)
(mapcar #'(lambda (s)
(decode-escaped-encoding
s escape *reserved-path-characters2*))
segments))
else ;; no param
;;; (setf (car pl) (car segments))
(setf (car pl)
(decode-escaped-encoding
(car segments) escape *reserved-path-characters2*)))))
(defun decode-escaped-encoding (string escape
&optional (reserved-chars
*reserved-characters*))
;; Return a string with the real characters.
(when (null escape) (return-from decode-escaped-encoding string))
(do* ((i 0 (1+ i))
(max (length string))
(new-string (copy-seq string))
(new-i 0 (1+ new-i))
ch ch2 chc chc2)
((= i max)
#+allegro
(excl::.primcall 'sys::shrink-svector new-string new-i)
#+sbcl
(sb-kernel:shrink-vector new-string new-i)
#-(or allegro sbcl)
(subseq new-string 0 new-i)
new-string)
(if* (char= #\% (setq ch (schar string i)))
then (when (> (+ i 3) max)
(.parse-error
"Unsyntactic escaped encoding in ~s." string))
(setq ch (schar string (incf i)))
(setq ch2 (schar string (incf i)))
(when (not (and (setq chc (digit-char-p ch 16))
(setq chc2 (digit-char-p ch2 16))))
(.parse-error
"Non-hexidecimal digits after %: %c%c." ch ch2))
(let ((ci (+ (* 16 chc) chc2)))
(if* (or (null reserved-chars)
(= 0 (sbit reserved-chars ci)))
then ;; ok as is
(setf (schar new-string new-i)
(code-char ci))
else (setf (schar new-string new-i) #\%)
(setf (schar new-string (incf new-i)) ch)
(setf (schar new-string (incf new-i)) ch2)))
else (setf (schar new-string new-i) ch))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Printing
(defun render-uri (uri stream
&aux (escape (uri-escaped uri))
(*print-pretty* nil))
(when (null (uri-string uri))
(setf (uri-string uri)
(let ((scheme (uri-scheme uri))
(host (uri-host uri))
(port (uri-port uri))
(path (uri-path uri))
(query (uri-query uri))
(fragment (uri-fragment uri)))
(concatenate 'simple-string
(when scheme
(encode-escaped-encoding
(string-downcase ;; for upper case lisps
(symbol-name scheme))
*reserved-characters* escape))
(when scheme ":")
(when host "//")
(when host
(encode-escaped-encoding
host *reserved-authority-characters* escape))
(when port ":")
(when port
;;;; too slow until ACL 6.0:
;;; (format nil "~d" port)
;;; (princ-to-string port)
#-allegro (princ-to-string port)
#+allegro
(with-output-to-string (s)
(excl::maybe-print-fast s port))
)
(when path
(encode-escaped-encoding path
nil
;;*reserved-path-characters*
escape))
(when query "?")
(when query (encode-escaped-encoding query nil escape))
(when fragment "#")
(when fragment (encode-escaped-encoding fragment nil escape))))))
(if* stream
then (format stream "~a" (uri-string uri))
else (uri-string uri)))
(defun render-parsed-path (path-list escape)
(do* ((res '())
(first (car path-list))
(pl (cdr path-list) (cdr pl))
(pe (car pl) (car pl)))
((null pl)
(when res (apply #'concatenate 'simple-string (nreverse res))))
(when (or (null first)
(prog1 (eq :absolute first)
(setq first nil)))
(push "/" res))
(if* (atom pe)
then (push
(encode-escaped-encoding pe *reserved-path-characters* escape)
res)
else ;; contains params
(push (encode-escaped-encoding
(car pe) *reserved-path-characters* escape)
res)
(dolist (item (cdr pe))
(push ";" res)
(push (encode-escaped-encoding
item *reserved-path-characters* escape)
res)))))
(defun render-urn (urn stream
&aux (*print-pretty* nil))
(when (null (uri-string urn))
(setf (uri-string urn)
(let ((nid (urn-nid urn))
(nss (urn-nss urn)))
(concatenate 'simple-string "urn:" nid ":" nss))))
(if* stream
then (format stream "~a" (uri-string urn))
else (uri-string urn)))
(defparameter *escaped-encoding*
(vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
(defun encode-escaped-encoding (string reserved-chars escape)
(when (null escape) (return-from encode-escaped-encoding string))
;; Make a string as big as it possibly needs to be (3 times the original
;; size), and truncate it at the end.
(do* ((max (length string))
(new-max (* 3 max)) ;; worst case new size
(new-string (make-string new-max))
(i 0 (1+ i))
(new-i -1)
c ci)
((= i max)
#+allegro
(excl::.primcall 'sys::shrink-svector new-string (incf new-i))
#+sbcl
(sb-kernel:shrink-vector new-string (incf new-i))
#-(or allegro sbcl)
(subseq new-string 0 (incf new-i))
new-string)
(setq ci (char-int (setq c (schar string i))))
(if* (or (null reserved-chars)
(> ci 127)
(= 0 (sbit reserved-chars ci)))
then ;; ok as is
(incf new-i)
(setf (schar new-string new-i) c)
else ;; need to escape it
(multiple-value-bind (q r) (truncate ci 16)
(setf (schar new-string (incf new-i)) #\%)
(setf (schar new-string (incf new-i)) (elt *escaped-encoding* q))
(setf (schar new-string (incf new-i))
(elt *escaped-encoding* r))))))
(defmethod print-object ((uri uri) stream)
(if* *print-escape*
then (format stream "#<~a ~a>" 'uri (render-uri uri nil))
else (render-uri uri stream)))
(defmethod print-object ((urn urn) stream)
(if* *print-escape*
then (format stream "#<~a ~a>" 'uri (render-urn urn nil))
else (render-urn urn stream)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; merging and unmerging
(defmethod merge-uris ((uri string) (base string) &optional place)
(merge-uris (parse-uri uri) (parse-uri base) place))
(defmethod merge-uris ((uri uri) (base string) &optional place)
(merge-uris uri (parse-uri base) place))
(defmethod merge-uris ((uri string) (base uri) &optional place)
(merge-uris (parse-uri uri) base place))
(defmethod merge-uris ((uri uri) (base uri) &optional place)
;; The following is from
;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt
;; and is algorithm we use to merge URIs.
;;
;; For more information, see section 5.2 of the RFC.
;;
(tagbody
;;;; step 2
(when (and (null (uri-parsed-path uri))
(null (uri-scheme uri))
(null (uri-host uri))
(null (uri-port uri))
(null (uri-query uri)))
(return-from merge-uris
(let ((new (copy-uri base :place place)))
(when (uri-query uri)
(setf (uri-query new) (uri-query uri)))
(when (uri-fragment uri)
(setf (uri-fragment new) (uri-fragment uri)))
new)))
(setq uri (copy-uri uri :place place))
;;;; step 3
(when (uri-scheme uri)
(return-from merge-uris uri))
(setf (uri-scheme uri) (uri-scheme base))
;;;; step 4
(when (uri-host uri) (go :done))
(setf (uri-host uri) (uri-host base))
(setf (uri-port uri) (uri-port base))
;;;; step 5
(let ((p (uri-parsed-path uri)))
(when (and p (eq :absolute (car p)))
(when (equal '(:absolute "") p)
;; Canonicalize the way parsing does:
(setf (uri-path uri) nil))
(go :done)))
;;;; step 6
(let* ((base-path
(or (uri-parsed-path base)
;; needed because we canonicalize away a path of just `/':
'(:absolute "")))
(path (uri-parsed-path uri))
new-path-list)
(when (not (eq :absolute (car base-path)))
(error "Cannot merge ~a and ~a, since latter is not absolute."
uri base))
;; steps 6a and 6b:
(setq new-path-list
(append (butlast base-path)
(if* path then (cdr path) else '(""))))
;; steps 6c and 6d:
(let ((last (last new-path-list)))
(if* (atom (car last))
then (when (string= "." (car last))
(setf (car last) ""))
else (when (string= "." (caar last))
(setf (caar last) ""))))
(setq new-path-list
(delete "." new-path-list :test #'(lambda (a b)
(if* (atom b)
then (string= a b)
else nil))))
;; steps 6e and 6f:
(let ((npl (cdr new-path-list))
index tmp fix-tail)
(setq fix-tail
(string= ".." (let ((l (car (last npl))))
(if* (atom l)
then l
else (car l)))))
(loop
(setq index
(position ".." npl
:test #'(lambda (a b)
(string= a
(if* (atom b)
then b
else (car b))))))
(when (null index) (return))
(when (= 0 index)
;; The RFC says, in 6g, "that the implementation may handle
;; this error by retaining these components in the resolved
;; path, by removing them from the resolved path, or by
;; avoiding traversal of the reference." The examples in C.2
;; imply that we should do the first thing (retain them), so
;; that's what we'll do.
(return))
(if* (= 1 index)
then (setq npl (cddr npl))
else (setq tmp npl)
(dotimes (x (- index 2)) (setq tmp (cdr tmp)))
(setf (cdr tmp) (cdddr tmp))))
(setf (cdr new-path-list) npl)
(when fix-tail (setq new-path-list (nconc new-path-list '("")))))
;; step 6g:
;; don't complain if new-path-list starts with `..'. See comment
;; above about this step.
;; step 6h:
(when (or (equal '(:absolute "") new-path-list)
(equal '(:absolute) new-path-list))
(setq new-path-list nil))
(setf (uri-path uri)
(render-parsed-path new-path-list
;; don't know, so have to assume:
t)))
;;;; step 7
:done
(return-from merge-uris uri)))
(defmethod enough-uri ((uri string) (base string) &optional place)
(enough-uri (parse-uri uri) (parse-uri base) place))
(defmethod enough-uri ((uri uri) (base string) &optional place)
(enough-uri uri (parse-uri base) place))
(defmethod enough-uri ((uri string) (base uri) &optional place)
(enough-uri (parse-uri uri) base place))
(defmethod enough-uri ((uri uri) (base uri) &optional place)
(let ((new-scheme nil)
(new-host nil)
(new-port nil)
(new-parsed-path nil))
(when (or (and (uri-scheme uri)
(not (equalp (uri-scheme uri) (uri-scheme base))))
(and (uri-host uri)
(not (equalp (uri-host uri) (uri-host base))))
(not (equalp (uri-port uri) (uri-port base))))
(return-from enough-uri uri))
(when (null (uri-host uri))
(setq new-host (uri-host base)))
(when (null (uri-port uri))
(setq new-port (uri-port base)))
(when (null (uri-scheme uri))
(setq new-scheme (uri-scheme base)))
;; Now, for the hard one, path.
;; We essentially do here what enough-namestring does.
(do* ((base-path (uri-parsed-path base))
(path (uri-parsed-path uri))
(bp base-path (cdr bp))
(p path (cdr p)))
((or (null bp) (null p))
;; If p is nil, that means we have something like
;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
;; new-parsed-path will be nil.
(when (null bp)
(setq new-parsed-path (copy-list p))
(when (not (symbolp (car new-parsed-path)))
(push :relative new-parsed-path))))
(if* (equal (car bp) (car p))
thenret ;; skip it
else (setq new-parsed-path (copy-list p))
(when (not (symbolp (car new-parsed-path)))
(push :relative new-parsed-path))
(return)))
(let ((new-path
(when new-parsed-path
(render-parsed-path new-parsed-path
;; don't know, so have to assume:
t)))
(new-query (uri-query uri))
(new-fragment (uri-fragment uri))
(new-plist (copy-list (uri-plist uri))))
(if* (and (null new-scheme)
(null new-host)
(null new-port)
(null new-path)
(null new-parsed-path)
(null new-query)
(null new-fragment))
then ;; can't have a completely empty uri!
(copy-uri nil
:class (class-of uri)
:place place
:path "/"
:plist new-plist)
else (copy-uri nil
:class (class-of uri)
:place place
:scheme new-scheme
:host new-host
:port new-port
:path new-path
:parsed-path new-parsed-path
:query new-query
:fragment new-fragment
:plist new-plist)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; support for interning URIs
(defun make-uri-space (&rest keys &key (size 777) &allow-other-keys)
#+allegro
(apply #'make-hash-table :size size
:hash-function 'uri-hash
:test 'uri= :values nil keys)
#-allegro
(apply #'make-hash-table :size size keys))
(defun gethash-uri (uri table)
#+allegro (gethash uri table)
#-allegro
(let* ((hash (uri-hash uri))
(existing (gethash hash table)))
(dolist (u existing)
(when (uri= u uri)
(return-from gethash-uri (values u t))))
(values nil nil)))
(defun puthash-uri (uri table)
#+allegro (excl:puthash-key uri table)
#-allegro
(let ((existing (gethash (uri-hash uri) table)))
(dolist (u existing)
(when (uri= u uri)
(return-from puthash-uri u)))
(setf (gethash (uri-hash uri) table)
(cons uri existing))
uri))
(defun uri-hash (uri)
(if* (uri-hashcode uri)
thenret
else (setf (uri-hashcode uri)
(sxhash
#+allegro
(render-uri uri nil)
#-allegro
(string-downcase
(render-uri uri nil))))))
(defvar *uris* (make-uri-space))
(defun uri-space () *uris*)
(defun (setf uri-space) (new-val)
(setq *uris* new-val))
;; bootstrapping (uri= changed from function to method):
(when (fboundp 'uri=) (fmakunbound 'uri=))
(defmethod uri= ((uri1 uri) (uri2 uri))
(when (not (eq (uri-scheme uri1) (uri-scheme uri2)))
(return-from uri= nil))
;; RFC2396 says: a URL with an explicit ":port", where the port is
;; the default for the scheme, is the equivalent to one where the
;; port is elided. Hmmmm. This means that this function has to be
;; scheme dependent. Grrrr.
(let ((default-port (case (uri-scheme uri1)
(:http 80)
(:https 443)
(:ftp 21)
(:telnet 23))))
(and (equalp (uri-host uri1) (uri-host uri2))
(eql (or (uri-port uri1) default-port)
(or (uri-port uri2) default-port))
(string= (uri-path uri1) (uri-path uri2))
(string= (uri-query uri1) (uri-query uri2))
(string= (uri-fragment uri1) (uri-fragment uri2)))))
(defmethod uri= ((urn1 urn) (urn2 urn))
(when (not (eq (uri-scheme urn1) (uri-scheme urn2)))
(return-from uri= nil))
(and (equalp (urn-nid urn1) (urn-nid urn2))
(urn-nss-equal (urn-nss urn1) (urn-nss urn2))))
(defun urn-nss-equal (nss1 nss2 &aux len)
;; Return t iff the nss values are the same.
;; %2c and %2C are equivalent.
(when (or (null nss1) (null nss2)
(not (= (setq len (length nss1))
(length nss2))))
(return-from urn-nss-equal nil))
(do* ((i 0 (1+ i))
(state :char)
c1 c2)
((= i len) t)
(setq c1 (schar nss1 i))
(setq c2 (schar nss2 i))
(ecase state
(:char
(if* (and (char= #\% c1) (char= #\% c2))
then (setq state :percent+1)
elseif (char/= c1 c2)
then (return nil)))
(:percent+1
(when (char-not-equal c1 c2) (return nil))
(setq state :percent+2))
(:percent+2
(when (char-not-equal c1 c2) (return nil))
(setq state :char)))))
(defmethod intern-uri ((xuri uri) &optional (uri-space *uris*))
(let ((uri (gethash-uri xuri uri-space)))
(if* uri
thenret
else (puthash-uri xuri uri-space))))
(defmethod intern-uri ((uri string) &optional (uri-space *uris*))
(intern-uri (parse-uri uri) uri-space))
(defun unintern-uri (uri &optional (uri-space *uris*))
(if* (eq t uri)
then (clrhash uri-space)
elseif (uri-p uri)
then (remhash uri uri-space)
else (error "bad uri: ~s." uri)))
(defmacro do-all-uris ((var &optional uri-space result-form)
&rest forms
&environment env)
"do-all-uris (var [[uri-space] result-form])
{declaration}* {tag | statement}*
Executes the forms once for each uri with var bound to the current uri"
(let ((f (gensym))
(g-ignore (gensym))
(g-uri-space (gensym))
(body #+allegro (third (excl::parse-body forms env))
#-allegro forms))
`(let ((,g-uri-space (or ,uri-space *uris*)))
(prog nil
(flet ((,f (,var &optional ,g-ignore)
(declare (ignore-if-unused ,var ,g-ignore))
(tagbody ,@body)))
(maphash #',f ,g-uri-space))
(return ,result-form)))))
(defun sharp-u (stream chr arg)
(declare (ignore chr arg))
(let ((arg (read stream nil nil t)))
(if *read-suppress*
nil
(if* (stringp arg)
then (parse-uri arg)
else
(internal-reader-error
stream
"#u takes a string or list argument: ~s" arg)))))
#+allegro
excl::
#+allegro
(locally (declare (special std-lisp-readtable))
(let ((*readtable* std-lisp-readtable))
(set-dispatch-macro-character #\# #\u #'puri::sharp-u)))
#-allegro
(set-dispatch-macro-character #\# #\u #'puri::sharp-u)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(provide :uri)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; timings
;; (don't run under emacs with M-x fi:common-lisp)
#+ignore
(defun time-uri-module ()
(declare (optimize (speed 3) (safety 0) (debug 0)))
(let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")
(uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
(gc t) (gc :tenure) (gc :tenure) (gc :tenure)
(format t "~&;;; starting timing testing 1...~%")
(time (dotimes (i 100000) (parse-uri uri)))
(gc t) (gc :tenure) (gc :tenure) (gc :tenure)
(format t "~&;;; starting timing testing 2...~%")
(let ((uri (parse-uri uri)))
(time (dotimes (i 100000)
;; forces no caching of the printed representation:
(setf (uri-string uri) nil)
(format nil "~a" uri))))
(gc t) (gc :tenure) (gc :tenure) (gc :tenure)
(format t "~&;;; starting timing testing 3...~%")
(time
(progn
(dotimes (i 100000) (parse-uri uri2))
(let ((uri (parse-uri uri)))
(dotimes (i 100000)
;; forces no caching of the printed representation:
(setf (uri-string uri) nil)
(format nil "~a" uri)))))))
;;******** reference output (ultra, modified 5.0.1):
;;; starting timing testing 1...
; cpu time (non-gc) 13,710 msec user, 0 msec system
; cpu time (gc) 600 msec user, 10 msec system
; cpu time (total) 14,310 msec user, 10 msec system
; real time 14,465 msec
; space allocation:
; 1,804,261 cons cells, 7 symbols, 41,628,832 other bytes, 0 static bytes
;;; starting timing testing 2...
; cpu time (non-gc) 27,500 msec user, 0 msec system
; cpu time (gc) 280 msec user, 20 msec system
; cpu time (total) 27,780 msec user, 20 msec system
; real time 27,897 msec
; space allocation:
; 1,900,463 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
;;; starting timing testing 3...
; cpu time (non-gc) 52,290 msec user, 10 msec system
; cpu time (gc) 1,290 msec user, 30 msec system
; cpu time (total) 53,580 msec user, 40 msec system
; real time 54,062 msec
; space allocation:
; 7,800,205 cons cells, 0 symbols, 81,697,496 other bytes, 0 static bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; after improving decode-escaped-encoding/encode-escaped-encoding:
;;; starting timing testing 1...
; cpu time (non-gc) 14,520 msec user, 0 msec system
; cpu time (gc) 400 msec user, 0 msec system
; cpu time (total) 14,920 msec user, 0 msec system
; real time 15,082 msec
; space allocation:
; 1,800,270 cons cells, 0 symbols, 41,600,160 other bytes, 0 static bytes
;;; starting timing testing 2...
; cpu time (non-gc) 27,490 msec user, 10 msec system
; cpu time (gc) 300 msec user, 0 msec system
; cpu time (total) 27,790 msec user, 10 msec system
; real time 28,025 msec
; space allocation:
; 1,900,436 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
;;; starting timing testing 3...
; cpu time (non-gc) 47,900 msec user, 20 msec system
; cpu time (gc) 920 msec user, 10 msec system
; cpu time (total) 48,820 msec user, 30 msec system
; real time 49,188 msec
; space allocation:
; 3,700,215 cons cells, 0 symbols, 81,707,144 other bytes, 0 static bytes