r10882: fix delimited-to-string and parse-uri to correspond to franz' code

This commit is contained in:
Kevin M. Rosenberg
2006-01-27 03:18:00 +00:00
parent ec30187f4b
commit 296f1e6510
4 changed files with 76 additions and 47 deletions

7
debian/changelog vendored
View File

@ -1,3 +1,10 @@
cl-puri (1.4-1) unstable; urgency=low
* New upstream: no longer depend on simple strings; fix bugs to correspond
to Franz delimited-string-to-list and parse-uri
-- Kevin M. Rosenberg <kmr@debian.org> Thu, 26 Jan 2006 15:54:30 -0700
cl-puri (1.3.1.3-1) unstable; urgency=low cl-puri (1.3.1.3-1) unstable; urgency=low
* New upstream * New upstream

View File

@ -28,3 +28,6 @@
(or (funcall (intern (symbol-name '#:do-tests) (or (funcall (intern (symbol-name '#:do-tests)
(find-package :puri-tests))) (find-package :puri-tests)))
(error "test-op failed"))) (error "test-op failed")))
(defmethod operation-done-p ((o test-op) (c (eql (find-system 'puri-tests))))
(values nil))

103
src.lisp
View File

@ -88,7 +88,7 @@
#+allegro #+allegro
(excl::.primcall 'sys::shrink-svector str size) (excl::.primcall 'sys::shrink-svector str size)
#+sbcl #+sbcl
(sb-kernel:shrink-vector str size) (setq str (sb-kernel:shrink-vector str size))
#+cmu #+cmu
(lisp::shrink-vector str size) (lisp::shrink-vector str size)
#+lispworks #+lispworks
@ -126,17 +126,17 @@
excl:if*))) excl:if*)))
#-allegro #-allegro
(defun position-char (char string start max) (defmethod position-char (char (string string) start max)
(declare (optimize (speed 3) (safety 0) (space 0)) (declare (optimize (speed 3) (safety 0) (space 0))
(fixnum start max) (simple-string string)) (fixnum start max) (string string))
(do* ((i start (1+ i))) (do* ((i start (1+ i)))
((= i max) nil) ((= i max) nil)
(declare (fixnum i)) (declare (fixnum i))
(when (char= char (schar string i)) (return i)))) (when (char= char (char string i)) (return i))))
#-allegro #-allegro
(defun delimited-string-to-list (string &optional (separator #\space) (defun delimited-string-to-list (string &optional (separator #\space)
skip-terminal) skip-terminal)
(declare (optimize (speed 3) (safety 0) (space 0) (declare (optimize (speed 3) (safety 0) (space 0)
(compilation-speed 0)) (compilation-speed 0))
(type string string) (type string string)
@ -149,9 +149,9 @@
((null end) ((null end)
(if (< pos len) (if (< pos len)
(push (subseq string pos) output) (push (subseq string pos) output)
(when (or (not skip-terminal) (zerop len)) (when (and (plusp len) (not skip-terminal))
(push "" output))) (push "" output)))
(nreverse output)) (nreverse output))
(declare (type fixnum pos len) (declare (type fixnum pos len)
(type (or null fixnum) end)) (type (or null fixnum) end))
(push (subseq string pos end) output) (push (subseq string pos end) output)
@ -549,7 +549,7 @@
(setq res (setq res
(loop (loop
(when (>= start end) (return nil)) (when (>= start end) (return nil))
(setq c (schar string start)) (setq c (char string start))
(let ((ci (char-int c))) (let ((ci (char-int c)))
(if* legal-chars (if* legal-chars
then (if* (and (eq :colon kind) (eq c #\:)) then (if* (and (eq :colon kind) (eq c #\:))
@ -726,7 +726,7 @@ URI ~s contains illegal character ~s at position ~d."
(return (return
(values (values
scheme host port scheme host port
(apply #'concatenate 'simple-string (nreverse path-components)) (apply #'concatenate 'string (nreverse path-components))
query fragment))) query fragment)))
;; URN parsing: ;; URN parsing:
(15 ;; seen urn:, read nid now (15 ;; seen urn:, read nid now
@ -755,7 +755,7 @@ URI ~s contains illegal character ~s at position ~d."
(max (the fixnum (length string)))) (max (the fixnum (length string))))
((= i max) nil) ((= i max) nil)
(declare (fixnum i max)) (declare (fixnum i max))
(when (char= #\% (schar string i)) (when (char= #\% (char string i))
(return t)))) (return t))))
(defun parse-path (path-string escape) (defun parse-path (path-string escape)
@ -769,19 +769,23 @@ URI ~s contains illegal character ~s at position ~d."
(pl (cdr path-list) (cdr pl)) (pl (cdr path-list) (cdr pl))
segments) segments)
((null pl) path-list) ((null pl) path-list)
(if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
(if* (cdr (setq segments
(if* (string= "" (car pl))
then '("")
else (delimited-string-to-list (car pl) #\;))))
then ;; there is a param then ;; there is a param
;;; (setf (car pl) segments)
(setf (car pl) (setf (car pl)
(mapcar #'(lambda (s) (mapcar #'(lambda (s)
(decode-escaped-encoding (decode-escaped-encoding s escape
s escape *reserved-path-characters2*)) ;; decode all %xx:
segments)) nil))
segments))
else ;; no param else ;; no param
;;; (setf (car pl) (car segments))
(setf (car pl) (setf (car pl)
(decode-escaped-encoding (decode-escaped-encoding (car segments) escape
(car segments) escape *reserved-path-characters2*))))) ;; decode all %xx:
nil)))))
(defun decode-escaped-encoding (string escape (defun decode-escaped-encoding (string escape
&optional (reserved-chars &optional (reserved-chars
@ -795,26 +799,27 @@ URI ~s contains illegal character ~s at position ~d."
ch ch2 chc chc2) ch ch2 chc chc2)
((= i max) ((= i max)
(shrink-vector new-string new-i)) (shrink-vector new-string new-i))
(if* (char= #\% (setq ch (schar string i))) (if* (char= #\% (setq ch (char string i)))
then (when (> (+ i 3) max) then (when (> (+ i 3) max)
(.parse-error (.parse-error
"Unsyntactic escaped encoding in ~s." string)) "Unsyntactic escaped encoding in ~s." string))
(setq ch (schar string (incf i))) (setq ch (char string (incf i)))
(setq ch2 (schar string (incf i))) (setq ch2 (char string (incf i)))
(when (not (and (setq chc (digit-char-p ch 16)) (when (not (and (setq chc (digit-char-p ch 16))
(setq chc2 (digit-char-p ch2 16)))) (setq chc2 (digit-char-p ch2 16))))
(.parse-error (.parse-error
"Non-hexidecimal digits after %: %c%c." ch ch2)) "Non-hexidecimal digits after %: %c%c." ch ch2))
(let ((ci (+ (* 16 chc) chc2))) (let ((ci (+ (* 16 chc) chc2)))
(if* (or (null reserved-chars) (if* (or (null reserved-chars)
(= 0 (sbit reserved-chars ci))) (and (< ci (length reserved-chars))
(= 0 (sbit reserved-chars ci))))
then ;; ok as is then ;; ok as is
(setf (schar new-string new-i) (setf (char new-string new-i)
(code-char ci)) (code-char ci))
else (setf (schar new-string new-i) #\%) else (setf (char new-string new-i) #\%)
(setf (schar new-string (incf new-i)) ch) (setf (char new-string (incf new-i)) ch)
(setf (schar new-string (incf new-i)) ch2))) (setf (char new-string (incf new-i)) ch2)))
else (setf (schar new-string new-i) ch)))) else (setf (char new-string new-i) ch))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Printing ;;;; Printing
@ -830,7 +835,7 @@ URI ~s contains illegal character ~s at position ~d."
(path (uri-path uri)) (path (uri-path uri))
(query (uri-query uri)) (query (uri-query uri))
(fragment (uri-fragment uri))) (fragment (uri-fragment uri)))
(concatenate 'simple-string (concatenate 'string
(when scheme (when scheme
(encode-escaped-encoding (encode-escaped-encoding
(string-downcase ;; for upper case lisps (string-downcase ;; for upper case lisps
@ -866,7 +871,7 @@ URI ~s contains illegal character ~s at position ~d."
(pl (cdr path-list) (cdr pl)) (pl (cdr path-list) (cdr pl))
(pe (car pl) (car pl))) (pe (car pl) (car pl)))
((null pl) ((null pl)
(when res (apply #'concatenate 'simple-string (nreverse res)))) (when res (apply #'concatenate 'string (nreverse res))))
(when (or (null first) (when (or (null first)
(prog1 (eq :absolute first) (prog1 (eq :absolute first)
(setq first nil))) (setq first nil)))
@ -891,7 +896,7 @@ URI ~s contains illegal character ~s at position ~d."
(setf (uri-string urn) (setf (uri-string urn)
(let ((nid (urn-nid urn)) (let ((nid (urn-nid urn))
(nss (urn-nss urn))) (nss (urn-nss urn)))
(concatenate 'simple-string "urn:" nid ":" nss)))) (concatenate 'string "urn:" nid ":" nss))))
(if* stream (if* stream
then (format stream "~a" (uri-string urn)) then (format stream "~a" (uri-string urn))
else (uri-string urn))) else (uri-string urn)))
@ -900,8 +905,6 @@ URI ~s contains illegal character ~s at position ~d."
(vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f)) (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
(defun encode-escaped-encoding (string reserved-chars escape) (defun encode-escaped-encoding (string reserved-chars escape)
(unless (typep string 'simple-string)
(setq string (coerce string 'simple-string)))
(when (null escape) (return-from encode-escaped-encoding string)) (when (null escape) (return-from encode-escaped-encoding string))
;; Make a string as big as it possibly needs to be (3 times the original ;; Make a string as big as it possibly needs to be (3 times the original
;; size), and truncate it at the end. ;; size), and truncate it at the end.
@ -913,18 +916,18 @@ URI ~s contains illegal character ~s at position ~d."
c ci) c ci)
((= i max) ((= i max)
(shrink-vector new-string (incf new-i))) (shrink-vector new-string (incf new-i)))
(setq ci (char-int (setq c (schar string i)))) (setq ci (char-int (setq c (char string i))))
(if* (or (null reserved-chars) (if* (or (null reserved-chars)
(> ci 127) (> ci 127)
(= 0 (sbit reserved-chars ci))) (= 0 (sbit reserved-chars ci)))
then ;; ok as is then ;; ok as is
(incf new-i) (incf new-i)
(setf (schar new-string new-i) c) (setf (char new-string new-i) c)
else ;; need to escape it else ;; need to escape it
(multiple-value-bind (q r) (truncate ci 16) (multiple-value-bind (q r) (truncate ci 16)
(setf (schar new-string (incf new-i)) #\%) (setf (char new-string (incf new-i)) #\%)
(setf (schar new-string (incf new-i)) (elt *escaped-encoding* q)) (setf (char new-string (incf new-i)) (elt *escaped-encoding* q))
(setf (schar new-string (incf new-i)) (setf (char new-string (incf new-i))
(elt *escaped-encoding* r)))))) (elt *escaped-encoding* r))))))
(defmethod print-object ((uri uri) stream) (defmethod print-object ((uri uri) stream)
@ -949,12 +952,10 @@ URI ~s contains illegal character ~s at position ~d."
(defmethod merge-uris ((uri string) (base uri) &optional place) (defmethod merge-uris ((uri string) (base uri) &optional place)
(merge-uris (parse-uri uri) base place)) (merge-uris (parse-uri uri) base place))
(defmethod merge-uris ((uri uri) (base uri) &optional place) (defmethod merge-uris ((uri uri) (base uri) &optional place)
;; The following is from ;; See ../doc/rfc2396.txt for info on the algorithm we use to merge
;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt ;; URIs.
;; and is algorithm we use to merge URIs.
;;
;; For more information, see section 5.2 of the RFC.
;; ;;
(tagbody (tagbody
;;;; step 2 ;;;; step 2
@ -985,6 +986,18 @@ URI ~s contains illegal character ~s at position ~d."
;;;; step 5 ;;;; step 5
(let ((p (uri-parsed-path uri))) (let ((p (uri-parsed-path uri)))
;; bug13133:
;; The following form causes our implementation to be at odds with
;; RFC 2396, however this is apparently what was intended by the
;; authors of the RFC. Specifically, (merge-uris "?y" "/foo")
;; should return #<uri /foo?y> instead of #<uri ?y>, according to
;; this:
;;; http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query
(when (null p)
(setf (uri-path uri) (uri-path base))
(go :done))
(when (and p (eq :absolute (car p))) (when (and p (eq :absolute (car p)))
(when (equal '(:absolute "") p) (when (equal '(:absolute "") p)
;; Canonicalize the way parsing does: ;; Canonicalize the way parsing does:
@ -1245,8 +1258,8 @@ URI ~s contains illegal character ~s at position ~d."
(state :char) (state :char)
c1 c2) c1 c2)
((= i len) t) ((= i len) t)
(setq c1 (schar nss1 i)) (setq c1 (char nss1 i))
(setq c2 (schar nss2 i)) (setq c2 (char nss2 i))
(ecase state (ecase state
(:char (:char
(if* (and (char= #\% c1) (char= #\% c2)) (if* (and (char= #\% c1) (char= #\% c2))

View File

@ -40,7 +40,10 @@
("g/" "http://a/b/c/g/" ,base-uri) ("g/" "http://a/b/c/g/" ,base-uri)
("/g" "http://a/g" ,base-uri) ("/g" "http://a/g" ,base-uri)
("//g" "http://g" ,base-uri) ("//g" "http://g" ,base-uri)
("?y" "http://a/b/c/?y" ,base-uri) ;; Following was changed from appendix C of RFC 2396
;; http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query
#-ignore ("?y" "http://a/b/c/d;p?y" ,base-uri)
#+ignore ("?y" "http://a/b/c/?y" ,base-uri)
("g?y" "http://a/b/c/g?y" ,base-uri) ("g?y" "http://a/b/c/g?y" ,base-uri)
("#s" "http://a/b/c/d;p?q#s" ,base-uri) ("#s" "http://a/b/c/d;p?q#s" ,base-uri)
("g#s" "http://a/b/c/g#s" ,base-uri) ("g#s" "http://a/b/c/g#s" ,base-uri)
@ -180,6 +183,9 @@
(push `(test "%20" (format nil "~a" (parse-uri "%20")) (push `(test "%20" (format nil "~a" (parse-uri "%20"))
:test 'string=) :test 'string=)
res) res)
(push `(test "%FF" (format nil "~a" (parse-uri "%FF"))
:test 'string=)
res) ;Value 255 outside reserved-chars vector (128 bits)
(push `(test "&" (format nil "~a" (parse-uri "%26")) (push `(test "&" (format nil "~a" (parse-uri "%26"))
:test 'string=) :test 'string=)
res) res)