Grow a buffer for string normalization exponentially.

* dom/dom-builder.lisp (DOM-BUILDER): New slot `text-buffer'.
	(SAX:START-ELEMENT, SAX:END-ELEMENT, SAX:START-CDATA,
	SAX:END-CDATA, SAX:PROCESSING-INSTRUCTION, SAX:COMMENT): Call
	flush-characters.  (SAX:CHARACTERS): Rewritten.
	(FLUSH-CHARACTERS): New, based on the old sax:characters.

	* dom/dom-impl.lisp ((initialize-instance :after entity-reference)):
	Call flush-characters.
This commit is contained in:
dlichteblau
2007-10-03 15:17:08 +00:00
parent a2990d65ce
commit e7884fc9f7
2 changed files with 42 additions and 18 deletions

View File

@ -18,7 +18,8 @@
(defclass dom-builder () (defclass dom-builder ()
((document :initform nil :accessor document) ((document :initform nil :accessor document)
(element-stack :initform '() :accessor element-stack) (element-stack :initform '() :accessor element-stack)
(internal-subset :accessor internal-subset))) (internal-subset :accessor internal-subset)
(text-buffer :initform nil :accessor text-buffer)))
(defun make-dom-builder () (defun make-dom-builder ()
(make-instance 'dom-builder)) (make-instance 'dom-builder))
@ -87,6 +88,7 @@
(defmethod sax:start-element (defmethod sax:start-element
((handler dom-builder) namespace-uri local-name qname attributes) ((handler dom-builder) namespace-uri local-name qname attributes)
(check-type qname rod) ;catch recoder/builder mismatch (check-type qname rod) ;catch recoder/builder mismatch
(flush-characters handler)
(with-slots (document element-stack) handler (with-slots (document element-stack) handler
(let* ((nsp sax:*namespace-processing*) (let* ((nsp sax:*namespace-processing*)
(element (make-instance 'element (element (make-instance 'element
@ -126,27 +128,45 @@
(defmethod sax:end-element ((handler dom-builder) namespace-uri local-name qname) (defmethod sax:end-element ((handler dom-builder) namespace-uri local-name qname)
(declare (ignore namespace-uri local-name qname)) (declare (ignore namespace-uri local-name qname))
(flush-characters handler)
(pop (element-stack handler))) (pop (element-stack handler)))
(defmethod sax:characters ((handler dom-builder) data) (defmethod sax:characters ((handler dom-builder) data)
(with-slots (document element-stack) handler (with-slots (text-buffer) handler
(let* ((parent (car element-stack)) (cond
(last-child (dom:last-child parent))) ((null text-buffer)
(cond (setf text-buffer data))
((eq (dom:node-type parent) :cdata-section) (t
(setf (dom:data parent) data)) (unless (array-has-fill-pointer-p text-buffer)
((and last-child (eq (dom:node-type last-child) :text)) (setf text-buffer (make-array (length text-buffer)
;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer :element-type 'rune
;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten :adjustable t
;; erweitern, sonst ist das Dokument nicht normalisiert. :fill-pointer t
;; (XXX Oder sollte man besser den Parser entsprechend aendern?) :initial-contents text-buffer)))
(dom:append-data last-child data)) (let ((n (length text-buffer))
(t (m (length data)))
(let ((node (dom:create-text-node document data))) (adjust-vector-exponentially text-buffer (+ n m) t)
(setf (slot-value node 'parent) parent) (move data text-buffer 0 n m))))))
(fast-push node (slot-value (car element-stack) 'children))))))))
(defun flush-characters (handler)
(with-slots (document element-stack text-buffer) handler
(let ((data text-buffer))
(when data
(when (array-has-fill-pointer-p data)
(setf data
(make-array (length data)
:element-type 'rune
:initial-contents data)))
(let ((parent (car element-stack)))
(if (eq (dom:node-type parent) :cdata-section)
(setf (dom:data parent) data)
(let ((node (dom:create-text-node document data)))
(setf (slot-value node 'parent) parent)
(fast-push node (slot-value (car element-stack) 'children)))))
(setf text-buffer nil)))))
(defmethod sax:start-cdata ((handler dom-builder)) (defmethod sax:start-cdata ((handler dom-builder))
(flush-characters handler)
(with-slots (document element-stack) handler (with-slots (document element-stack) handler
(let ((node (dom:create-cdata-section document #"")) (let ((node (dom:create-cdata-section document #""))
(parent (car element-stack))) (parent (car element-stack)))
@ -155,10 +175,12 @@
(push node element-stack)))) (push node element-stack))))
(defmethod sax:end-cdata ((handler dom-builder)) (defmethod sax:end-cdata ((handler dom-builder))
(flush-characters handler)
(let ((node (pop (slot-value handler 'element-stack)))) (let ((node (pop (slot-value handler 'element-stack))))
(assert (eq (dom:node-type node) :cdata-section)))) (assert (eq (dom:node-type node) :cdata-section))))
(defmethod sax:processing-instruction ((handler dom-builder) target data) (defmethod sax:processing-instruction ((handler dom-builder) target data)
(flush-characters handler)
(with-slots (document element-stack) handler (with-slots (document element-stack) handler
(let ((node (dom:create-processing-instruction document target data)) (let ((node (dom:create-processing-instruction document target data))
(parent (car element-stack))) (parent (car element-stack)))
@ -166,6 +188,7 @@
(fast-push node (slot-value (car element-stack) 'children))))) (fast-push node (slot-value (car element-stack) 'children)))))
(defmethod sax:comment ((handler dom-builder) data) (defmethod sax:comment ((handler dom-builder) data)
(flush-characters handler)
(with-slots (document element-stack) handler (with-slots (document element-stack) handler
(let ((node (dom:create-comment document data)) (let ((node (dom:create-comment document data))
(parent (car element-stack))) (parent (car element-stack)))

View File

@ -1247,7 +1247,8 @@
(push instance (element-stack handler)) (push instance (element-stack handler))
#+cxml-system::utf8dom-file #+cxml-system::utf8dom-file
(setf handler (cxml:make-recoder handler #'cxml:rod-to-utf8-string)) (setf handler (cxml:make-recoder handler #'cxml:rod-to-utf8-string))
(funcall resolver (real-rod (dom:name instance)) handler))) (funcall resolver (real-rod (dom:name instance)) handler)
(flush-characters handler)))
(labels ((walk (n) (labels ((walk (n)
(setf (slot-value n 'read-only-p) t) (setf (slot-value n 'read-only-p) t)
(when (dom:element-p n) (when (dom:element-p n)