whitespace normalizer

This commit is contained in:
dlichteblau
2005-12-29 00:31:30 +00:00
parent 4df5d1d054
commit b0615afdd9
14 changed files with 135 additions and 11 deletions

View File

@ -77,5 +77,6 @@
#:sax-proxy
#:proxy-chained-handler
#:make-namespace-normalizer
#:make-whitespace-normalizer
#:rod-to-utf8-string
#:utf8-string-to-rod))

View File

@ -118,4 +118,8 @@
(defwrapper sax:entity-resolver
(resolver)
resolver))
resolver)
(defwrapper sax::dtd
(dtd)
dtd))

View File

@ -53,6 +53,8 @@
#:*use-xmlns-namespace*
#:make-attribute
#:find-attribute
#:find-attribute-ns
#:attribute-namespace-uri
#:attribute-local-name
#:attribute-qname
@ -137,6 +139,23 @@ Setting this variable has no effect unless both
value
specified-p)
(defun %rod= (x y)
;; allow rods *and* strings *and* null
(cond
((zerop (length x)) (zerop (length y)))
((zerop (length y)) nil)
((stringp x) (string= x y))
(t (runes:rod= x y))))
(defun find-attribute (qname attrs)
(find qname attrs :key #'attribute-qname :test #'%rod=))
(defun find-attribute-ns (uri lname attrs)
(find-if (lambda (attr)
(and (%rod= uri (sax:attribute-namespace-uri attr))
(%rod= lname (sax:attribute-local-name attr))))
attrs))
(defgeneric start-document (handler)
(:documentation "Called at the beginning of the parsing process,
before any element, processing instruction or comment is reported.
@ -325,7 +344,11 @@ finished, if present.")
(:documentation
"Called between sax:end-dtd and sax:end-document to register an entity
resolver, a function of two arguments: An entity name and SAX handler.
When called, the resolver function will parse the named entities data.")
When called, the resolver function will parse the named entity's data.")
(:method ((handler t) resolver)
(declare (ignore resolver))
nil))
;; internal for now
(defgeneric dtd (handler dtd)
(:method ((handler t) dtd) (declare (ignore dtd)) nil))

View File

@ -37,4 +37,5 @@
(define-proxy-method sax:notation-declaration (name public-id system-id))
(define-proxy-method sax:element-declaration (name model))
(define-proxy-method sax:attribute-declaration (elt attr type default))
(define-proxy-method sax:entity-resolver (resolver)))
(define-proxy-method sax:entity-resolver (resolver))
(define-proxy-method sax::dtd (dtd)))

62
xml/space-normalizer.lisp Normal file
View File

@ -0,0 +1,62 @@
;;;; space-normalizer.lisp -- whitespace removal
;;;;
;;;; This file is part of the CXML parser, released under Lisp-LGPL.
;;;; See file COPYING for details.
;;;;
;;;; Copyright (c) 2005 David Lichteblau
(in-package :cxml)
(defclass whitespace-normalizer (sax-proxy)
((attributes :initform '(t) :accessor xml-space-attributes)
(models :initform nil :accessor xml-space-models)
(dtd :initarg :dtd :accessor xml-space-dtd)))
(defun make-whitespace-normalizer (chained-handler &optional dtd)
(make-instance 'whitespace-normalizer
:dtd dtd
:chained-handler chained-handler))
(defmethod sax::dtd ((handler whitespace-normalizer) dtd)
(unless (xml-space-dtd handler)
(setf (xml-space-dtd handler) dtd)))
(defmethod sax:start-element
((handler whitespace-normalizer) uri lname qname attrs)
(declare (ignore uri lname))
(let ((dtd (xml-space-dtd handler)))
(when dtd
(let ((xml-space
(sax:find-attribute (if (stringp qname) "xml:space" #"xml:space")
attrs)))
(push (print(if xml-space
(rod= (rod (sax:attribute-value xml-space)) #"default")
(car (xml-space-attributes handler))))
(xml-space-attributes handler)))
(let* ((e (cxml::find-element (rod qname) dtd))
(cspec (when e (cxml::elmdef-content e))))
(push (and (consp cspec)
(not (and (eq (car cspec) '*)
(let ((subspec (second cspec)))
(and (eq (car subspec) 'or)
(eq (cadr subspec) :PCDATA))))))
(xml-space-models handler)))))
(call-next-method))
(defmethod sax:characters ((handler whitespace-normalizer) data)
(cond
((and (xml-space-dtd handler)
(car (xml-space-attributes handler))
(car (xml-space-models handler)))
(unless (every #'white-space-rune-p (rod data))
(warn "non-whitespace character data in element content")
(call-next-method)))
(t
(call-next-method))))
(defmethod sax:end-element ((handler whitespace-normalizer) uri lname qname)
(declare (ignore uri lname qname))
(when (xml-space-dtd handler)
(pop (xml-space-attributes handler))
(pop (xml-space-models handler)))
(call-next-method))

View File

@ -2513,7 +2513,8 @@
(let ((dtd (dtd *ctx*)))
(sax:entity-resolver
(handler *ctx*)
(lambda (name handler) (resolve-entity name handler dtd))))
(lambda (name handler) (resolve-entity name handler dtd)))
(sax::dtd (handler *ctx*) dtd))
(list :DOCTYPE name extid))))
(defun report-cached-dtd (dtd)