405 lines
13 KiB
Common Lisp
405 lines
13 KiB
Common Lisp
;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
|
|
;;; ---------------------------------------------------------------------------
|
|
;;; Title: An event API for the HTML parser, inspired by SAX
|
|
;;; Created: 2007-10-14
|
|
;;; Author: David Lichteblau
|
|
;;; License: BSD
|
|
;;; ---------------------------------------------------------------------------
|
|
;;; (c) copyright 2005,2007 David Lichteblau
|
|
|
|
;;; Redistribution and use in source and binary forms, with or without
|
|
;;; modification, are permitted provided that the following conditions are
|
|
;;; met:
|
|
;;;
|
|
;;; 1. Redistributions of source code must retain the above copyright
|
|
;;; notice, this list of conditions and the following disclaimer.
|
|
;;;
|
|
;;; 2. Redistributions in binary form must reproduce the above copyright
|
|
;;; notice, this list of conditions and the following disclaimer in the
|
|
;;; documentation and/or other materials provided with the distribution
|
|
;;;
|
|
;;; THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
|
;;; WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
;;; MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
;;; IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
|
;;; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
;;; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
;;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
;;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
;;; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
|
;;; IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
;;; POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
(defpackage :hax
|
|
(:use :common-lisp)
|
|
(:export #:abstract-handler
|
|
#:default-handler
|
|
|
|
#:make-attribute
|
|
#:standard-attribute
|
|
#:find-attribute
|
|
#:attribute-name
|
|
#:attribute-value
|
|
#:attribute-specified-p
|
|
|
|
#:start-document
|
|
#:start-element
|
|
#:characters
|
|
#:end-element
|
|
#:end-document
|
|
#:comment
|
|
|
|
#+rune-is-integer
|
|
#:%want-strings-p))
|
|
|
|
(in-package :hax)
|
|
|
|
|
|
;;;; ATTRIBUTE
|
|
|
|
(defgeneric attribute-name (attribute))
|
|
(defgeneric attribute-value (attribute))
|
|
(defgeneric attribute-specified-p (attribute))
|
|
|
|
(defclass standard-attribute ()
|
|
((name :initarg :name :accessor attribute-name)
|
|
(value :initarg :value :accessor attribute-value)
|
|
(specified-p :initarg :specified-p :accessor attribute-specified-p)))
|
|
|
|
(defun make-attribute (name value &optional (specified-p t))
|
|
(make-instance 'standard-attribute
|
|
:name name
|
|
:value value
|
|
:specified-p specified-p))
|
|
|
|
(defun %rod= (x y)
|
|
;; allow rods *and* strings *and* null
|
|
(cond
|
|
((zerop (length x)) (zerop (length y)))
|
|
((zerop (length y)) nil)
|
|
((stringp x) (string= x y))
|
|
(t (runes:rod= x y))))
|
|
|
|
(defun find-attribute (name attrs)
|
|
(find name attrs :key #'attribute-name :test #'%rod=))
|
|
|
|
|
|
;;;; ABSTRACT-HANDLER and DEFAULT-HANDLER
|
|
|
|
(defclass abstract-handler () ())
|
|
(defclass default-handler (abstract-handler) ())
|
|
|
|
#+rune-is-integer
|
|
(defgeneric %want-strings-p (handler)
|
|
(:method ((handler null)) nil)
|
|
(:method ((handler abstract-handler)) t))
|
|
|
|
(defgeneric start-document (handler name public-id system-id)
|
|
(:method ((handler null) name public-id system-id)
|
|
(declare (ignore name public-id system-id))
|
|
nil)
|
|
(:method ((handler default-handler) name public-id system-id)
|
|
(declare (ignore name public-id system-id))
|
|
nil))
|
|
|
|
(defgeneric start-element (handler name attributes)
|
|
(:method ((handler null) name attributes)
|
|
(declare (ignore name attributes))
|
|
nil)
|
|
(:method ((handler default-handler) name attributes)
|
|
(declare (ignore name attributes))
|
|
nil))
|
|
|
|
(defgeneric characters (handler data)
|
|
(:method ((handler null) data)
|
|
(declare (ignore data))
|
|
nil)
|
|
(:method ((handler default-handler) data)
|
|
(declare (ignore data))
|
|
nil))
|
|
|
|
(defgeneric end-element (handler name)
|
|
(:method ((handler null) name)
|
|
(declare (ignore name))
|
|
nil)
|
|
(:method ((handler default-handler) name)
|
|
(declare (ignore name))
|
|
nil))
|
|
|
|
(defgeneric end-document (handler)
|
|
(:method ((handler null)) nil)
|
|
(:method ((handler default-handler)) nil))
|
|
|
|
(defgeneric comment (handler data)
|
|
(:method ((handler null) data)
|
|
(declare (ignore data))
|
|
nil)
|
|
(:method ((handler default-handler) data)
|
|
(declare (ignore data))
|
|
nil))
|
|
|
|
|
|
;;;; documentation
|
|
|
|
(setf (documentation (find-package :hax) t)
|
|
"An event protocol for HTML serialization, this package is similar
|
|
to the SAX protocol defined by cxml for XML serialization.
|
|
|
|
(Technically, this package should have been spelled SAH, but HAX
|
|
sounds better.)
|
|
|
|
Note that Closure HTML is not a streaming parser yet. Documents
|
|
are always parsed in full before the first HAX event is emitted.
|
|
In spite of this restriction, the HAX API is useful for HTML
|
|
serialization and transformation purposes, and for integration
|
|
with SAX.
|
|
|
|
@begin[HAX handlers]{section}
|
|
@aboutclass{abstract-handler}
|
|
@aboutclass{default-handler}
|
|
@end{section}
|
|
@begin[The attribute protocol]{section}
|
|
@aboutclass{standard-attribute}
|
|
@aboutfun{make-attribute}
|
|
@aboutfun{attribute-name}
|
|
@aboutfun{attribute-value}
|
|
@aboutfun{attribute-specified-p}
|
|
@end{section}
|
|
@begin[HAX events]{section}
|
|
@aboutfun{start-document}
|
|
@aboutfun{start-element}
|
|
@aboutfun{end-element}
|
|
@aboutfun{characters}
|
|
@aboutfun{comment}
|
|
@aboutfun{end-document}
|
|
@end{section}")
|
|
|
|
(setf (documentation 'abstract-handler 'type)
|
|
"@short{The superclass of all HAX handlers.}
|
|
|
|
Direct subclasses have to implement all event methods, since
|
|
no default methods are defined on this class.
|
|
|
|
Note that it is permissible to use handlers that are not
|
|
instances of this class in some circumstances.
|
|
|
|
In particular,
|
|
@code{nil} is a valid HAX handler and ignores all events.
|
|
|
|
In addition,
|
|
@a[http://common-lisp.net/project/cxml/sax.html#sax]{SAX handlers}
|
|
are valid HAX handlers (and vice versa), even though
|
|
hax:abstract-handler and sax:abstract-handler do not
|
|
share a specific superclass. HAX events sent to SAX handlers are
|
|
automatically re-signalled as XHTML SAX events, and SAX events sent
|
|
to HAX handlers are re-signalled as namespace-less HAX events.
|
|
|
|
However, user code should define subclasses of the documented
|
|
superclasses to enable the HAX/SAX bridging described above.
|
|
|
|
@see{chtml:parse}
|
|
@see{chtml:serialize-lhtml}
|
|
@see{chtml:serialize-pt}
|
|
@see{start-document}
|
|
@see{end-document}
|
|
@see{start-element}
|
|
@see{end-element}
|
|
@see{characters}
|
|
@see{comment}")
|
|
|
|
(setf (documentation 'default-handler 'type)
|
|
"@short{A no-op HAX handler.}
|
|
|
|
This class defines methods for all HAX events that do nothing.
|
|
It is useful as a superclass when implementing a HAX handler that
|
|
is interested in only some events and not others.
|
|
|
|
@see{chtml:parse}
|
|
@see{chtml:serialize-lhtml}
|
|
@see{chtml:serialize-pt}
|
|
@see{start-document}
|
|
@see{end-document}
|
|
@see{start-element}
|
|
@see{end-element}
|
|
@see{characters}
|
|
@see{comment}")
|
|
|
|
(setf (documentation 'standard-attribute 'type)
|
|
"@short{An implementation of the HAX attribute protocol.}
|
|
|
|
A standard class implementing the generic functions for HAX
|
|
attributes. Instances of this class can be passed to
|
|
@fun{hax:start-element} in the list of attributes.
|
|
|
|
@see-slot{attribute-name}
|
|
@see-slot{attribute-value}
|
|
@see-slot{attribute-specified-p}
|
|
@see-constructor{make-instance}")
|
|
|
|
(setf (documentation 'make-attribute 'function)
|
|
"@arg[name]{a string/rod}
|
|
@arg[value]{a string/rod}
|
|
@arg[specified-p]{a boolean, default is @code{t}}
|
|
@return{an instance of @class{standard-attribute}.}
|
|
@short{Creates a HAX attribute.}
|
|
|
|
Creates an instance that can be used with the generic functions
|
|
for HAX attributes. The result can be passed to
|
|
@fun{hax:start-element} in the list of attributes.
|
|
|
|
@see{attribute-name}
|
|
@see{attribute-value}
|
|
@see{attribute-specified-p}")
|
|
|
|
(setf (documentation 'find-attribute 'function)
|
|
"@arg[name]{a string/rod}
|
|
@arg[attrs]{a list of attributes}
|
|
@return{an attribute, or nil}
|
|
@short{Searches for an attribute by name.}
|
|
|
|
Returns the first attribute in @var{attrs} with the specified name,
|
|
or @code{nil} if no such attribute was found.
|
|
|
|
@see{attribute-name}")
|
|
|
|
(setf (documentation 'attribute-name 'function)
|
|
"@arg[instance]{any class implementing this function}
|
|
@return{a string/rod}
|
|
@short{Return an attribute's name.}
|
|
|
|
Instances of this classes implementing this function can be passed to
|
|
@fun{hax:start-element} in the list of attributes.
|
|
|
|
@see{attribute-value}
|
|
@see{attribute-specified-p}")
|
|
|
|
(setf (documentation 'attribute-value 'function)
|
|
"@arg[instance]{any class implementing this function}
|
|
@return{a string/rod}
|
|
@short{Return an attribute's value.}
|
|
|
|
Instances of this classes implementing this function can be passed to
|
|
@fun{hax:start-element} in the list of attributes.
|
|
|
|
@see{attribute-name}
|
|
@see{attribute-specified-p}")
|
|
|
|
(setf (documentation 'attribute-specified-p 'function)
|
|
"@arg[instance]{any class implementing this function}
|
|
@return{a string/rod}
|
|
@short{Return whether the attribute was contained the parsed document.}
|
|
|
|
Attributes return @code{nil} here if they resulted from a default
|
|
value declaration in a DTD.
|
|
|
|
Instances of this classes implementing this function can be passed to
|
|
@fun{hax:start-element} in the list of attributes.
|
|
|
|
@see{attribute-name}
|
|
@see{attribute-value}")
|
|
|
|
(setf (documentation 'start-document 'function)
|
|
"@arg[handler]{a HAX/SAX handler
|
|
(see @class{abstract-handler} for details)}
|
|
@arg[name]{root element name, a rod/string}
|
|
@arg[public-id]{nil or the Public ID, a rod/string}
|
|
@arg[system-id]{nil or the System ID/URI, a rod/string}
|
|
@return{unspecified}
|
|
@short{Signals the beginning of an HTML document.}
|
|
|
|
This is the first event sent to any handler.
|
|
|
|
If @var{system-id} is non-nil, the document includes a doctype
|
|
declaration.
|
|
|
|
@see{start-element}
|
|
@see{end-element}
|
|
@see{characters}
|
|
@see{comment}
|
|
@see{end-document}")
|
|
|
|
(setf (documentation 'start-element 'function)
|
|
"@arg[handler]{a HAX/SAX handler
|
|
(see @class{abstract-handler} for details)}
|
|
@arg[name]{root element name, a rod/string}
|
|
@arg[attributes]{a list of attributes}
|
|
@return{unspecified}
|
|
@short{Signals the beginning of an HTML element.}
|
|
|
|
This event corresponds to the opening tag of an element.
|
|
|
|
Elements of the attribute list can have any class, but must implement
|
|
the generic functions for attributes. See @class{standard-attribute}
|
|
for the built-in attribute implementation.
|
|
|
|
@see{find-attribute}
|
|
@see{start-document}
|
|
@see{end-element}
|
|
@see{characters}
|
|
@see{comment}
|
|
@see{end-document}")
|
|
|
|
(setf (documentation 'end-element 'function)
|
|
"@arg[handler]{a HAX/SAX handler
|
|
(see @class{abstract-handler} for details)}
|
|
@arg[name]{root element name, a rod/string}
|
|
@return{unspecified}
|
|
@short{Signals the end of an HTML element.}
|
|
|
|
This event corresponds to the closing tag of an element.
|
|
|
|
@see{start-document}
|
|
@see{start-element}
|
|
@see{characters}
|
|
@see{comment}
|
|
@see{end-document}")
|
|
|
|
(setf (documentation 'characters 'function)
|
|
"@arg[handler]{a HAX/SAX handler
|
|
(see @class{abstract-handler} for details)}
|
|
@arg[data]{rod/string}
|
|
@return{unspecified}
|
|
@short{Signals character data.}
|
|
|
|
This event represents character data in a document.
|
|
|
|
@see{start-document}
|
|
@see{start-element}
|
|
@see{end-element}
|
|
@see{comment}
|
|
@see{end-document}")
|
|
|
|
(setf (documentation 'comment 'function)
|
|
"@arg[handler]{a HAX/SAX handler
|
|
(see @class{abstract-handler} for details)}
|
|
@arg[data]{rod/string}
|
|
@return{unspecified}
|
|
@short{Signals a comment.}
|
|
|
|
This event represents a comment.
|
|
|
|
@see{start-document}
|
|
@see{start-element}
|
|
@see{end-element}
|
|
@see{characters}
|
|
@see{end-document}")
|
|
|
|
(setf (documentation 'end-document 'function)
|
|
"@arg[handler]{a HAX/SAX handler
|
|
(see @class{abstract-handler} for details)}
|
|
@return{The return value of this function depends on the handler class.}
|
|
@short{Signals the end of an HTML document.}
|
|
|
|
This is the last event sent to any handler, and signals the end of
|
|
serialization.
|
|
|
|
The return value of this function is usually returned to user code
|
|
by higher-level serialization functions and can be considered the
|
|
result of serialization and \"return value\" of the handler.
|
|
|
|
@see{start-document}
|
|
@see{start-element}
|
|
@see{end-element}
|
|
@see{characters}
|
|
@see{comment}")
|