"""gen_xml.py: Library for generating XML files
$Revision: 1.8 $ $Date: 2003/06/02 01:06:32 $
By John W. Shipman (john@nmt.edu); derived from my earlier `gen_sgml.py'.
Exports:
class Document: A whole XML document
Document ( rootName=None, dtdName=None ):
[ if rootName is None ->
return a new, empty Document object with no associated DTD
else if rootName and dtdName are strings ->
return a new, empty Document object with root element
name (rootName) and associated with DTD name (dtdName) ]
.rootName: [ as passed to constructor ]
.dtdName: [ as passed to constructor ]
.root:
[ if self has no content -> None
else -> a Tag object containing the root element ]
.str(fold):
[ if fold is a Foldifier object ->
append a representation of self to fold ]
class Tag: Base class for all XML tags. Virtual class; child
classes must supply their own __str__ methods.
Tag ( parent, gi, **attrs )
[ if (parent is None or a Tag object)
and (gi is the generic identifier as a nonempty string)
and (attrs is a dictionary containing the attributes) ->
if (parent is None) ->
return a new root Tag object with generic identifier (gi),
attributes (attrs), and no children
else ->
parent := parent with a new Tag object with gi (gi),
attributes (attrs), and no children
added as its next content
return that same new Tag object ]
NB: In the attrs argument, use _class to get a class="..."
attribute.
.parent
[ if self is the root -> None
else -> the Tag that contains self ]
.depth
[ if self is the root -> 0
else -> self.parent.depth + 1 ]
.gi [ self's generic identifier as a string ]
.attrs
[ a dictionary containing zero or more (key,value) pairs,
where each key is an attribute name, and the corresponding
value must be a string or coercible to a string ]
.content
[ a list containing zero or more children, where each child
may be a string or a Tag object ]
.str(fold)
[ if (fold is a Foldifier object) ->
add a representation of self to fold.
VIRTUAL METHOD, must be provided by subclasses ]
.add ( * L )
[ if (L is a list of strings) ->
self := self with the elements of L added as its next
children ]
.addAttr ( name, value )
[ if (name and value are strings) ->
self := self with an attribute added whose name is name
and whose value is value ]
NB: This routine is needed to add attributes whose names
(e.g., "class") are Python reserved words.
.configure ( d ):
[ if d is a dictionary ->
self.attrs +:= name-value pairs from d, replacing any
entries with duplicate keys ]
.addAttrDict ( d ): synonym for .configure()
.openTag ( )
[ return a list of strings L such that the concatenation of
all elements of L is the XML source for self's opening tag,
but the elements of L are divided at places where line breaks
can harmlessly occur ]
.closeTag ( )
[ return the XML source for self's closing tag as a single
string ]
.strContent ( fold )
[ if fold is a Foldifier object ->
fold := fold with each element of self's content added,
in order ]
class BlockTag(Tag): For block-type tags; these tag groups must
always start on a new line, indented further than the parent
class InlineTag(Tag): For inline-type tags; these tag groups
may be placed in the middle of a line.
class EmptyTag(Tag): For empty tags such as <BR>
in HTML
Notes:
Each tag consists of:
- A required `generic identifier' (GI) string. For example, in
HTML the GI's are H1, H2, P, and so forth.
- Zero or more attributes. Each consists of a required name
and an optional value. For example, in this HTML tag:
<TD NOWRAP ROWSPAN=2>
there are two attributes: NOWRAP has no value and ROWSPAN
has a value of two. Because XML does not allow attributes
without values, this will be output as NOWRAP="NOWRAP".
- Optional content. The content consists of a sequence of
text strings and embedded tags. If a tag U is contained
within a tag T, T is called the PARENT and U is the CHILD tag.
The primary motivation for writing this module is to get reasonable
indentation and line folding in the output
"""
REVISION = "$Revision: 1.8 $"
DATE = "$Date: 2003/06/02 01:06:32 $"
#================================================================
# IMPORTS
#================================================================
from types import * # For type checking
import string # General string handling
#================================================================
# MANIFEST CONSTANTS
#================================================================
COLS_INDENT = 2 # Number of columns per indentation level
#================================================================
# VERIFICATION FUNCTIONS
#================================================================
# attr-format ( name, value ) ==
# if value is None ->
# a string of the form 'name="name"'
# else ->
# a string of the form 'name="v"', where v is str(value)
# with its double quotes escaped by escapeQuotes()
#----------------------------------------------------------------
# - - - e s c a p e Q u o t e s - - -
def escapeQuotes ( s ):
    """Return str(s) with every double quote replaced by '&quot;'.

      [ s is any object coercible to a string ->
          return str(s) with each '"' character replaced by the
          six-character entity reference '&quot;' ]

    Only the double-quote character is rewritten.  Other characters
    that are special in XML ('&', '<', '>') pass through unchanged,
    so callers that need them escaped must do so themselves.
    """
    #-- 1 --
    # str.replace() does the split/rejoin in one pass and returns
    # the input unchanged when it contains no quotes.
    return str(s).replace ( '"', '&quot;' )
# - - - - - c l a s s D o c u m e n t - - - - -
class Document:
    """Represents a whole XML document.

      Exports:
        Document ( rootName=None, dtdName=None )
        .rootName:   [ as passed to constructor ]
        .dtdName:    [ as passed to constructor ]
        .root:
          [ if self has no content -> None
            else -> an object with a .str(fold) method that
            renders the root element ]
          This class only ever sets .root to None; the caller is
          expected to assign the root element afterwards.
        .str ( fold ):
          [ append a representation of self to fold ]
    """

#   - - -   D o c u m e n t . _ _ i n i t _ _   - - -

    def __init__ ( self, rootName=None, dtdName=None ):
        """Constructor for a Document.

          [ rootName and dtdName are strings or None ->
              return a new Document with no root element ]
        """
        self.rootName = rootName
        self.dtdName = dtdName
        # Start with no content so that .str() works on a freshly
        # built document instead of failing on a missing attribute.
        self.root = None

#   - - -   D o c u m e n t . s t r   - - -

    def str ( self, fold ):
        """Append a representation of self to a Foldifier object.

          [ fold is an object with .add(*strings) and .flush() ->
              fold := fold with self's DOCTYPE declaration (if
                      self.rootName is not None) and self's root
                      element (if self.root is not None) added,
                      then flushed ]
        """
        #-- 1 --
        # [ if self.rootName is None -> I
        #   else ->
        #     fold := fold with a DOCTYPE for self appended ]
        if self.rootName is not None:
            if self.dtdName is None:
                # No DTD given: emit a bare declaration rather
                # than a bogus system identifier of "None".
                fold.add ( '<!DOCTYPE %s>' % self.rootName )
            else:
                fold.add ( '<!DOCTYPE %s SYSTEM "%s">' %
                           ( self.rootName, self.dtdName ) )

        #-- 2 --
        # [ if self.root is None -> I
        #   else ->
        #     fold := fold with a representation of self's content
        #             appended ]
        if self.root is not None:
            self.root.str ( fold )

        #-- 3 --
        # [ fold := fold, flushed ]
        fold.flush()
# - - - - - c l a s s T a g - - - - -
class Tag:
    """Represents one XML tag and its contained subtree if any.

    Abstract base class: it stores the generic identifier, the
    attributes and the children, and knows how to build the opening
    and closing tags, but the .str(fold) method that actually emits
    the element must be supplied by a subclass.

      State:
        .parent:   [ None for a root, else the containing Tag ]
        .depth:    [ 0 for a root, else self.parent.depth + 1 ]
        .gi:       [ self's generic identifier ]
        .attrs:    [ dictionary mapping attribute names to values ]
        .content:  [ list of children: strings and/or objects with
                     a .str(fold) method ]
    """

#   - - -   T a g . _ _ i n i t _ _   - - -

    def __init__ ( self, parent, gi, **attrs ):
        """Constructor for the Tag class.

          [ (parent is a false value or a Tag object) and
            (gi is the generic identifier as a string) and
            (attrs holds the attributes as keyword arguments) ->
              if parent is false ->
                return a new root Tag with no children
              else ->
                parent := parent with a new Tag added as its next
                          content
                return that new Tag ]

        Pass _class="..." to get a class="..." attribute, since
        `class' is a Python reserved word.
        """
        #-- 1 --
        self.gi = gi
        self.content = []
        self.attrs = attrs

        #-- 2 --
        # [ if attrs has a key "_class" ->
        #     self.attrs := self.attrs with the entry for "_class"
        #                   deleted and replaced with one for key
        #                   "class" ]
        if "_class" in self.attrs:
            self.attrs["class"] = self.attrs.pop ( "_class" )

        #-- 3 --
        # [ if parent is false ->
        #     self.parent := None
        #     self.depth := 0
        #   else ->
        #     self.parent := parent
        #     self.depth := parent.depth + 1
        #     parent := parent with self added to its content ]
        # NB: any false value, not just None, means "no parent".
        if not parent:
            self.parent = None
            self.depth = 0
        else:
            self.parent = parent
            self.depth = parent.depth + 1
            parent.content.append ( self )

#   - - -   T a g . a d d   - - -

    def add ( self, *L ):
        """Add strings to tag content.

          [ self := self with the elements of L added, in order,
                    as its next children ]
        """
        self.content.extend ( L )

#   - - -   T a g . a d d A t t r   - - -

    def addAttr ( self, name, value ):
        """Add an attribute to self, replacing any existing value.

        Needed for attribute names that cannot be written as
        Python keyword arguments.
        """
        self.attrs[name] = value

#   - - -   T a g . c o n f i g u r e   - - -
#   - - -   T a g . a d d A t t r D i c t   - - -

    def configure ( self, d ):
        """Add the name/value pairs from d as attributes to self.

        Entries in d replace existing attributes with the same name.
        """
        self.attrs.update ( d )

    addAttrDict = configure

#   - - -   T a g . s t r   - - -

    def str ( self, fold=None ):
        """Virtual method: emit self and its content to fold.

        Subclasses must override this.  The base version accepts
        the same fold argument as the overrides and always raises
        TypeError.
        """
        raise TypeError ( "Unimplemented virtual method: Tag.str()" )

#   - - -   T a g . o p e n T a g   - - -

    def openTag ( self ):
        """Return self's opening tag as a list of strings.

          [ return a list L whose concatenation is the source of
            self's opening tag, split between elements at the
            places where a line break is acceptable ]
        """
        #-- 1 --
        # [ L := a list containing the start-tag delimiter and
        #        generic identifier as one element ]
        # NOTE: The concatenation is very important, because the
        # output will be folded on boundaries between strings, and
        # it is *not* good to separate the "<" from the gi.
        L = [ "<" + self.gi ]

        #-- 2 --
        # [ L := L with two elements (" ", s) appended for each
        #        member (n, v) of self.attrs, where s is
        #        attr-format(n,v) ]
        self.appendAttrs ( L )

        #-- 3 --
        L.append ( ">" )

        #-- 4 --
        return L

#   - - -   T a g . a p p e n d A t t r s   - - -

    def appendAttrs ( self, L ):
        """Append the attributes of self to a list.

          [ if L is a list ->
              L := L with two elements (" ", s) appended for each
                   member (n, v) of self.attrs, where s is
                   attr-format(n,v) ]

        Attributes appear in the iteration order of self.attrs.
        """
        for (name, value) in self.attrs.items():
            #-- 1 body --
            # [ L := L with " " and attr-format(name,value)
            #        appended ]
            L.append ( " " )
            L.append ( self.__formatAttr ( name, value ) )

#   - - -   T a g . _ _ f o r m a t A t t r   - - -

    def __formatAttr ( self, name, value ):
        """Format one attribute (name, value) pair.

          [ if value is None ->
              return a string of the form name="name"
            else ->
              return a string of the form name="v", where v is
              str(value) passed through escapeQuotes() ]
        """
        #-- 1 --
        # XML has no valueless attributes, so repeat the name.
        if value is None:
            return '%s="%s"' % ( name, name )

        #-- 2 --
        # [ escaped := value, converted to a string, with its
        #              double quotes escaped ]
        # NB: only double quotes are escaped here; any '&' or '<'
        # in value is emitted as given.
        escaped = escapeQuotes ( str(value) )

        #-- 3 --
        return '%s="%s"' % ( name, escaped )

#   - - -   T a g . c l o s e T a g   - - -

    def closeTag ( self ):
        """Return self's closing tag as a single string."""
        return "</%s>" % self.gi

#   - - -   T a g . s t r C o n t e n t   - - -

    def strContent ( self, fold ):
        """Send self's content to a Foldifier.

          [ fold := fold with each child of self added in order:
                    a child that has a .str method renders itself
                    via child.str(fold); any other child is passed
                    to fold.add() as text ]
        """
        for elt in self.content:
            # Dispatch on the presence of a .str method rather than
            # on the exact string type, so that unicode strings and
            # str subclasses are treated as text too.
            render = getattr ( elt, "str", None )
            if render is None:
                fold.add ( elt )
            else:
                render ( fold )
# - - - - - c l a s s B l o c k T a g - - - - -
class BlockTag(Tag):
    """Represents a block-type tag, which starts on a new line.

    The element's content is emitted between two indentation
    adjustments of COLS_INDENT columns, and fold is flushed both
    before the opening tag and after the closing tag.
    """

#   - - -   B l o c k T a g . s t r   - - -

    def str ( self, fold ):
        """Format self to a Foldifier object.

          [ fold is an object with .add(*strings), .flush() and
            .addIndent(n) methods ->
              fold := fold with self's opening tag, content and
                      closing tag added ]
        """
        #-- 1 --
        # [ fold := fold flushed ]
        fold.flush()

        #-- 2 --
        # [ fold := fold with self's opening tag added ]
        # Each piece is passed as a separate argument so that fold
        # sees the permitted break points between them.
        fold.add ( *self.openTag() )

        #-- 3 --
        # [ fold := fold with indenting increased by COLS_INDENT ]
        fold.addIndent ( COLS_INDENT )

        #-- 4 --
        # [ fold := fold with self's content added ]
        self.strContent ( fold )

        #-- 5 --
        # [ fold := fold with indenting decreased by COLS_INDENT ]
        fold.addIndent ( -COLS_INDENT )

        #-- 6 --
        # [ fold := fold with self's closing tag added ]
        fold.add ( self.closeTag() )

        #-- 7 --
        # [ fold := fold flushed ]
        fold.flush()
# - - - - - c l a s s I n l i n e T a g - - - - -
class InlineTag(Tag):
    """Represents tags that can appear in mid-line.

    Unlike BlockTag, nothing is flushed and the indentation is
    left alone: the opening tag, content and closing tag are simply
    added to fold one after another.
    """

#   - - -   I n l i n e T a g . s t r   - - -

    def str ( self, fold ):
        """Add a representation of self to fold.

          [ fold is an object with an .add(*strings) method ->
              fold := fold with self's opening tag, content and
                      closing tag added ]
        """
        #-- 1 --
        # [ fold := fold with a representation of self's opening
        #           tag sent to it ]
        # Each piece is passed as a separate argument so that fold
        # sees the permitted break points between them.
        fold.add ( *self.openTag() )

        #-- 2 --
        # [ fold := fold with self's content sent to it ]
        self.strContent ( fold )

        #-- 3 --
        # [ fold := fold with self's closing tag sent to it ]
        fold.add ( self.closeTag() )
# - - - - - c l a s s E m p t y T a g - - -
class EmptyTag(Tag):
    """Represents empty tags such as <BR> in HTML.

    The element is written in XML's self-closing form, ending in
    "/>"; self.content is not emitted.
    """

#   - - -   E m p t y T a g . s t r   - - -

    def str ( self, fold ):
        """Generate output for an empty tag.

          [ fold is an object with .add(*strings) and .flush()
            methods ->
              fold := fold flushed, with self's tag added, then
                      flushed again ]
        """
        #-- 1 --
        # [ fold := fold, flushed
        #   L := a list containing the start-tag delimiter and
        #        generic identifier as one element ]
        # The "<" stays glued to the gi so that the output is never
        # folded between them.
        fold.flush()
        L = [ "<" + self.gi ]

        #-- 2 --
        # [ L := L with two elements (" ", s) appended for each
        #        member (n, v) of self.attrs, where s is
        #        attr-format(n,v) ]
        self.appendAttrs ( L )

        #-- 3 --
        L.append ( "/>" )

        #-- 4 --
        # [ fold := fold with elements of L appended, then flushed ]
        fold.add ( *L )
        fold.flush()
|