"""gen_xml.py: Library for generating XML files $Revision: 1.8 $ $Date: 2003/06/02 01:06:32 $ By John W. Shipman (john@nmt.edu); derived from my earlier `gen_sgml.py'. Exports: class Document: A whole XML document Document ( rootName=None, dtdName=None ): [ if rootName is None -> return a new, empty Document object with no associated DTD else if rootName and dtdName are strings -> return a new, empty Document object with root element name (rootName) and associated with DTD name (dtdName) ] .rootName: [ as passed to constructor ] .dtdName: [ as passed to constructor ] .root: [ if self has no content -> None else -> a Tag object containing the root element ] .str(fold): [ if fold is a Foldifier object -> append a representation of self to fold ] class Tag: Base class for all XML tags. Virtual class; child classes must supply their own __str__ methods. Tag ( parent, gi, **attrs ) [ if (parent is None or a Tag object) and (gi is the generic identifier as a nonempty string) and (attrs is a dictionary containing the attributes) -> if (parent is None) -> return a new root Tag object with generic identifier (gi), attributes (attrs), and no children else -> parent := parent with a new Tag object with gi (gi), attributes (attrs), and no children added as its next content return that same new Tag object ] NB: In the attrs argument, use _class to get a class="..." attribute. .parent [ if self is the root -> None else -> the Tag that contains self ] .depth [ if self is the root -> 0 else -> self.parent.depth + 1 ] .gi [ self's generic identifier as a string ] .attrs [ a dictionary containing zero or more (key,value) pairs, where each key is an attribute name, and the corresponding value must be a string or coercible to a string ] .content [ a list containing zero or more children, where each child may be a string or a Tag object ] .str(fold) [ if (fold is a Foldifier object) -> add a representation of self to fold. VIRTUAL METHOD, must be provided by subclasses ] .add ( * L ) [ if (L is a list of strings) -> self := self with the elements of L added as its next children ] .addAttr ( name, value ) [ if (name and value are strings) -> self := self with an attribute added whose name is name and whose value is value ] NB: This routine is needed to add attributes whose names (e.g., "class") are Python reserved words. .configure ( d ): [ if d is a dictionary -> self.attrs +:= name-value pairs from d, replacing any entries with duplicate keys ] .addAttrDict ( d ): synonym for .configure() .openTag ( ) [ return a list of strings L such that the concatenation of all elements of L is the XML source for self's opening tag, but the elements of L are divided at places where line breaks can harmlessly occur ] .closeTag ( ) [ return a list of strings L such that the concatenation of all elements of L is the XML source for self's closing tag, but the elements of L are divided at places where line breaks can harmlessly occur ] .strContent ( ) [ return self's content as a list of strings ] class BlockTag(Tag): For block-type tags; these tag groups must always start on a new line, indented further than the parent class InlineTag(Tag): For inline-type tags; these tag groups may be placed in the middle of a line. class EmptyTag(Tag): For empty tags such as
in HTML Notes: Each tag consists of: - A required `generic identifier' (GI) string. For example, in HTML the GI's are H1, H2, P, and so forth. - Zero or more attributes. Each consists of a required name and an optional value. For example, in this HTML tag: there are two attributes: NOWRAP has no value and ROWSPAN has a value of two. Because XML does not allow attributes without values, this will be output as NOWRAP="NOWRAP". - Optional content. The content consists of a sequence of text strings and embedded tags. If a tag U is contained within a tag T, T is called the PARENT and U is the CHILD tag. The primary motivation for writing this module is to get reasonable indentation and line folding in the output """ REVISION = "$Revision: 1.8 $" DATE = "$Date: 2003/06/02 01:06:32 $" #================================================================ # IMPORTS #================================================================ from types import * # For type checking import string # General string handling #================================================================ # MANIFEST CONSTANTS #================================================================ COLS_INDENT = 2 # Number of columns per indentation level #================================================================ # VERIFICATION FUNCTIONS #================================================================ # attr-format ( name, value ) == # if value is None -> name # else if value is an integer -> # a string of the form 'name=v', where v is str(value) # else -> # a string of the form 'name="value"' #---------------------------------------------------------------- # - - - e s c a p e Q u o t e s - - - def escapeQuotes ( s ): """Returns s, with all occurrences of '"' replaced by """ """ #-- 1 -- # [ L := a list of substrings of s such that s equals # L[0] + '"' + L[1] + ... + '"' + L[-1] L = str(s).split ( '"' ) #-- 2 -- # [ M := L with """ inserted between elements ] M = [L[0]] for i in range(1,len(L)): M.append ( """ ) M.append ( L[i] ) #-- 3 -- return "".join(M) # - - - - - c l a s s D o c u m e n t - - - - - class Document: "Represents a whole XML document." # - - - D o c u m e n t . _ _ i n i t _ _ - - - def __init__ ( self, rootName=None, dtdName=None ): "Constructor for a Document" self.rootName = rootName self.dtdName = dtdName # - - - D o c u m e n t . s t r - - - def str ( self, fold ): "Append a representation of self to a Foldifier object." #-- 1 -- # [ if self.rootName is None -> I # else -> # fold := fold with a DOCTYPE for self appended ] if self.rootName is not None: fold.add ( '' % ( self.rootName, self.dtdName ) ) #-- 2 -- # [ if self.root is None -> I # else -> # fold := fold with a representation of self's content # appended ] if self.root is not None: self.root.str(fold) #-- 3 -- # [ fold := fold, flushed ] fold.flush() # - - - - - c l a s s T a g - - - - - class Tag: "Represents one XML tag and its contained subtree if any." # - - - T a g . _ _ i n i t _ _ - - - def __init__ ( self, parent, gi, **attrs ): "Constructor for the Tag class" #-- 1 -- self.gi = gi self.content = [] self.attrs = attrs #-- 2 -- # [ if attrs has a key "_class" -> # self.attrs := self.attrs with the entry for "_class" # deleted and replaced with one for key "class" ] if self.attrs.has_key ( "_class" ): self.attrs["class"] = self.attrs["_class"] del self.attrs["_class"] #-- 3 -- # [ if parent is false -> # self.parent := None # self.depth := 0 # else -> # self.parent := parent # self.depth := parent.depth + 1 # parent := parent with self added to its content if not parent: self.parent = None self.depth = 0 else: self.parent = parent self.depth = parent.depth + 1 parent.content.append ( self ) # - - - T a g . a d d - - - def add ( self, *L ): "Add strings to tag content." self.content = self.content + map ( None, L ) # - - - T a g . a d d A t t r - - - def addAttr ( self, name, value ): "Add an attribute to self." self.attrs[name] = value # - - - T a g . c o n f i g u r e - - - # - - - T a g . a d d A t t r D i c t - - - def configure ( self, d ): "Add the name/value pairs from d as attributes to self." self.attrs.update ( d ) addAttrDict = configure # - - - T a g . _ _ s t r _ _ - - - def str ( self ): "Virtual method: emits self and content as a string." raise TypeError, "Unimplemented virtual method: Tag.__str__()" # - - - T a g . o p e n T a g - - - def openTag ( self ): "Return self's opening tag as a list of strings." #-- 1 -- # [ L := a list containing the &ETAGO; and generic identifier ] # NOTE: The concatenation is very important, because the output # will be folded on boundaries between strings, and it is *not* # good to separate the &ETAGO; from the gi. L = [ "<" + self.gi ] #-- 2 -- # [ L := L with two elements (" ", s) appended for each member # (n, v) of self.attrs, where s is attr-format(n,v) ] self.appendAttrs ( L ) #-- 3 -- L.append ( ">" ) #-- 4 -- return L # - - - T a g . a p p e n d A t t r s - - - def appendAttrs ( self, L ): """Append the attributes of self to a list. [ if L is a list -> L := L with two elements (" ", s) appended for each member (n, v) of self.attrs, where s is attr-format(n,v) ] """ for (name, value) in self.attrs.items(): #-- 2 body -- # [ L := L with " " and attr-format(name,value) appended ] L.append ( " " ) L.append ( self.__formatAttr ( name, value ) ) # - - - T a g . _ _ f o r m a t A t t r - - - def __formatAttr ( self, name, value ): """Format one attribute (name, value) pair [ if (name is a string) and (value is an integer, string, or None) -> return attr-format(name,value) ] """ #-- 1 -- if value is None: return '%s="%s"' % ( name, name ) #-- 2 -- # [ escaped := value, converted to a string, with all # double-quote values replaced by "'" ] escaped = escapeQuotes ( str(value) ) #-- 3 -- return '%s="%s"' % ( name, escaped ) # - - - T a g . c l o s e T a g - - - def closeTag ( self ): "Returns self's closing tag." return "" % self.gi # - - - T a g . s t r C o n t e n t - - - def strContent ( self, fold ): "Send self's content to a Foldifier." for elt in self.content: if type(elt) is StringType: fold.add ( elt ) else: elt.str ( fold ) # - - - - - c l a s s B l o c k T a g - - - - - class BlockTag(Tag): "Represents a block-type tag, which starts on a new line." # - - - B l o c k T a g . s t r - - - def str ( self, fold ): """Format self to a Foldifier object. """ #-- 1 -- # [ fold := fold flushed ] fold.flush() #-- 2 -- # [ fold := fold with self's opening tag added ] L = tuple ( self.openTag ( ) ) apply ( fold.add, L ) #-- 3 -- # [ fold := fold with indenting increased by COLS_INDENT ] fold.addIndent(COLS_INDENT) #-- 4 -- # [ fold := fold with self's content added ] self.strContent ( fold ) #-- 5 -- # [ fold := fold with indenting decreased by COLS_INDENT ] fold.addIndent(-COLS_INDENT) #-- 6 -- # [ fold := fold with self's closing tag added ] fold.add ( self.closeTag ( ) ) #-- 7 -- # [ fold := fold flushed ] fold.flush() # - - - - - c l a s s I n l i n e T a g - - - - - class InlineTag(Tag): "Represents tags that can appear in mid-line." def str ( self, fold ): """Add a representation of self to fold. """ #-- 1 -- # [ fold := fold with a representation of self's opening tag # sent to it ] L = tuple ( self.openTag ( ) ) apply ( fold.add, L ) #-- 2 -- # [ fold := fold with self's content sent to it ] self.strContent ( fold ) #-- 3 -- # [ fold := fold with self's closing tag sent to it ] fold.add ( self.closeTag ( ) ) # - - - - - c l a s s E m p t y T a g - - - class EmptyTag(Tag): "Represents empty tags such as
in HTML." def str ( self, fold ): """Generate output for an empty tag """ #-- 1 -- # [ L := a list containing the &ETAGO; and generic identifier # fold := fold, flushed ] fold.flush() L = [ "<" + self.gi ] #-- 2 -- # [ L := L with two elements (" ", s) appended for each member # (n, v) of self.attrs, where s is attr-format(n,v) ] self.appendAttrs ( L ) #-- 3 -- L.append ( "/>" ) #-- 4 -- # [ fold := fold with elements of L appended ] apply ( fold.add, L ) fold.flush()