"""sgmltag.py: SGML tag processing module $Revision: 1.9 $ $Date: 2002/08/01 07:15:12 $ Exports: giStartCset: a Cset object representing the characters that can be the first character of a Generic Identifier (GI) giCset: a Cset object representing the characters that can be the non-first characters of a Generic Identifier (GI) class SGMLTag: Represents one opening or closing tag def SGML_Tag_Scan(): Scans one tag from a Scan object class TagAttr: Represents one attribute of a tag def Tag_Attr_Scan(): Scans one attribute from a Scan object """ import string # Standard string module from cset import * # Author's Icon cset type giStartCset = letters giCset = Cset("_-").union(digits).union(letters) SGML_TAG_OPEN = "<" SGML_COMMENT_GI = "!" # Appears in the GI position of a comment SGML_COMMENT_HEAD = "!--" # Starts a comment SGML_COMMENT_TAIL = "-->" # Ends a comment class SGMLTag: """Represents one opening or closing SGML tag. Exports: SGMLTag ( gi, isClose, attrList, text=None ): [ if (gi is the Generic Identifier as a nonempty string) and (isClose is 0 an opening tag, 1 for a closing tag) and (attrList is a list of TagAttr objects representing zero or more attributes, if not a comment, else None) and (text is a string contain the comment text, if a comment, else None) -> return a new SGMLTag with those values ] .gi: [ as passed to constructor ] .isClose: [ as passed to constructor ] .attrList: [ if the attrList argument passed to the constructor was None -> an empty list else -> as passed to constructor ] .text: [ as passed to constructor ] str(): [ returns self reconstituted as a text string ] """ # - - - S G M L T a g . _ _ i n i t _ _ - - - def __init__ ( self, gi, isClose, attrList, text=None ): self.gi = gi self.isClose = isClose self.text = text if attrList: self.attrList = attrList else: self.attrList = [] # - - - S G M L T a g . _ _ s t r _ _ - - - def __str__ ( self ): """Reconstitute self as a string """ #-- 1 -- if self.gi == SGML_COMMENT_GI: return "<%s%s%s" % ( SGML_COMMENT_HEAD, self.text, SGML_COMMENT_TAIL ) #-- 2 -- L = ["<"] #-- 3 -- if self.isClose: L.append ( "/" ) #-- 4 -- L.append ( self.gi ) #-- 5 -- for attr in self.attrList: L.append ( " " ) L.append ( str ( attr ) ) #-- 6 -- L.append ( ">" ) #-- 7 -- return string.join ( L, "" ) # - - - S G M L _ T a g _ S c a n - - - def SGML_Tag_Scan ( scan ): """Parses an SGML tag from a Scan object. [ if scan is a Scan object -> if scan starts with a syntactically valid SGML tag -> scan := scan advanced past that tag return a new SGMLTag object representing the tag else if scan appears to start with a tag but it is syntactically invalid -> scan := scan advanced past the valid-looking part, but at least one character Log() +:= error message(s) return None else -> return None ] """ #-- 1 -- # [ if scan starts with SGML_TAG_OPEN -> # scan := scan advanced one # else -> # return None ] if not scan.tabMatch ( SGML_TAG_OPEN ): return None #-- 2 -- # [ if scan starts with SGML_COMMENT_HEAD followed by arbitrary # text and then, possibly on a different line, SGML_COMMENT_TAIL -> # scan := scan advanced past the first occurrence of # SGML_COMMENT_TAIL # return a new SGMLTag object representing a comment whose text # is all text between scan's current position and the first # occurrence of SGML_COMMENT_TAIL # else if scan starts with SGML_COMMENT_HEAD but there is no # SGML_COMMENT_TAIL anywhere in scan -> # scan := scan advanced to EOF # Log() +:= error message(s) # return None # else -> I ] if scan.tabMatch ( SGML_COMMENT_HEAD ): return SGML_Tag_Scan_Comment ( scan ) #-- 3 -- # [ if the next character in scan is "/" -> # scan := scan advanced one # isClose := 1 # else -> # isClose := 0 ] if scan.tabMatch ( "/" ): isClose = 1 else: isClose = 0 #-- 4 -- # [ if scan starts with a valid GI -> # scan := scan advanced past that GI # gi := that GI as a string # else -> # Log() +:= error message(s) # return None ] gi = SGML_Tag_Scan_GI ( scan ) if gi is None: return None #-- 5 -- # [ scan := scan advanced past all leading whitespace, including # any number of newlines # attrList := an empty list ] scan.deblank() attrList = [] #-- 6 -- # [ if scan contains one or more attributes separated by whitespace -> # scan := scan advanced past all attributes and whitespace # attrList := attrList with TagAttr objects appended representing # those attributes # Log() +:= error messages about badly formed attributes, if any # else -> I ] SGML_Tag_Scan_Attr_List ( scan, attrList ) #-- 7 -- # [ if scan starts with ">" -> # scan := scan advanced by one # return a new SGMLTag object with .name=name, .isClose=isClose, # .attrList=attrList, and .text=None # else -> # Log() +:= error message # return None ] if not scan.tabMatch ( ">" ): scan.error ( "Expecting the `>' at the end of an SGML tag." ) return None else: return SGMLTag ( gi, isClose, attrList ) # - - - S G M L _ T a g _ S c a n _ C o m m e n t - - - def SGML_Tag_Scan_Comment ( scan ): """Scan an SGML comment, after the opening `