# scan.py: Stream scanner object for Python
#--
# $Revision: 1.21 $
# $Date: 2002/06/22 04:24:32 $
#--

import re                   # Regular expressions, used by Scan.reMatch()
import string               # Standard modules
import types

from cset import *          # Character set type, like Icon csets
from log import *           # Author's standard error logging object


class Scan:
    """Stream scanner object.

    Written by John W. Shipman (john@nmt.edu), New Mexico Tech
    Computer Center, Socorro, NM 87801.

    This class is based on an original in the Icon language that
    I've used heavily.  Its methods include a number that are
    built-ins in Icon but had to be added here.

    Uses the Log object for all error logging (which see).

    Exported methods:
      Scan ( fileName, commentPrefix=None, callback=None )
      .close()                  # Close this object
      .atEndLine()              # True iff self is at end of line
      .error ( text,... )       # Log a scan error message
      .warning ( text,... )     # Log a scan warning
      .msgKind ( kind, text, ... )  # Log a generic message
      .msgCount ( kind=None )   # Return count of messages
      .nextLine()               # Read next line, if there is one
      .message ( text, ... )    # Log a message with our prefix
      .write ( text, ...)       # Log a message without our prefix
      .move ( n )               # Advance position in current line
      .tab ( n )                # Move to position in current line
      .isPos ( n )              # Is current position n?
      .find ( s )               # Find string s on current line
      .upto ( c )               # Find first character that is in a Cset
      .deblank ( )              # Skip whitespace on many lines
      .deblankLine ( )          # Skip whitespace on current line
      .any ( c )                # Is next char in a given Cset?
      .tabAny ( c )             # Advance if next char is in a Cset
      .many ( c )               # Find leading characters in a Cset
      .tabMany ( c )            # Advance past leading chars in a Cset
      .match ( s )              # Does the line start with s?
      .matchArb ( s )           # Like match(), but case-insensitive
      .tabMatch ( s )           # Advance if line starts with s
      .tabMatchArb ( s )        # Like tabMatch(), but case-insensitive
      .reMatch ( r )            # Match regular expression
      .tabReMatch ( r )         # Like tabMatch(), but uses regular expr.
      .moveMatch ( m )          # Advance using a MatchObject (from `re')
      .integer ( maxLen=None )  # Parser for an integer
      .fixed ( )                # Parser for a fixed-point number
      .flatCset ( n, c )        # Parse fixed-sized field matching Cset
      .flatInt ( n )            # Parse fixed-sized integer field

    Exported elements:
      .atEndFile    # True iff self is at end of file
      .lineNo       # Current line number of self, counting from 1
      .pos          # Position in current line, must be in the range
                    # [0,len(line)].  READ-ONLY!!
      .line         # Same as self.rawLine, except comments (if any)
                    # are removed.  READ-ONLY!!

    State and invariants:
      .fileName
        If a string was passed to the constructor's fileName
        argument, that string is stored here, otherwise it is None.
      .file
        File handle for reading the input file.
        INV: Must be readable.
      .commentPrefix
        As passed to the constructor; string or None.
      .callback
        As passed to the constructor; a function or None.
      .lineNo
        Line number of self.line, counting from 1.
      .rawLine
        The current line from the input, as a string; may be empty;
        never has a trailing newline; valid only if .atEndFile is
        false, else "".
      .echoed
        1 if a scan error has been logged against the current
        line of self, else 0.
      .atEndFile
        1 if the input has been exhausted, else 0.
    """

    ERROR = "Error"                       # Default error kind
    WHITE_CSET = Cset(" \t")              # Cset for whitespace
    DIGIT_CSET = Cset(string.digits)      # Cset for digits
    MESSAGE_PREFIX = "*** "               # Normal message prefix

# - - -   S c a n . _ _ i n i t _ _   - - -

    def __init__(self, fileName, commentPrefix=None, callback=None):
        """
        [ if (fileName is a string or a readable file object)
          and (commentPrefix is a string or None)
          and (callback is a function or None) ->
            if (fileName names a readable file) ->
              return a new Scan object with (commentPrefix) as the
              comment prefix (if given) and using (callback) as the
              callback procedure (if given) and positioned at the
              beginning of the first line (if there is one)
            if (fileName is a readable file object) ->
              return a new Scan object as above, reading from
              fileName at its current position
            else -> raise IOError
          else -> raise TypeError ]
        """
        #-- 1 --
        #-[ if fileName is a file object ->
        #     self.fileName  :=  None
        #     self.file      :=  fileName
        #   else if fileName is a string naming a readable file ->
        #     self.fileName  :=  fileName
        #     self.file      :=  fileName opened for reading
        #   else if fileName is a string not naming a readable file ->
        #     raise IOError
        #   else ->
        #     raise TypeError
        #-]
        # Anything offering readline() is treated as a file, because
        # readline() is the only file operation nextLine() uses.
        if hasattr(fileName, "readline"):
            self.fileName = None
            self.file = fileName
        elif isinstance(fileName, str):
            self.fileName = fileName
            self.file = open(fileName)      # May raise IOError
        else:
            raise TypeError("First argument must be a file handle"
                            " or file name")

        #-- 2 --
        self.commentPrefix = commentPrefix
        self.callback = callback
        self.lineNo = 0
        self.rawLine = ""
        self.line = ""
        self.pos = 0
        self.echoed = 0
        self.atEndFile = 0

        #-- 3 --
        #-[ if self.file has no lines remaining ->
        #     self.atEndFile  :=  1
        #   else ->
        #     self.file  :=  self.file advanced after the next line
        #     self       :=  self advanced to the start of that line
        #-]
        self.nextLine()     # Attempt to read first line, possibly EOF

# - - -   S c a n . c l o s e   - - -

    def close(self):
        """
        [ self  :=  self, closed; all further operations on self
                    are undefined ]
        """
        if self.file:
            self.file.close()
            self.file = None

# - - -   S c a n . a t E n d L i n e   - - -

    def atEndLine(self):
        """
        [ if self is at end of line -> return 1
          else -> return 0 ]
        """
        if self.pos == len(self.line):
            return 1
        return 0

# - - -   S c a n . e r r o r   - - -

    def error(self, *L):
        """
        [ Log()  +:=  an error message showing the current line
                      and position of self, with all (text)
                      arguments used as the message ]
        """
        self.msgKind(self.ERROR, "".join(L))

# - - -   S c a n . w a r n i n g   - - -

    def warning(self, *L):
        """
        [ Log()  +:=  a warning message showing the current line
                      and position of self, with all (text)
                      arguments used as the message ]
        """
        # WARNING_KIND is not defined in this file; presumably it is
        # supplied by `from log import *' -- confirm against log.py.
        self.msgKind(WARNING_KIND, "".join(L))

# - - -   S c a n . m s g K i n d   - - -

    def msgKind(self, kind, *L):
        """
        [ Log()  +:=  a message of kind (kind) showing the current
                      line and position of self, with all (text)
                      arguments used as the message ]
        """
        #-- 1 --
        #-[ if self.echoed -> I
        #   else ->
        #     self.echoed  :=  1
        #     Log()  +:=  message displaying self.fileName (if
        #                 there is one) and self.lineNo and
        #                 the result of self.callback(self) (if
        #                 there is a self.callback) and self.rawLine
        #-]
        # The source line is echoed only once, however many messages
        # are logged against it.
        if not self.echoed:
            self.echoed = 1
            if self.fileName:
                where = "File `%s', line %d" % (self.fileName,
                                                 self.lineNo)
            else:
                where = "Line %d" % self.lineNo
            if self.callback:
                where = "%s [%s]" % (where, self.callback(self))
            Log().write("\n--- ", where, "\n", self.rawLine)

        #-- 2 --
        #-[ Log()  +:=  (a line pointing to self.pos) + (strings from L)
        #-]
        text = "".join(L)
        Log().write(" " * self.pos, "^")
        Log().msgKind(kind, text)

# - - -   S c a n . m s g C o u n t   - - -

    def msgCount(self, kind=None):
        """Returns the count of errors or other kinds of messages.

        Returns whatever Log().count(kind=kind) returns; per the
        original specification:

        [ if kind is None ->
            return number of error messages in Log()
          else if kind is a string ->
            return number of messages of kind (kind) in Log() ]
        """
        return Log().count(kind=kind)

# - - -   S c a n . n e x t L i n e   - - -

    def nextLine(self):
        """
        [ if self.file has no lines remaining ->
            self.atEndFile  :=  1
            return 0
          else ->
            self  :=  self moved to the beginning of the next line
            return 1 ]
        """
        #-- 1 --
        if self.atEndFile:
            return 0

        #-- 2 --
        #-[ if self.file has at least one line remaining ->
        #     self.file     :=  self.file advanced past that line
        #     self.rawLine  :=  that line as a string
        #   else ->
        #     self.rawLine  :=  ""
        #-]
        self.rawLine = self.file.readline()

        #-- 3 --
        # readline() yields "" only at end of file; an empty input
        # line still arrives as "\n".
        if len(self.rawLine) == 0:
            self.atEndFile = 1
            self.line = self.rawLine = ""
            return 0

        #-- 4 --
        #-[ if self.rawLine ends with a newline ->
        #     self.rawLine  :=  self.rawLine minus that newline
        #   else -> I
        #-]
        if self.rawLine[-1] == "\n":
            self.rawLine = self.rawLine[:-1]

        #-- 5 --
        self.line = self.rawLine
        self.echoed = 0
        self.lineNo = self.lineNo + 1
        self.pos = 0

        #-- 6 --
        #-[ if (self.commentPrefix is nonempty)
        #   and (self.rawLine contains self.commentPrefix) ->
        #     self.line  :=  self.rawLine with everything from the
        #                    first occurrence of self.commentPrefix
        #                    to the end of the line removed
        #   else -> I
        #-]
        if self.commentPrefix:
            commentPos = self.rawLine.find(self.commentPrefix)
            if commentPos >= 0:
                self.line = self.rawLine[0:commentPos]

        #-- 7 --
        return 1            # Indicate success to caller

# - - -   S c a n . m e s s a g e   - - -

    def message(self, *L):
        """
        [ Log()  +:=  a line consisting of self.MESSAGE_PREFIX with
                      all (text) arguments concatenated to it ]
        """
        self.write(self.MESSAGE_PREFIX + "".join(L))

# - - -   S c a n . w r i t e   - - -

    def write(self, *L):
        """
        [ Log()  +:=  a line consisting of all (text) arguments,
                      concatenated ]
        """
        Log().write("".join(L))

# - - -   S c a n . m o v e   - - -

    def move(self, n):
        """
        [ if n is an integer ->
            if (current position)+(n) is somewhere on the current
            line ->
              self  :=  self moved by n characters (backward if n
                        is negative)
              return the string between the old position and the
              new position, possibly empty
            else -> raise IndexError ]
        """
        #-- 1 --
        newPos = self.pos + n

        #-- 2 --
        #-[ if newPos is within the current line -> I
        #   else -> raise IndexError
        #-]
        if not (0 <= newPos <= len(self.line)):
            raise IndexError

        #-- 3 --
        #-[ self.pos  :=  newPos
        #   return self.line between min(old pos, newPos) and
        #   max(old pos, newPos)
        #-]
        loPos = min(self.pos, newPos)
        hiPos = max(self.pos, newPos)
        self.pos = newPos
        return self.line[loPos:hiPos]

# - - -   S c a n . t a b   - - -

    def tab(self, newPos):
        """
        [ if newPos is an integer ->
            let effPos be newPos if newPos >= 0, else
            len(self.line)+1+newPos (so -1 means end of line) in:
              if effPos is somewhere on the current line ->
                self.pos  :=  effPos
                return the string between the old position and
                effPos (possibly an empty string)
              else -> raise IndexError ]
        """
        #-- 1 --
        #-[ effPos  :=  newPos converted to the equivalent
        #               nonnegative position
        #-]
        effPos = self.__effPos(newPos)

        #-- 2 --
        #-[ if effPos is within the current line -> I
        #   else -> raise IndexError
        #-]
        if not (0 <= effPos <= len(self.line)):
            raise IndexError

        #-- 3 --
        #-[ self.pos  :=  effPos
        #   return self.line between min(old pos, effPos) and
        #   max(old pos, effPos)
        #-]
        loPos = min(self.pos, effPos)
        hiPos = max(self.pos, effPos)
        self.pos = effPos
        return self.line[loPos:hiPos]

# - - -   S c a n . _ _ e f f P o s   - - -

    def __effPos(self, p):
        """Converts a negative position to the equivalent nonnegative
        position: -1 is end of line, -2 is one before it, and so on.
        Nonnegative positions are returned unchanged.
        """
        if p < 0:
            return len(self.line) + 1 + p
        return p

# - - -   S c a n . i s P o s   - - -

    def isPos(self, p):
        """Checks to see if the current line is at position p.

        Supports end-relative indexing: use -1 to check if the
        scanner is at end of line, -2 to see if it is one before
        end of line, and so on.  Returns the (nonnegative) position
        if true, else returns None.

        [ let effPos be p if p >= 0, else len(self.line)+1+p in:
            if self.pos == effPos -> return effPos
            else -> return None ]
        """
        #-- 1 --
        effPos = self.__effPos(p)

        #-- 2 --
        # Compare against the converted position: self.pos is never
        # negative, so comparing with a raw negative p cannot match.
        if self.pos == effPos:
            return effPos
        return None

# - - -   S c a n . f i n d   - - -

    def find(self, s):
        """
        [ if string s occurs on the remainder of the current line ->
            return the position of the first occurrence's first
            character
          else -> return None ]
        """
        p = self.line.find(s, self.pos)
        if p < 0:
            return None
        return p

# - - -   S c a n . u p t o   - - -

    def upto(self, c):
        """
        [ if c is a Cset ->
            if there is a character found in c on the current line
            at or after the current position ->
              return the position of the first such character
              (counting from 0)
            else -> return None ]
        """
        #-- 1 --
        #-[ if any character remaining in self's line is in cset c ->
        #     return the index of the first such character
        #   else -> I
        #-]
        for i in range(self.pos, len(self.line)):
            if c.has(self.line[i]):
                return i

        #-- 2 --
        return None

# - - -   S c a n . d e b l a n k   - - -

    def deblank(self):
        """
        [ self  :=  self advanced past all spaces or tabs, skipping
                    lines if necessary until reaching a character
                    that is not space or tab, or end of file ]
        """
        #-- 1 --
        while not self.atEndFile:
            #-- 1.1 --
            #-[ if the current line of self starts with one or more
            #   whitespace characters ->
            #     self  :=  self advanced past all such characters
            #   else -> I
            #-]
            self.tabMany(self.WHITE_CSET)

            #-- 1.2 --
            #-[ if the current line is not exhausted ->
            #     return
            #   else if any lines remain in self's file ->
            #     advance to the next line
            #   else ->
            #     self.atEndFile  :=  1   (which ends the loop)
            #-]
            if not self.atEndLine():
                return
            self.nextLine()

        #-- 2 --
        return

# - - -   S c a n . d e b l a n k L i n e   - - -

    def deblankLine(self):
        """
        [ self  :=  self advanced past all spaces or tabs, until
                    reaching a character that is not space or tab,
                    or end of line, whichever comes first ]
        """
        self.tabMany(self.WHITE_CSET)

# - - -   S c a n . a n y   - - -

    def any(self, c):
        """
        [ if c is a Cset ->
            if the current line of self starts with a character
            found in c ->
              return the position past that character
            else -> return None ]
        """
        #-- 1 --
        #-[ if self is at end of line -> return None
        #   else -> I
        #-]
        if self.atEndLine():
            return None

        #-- 2 --
        #-[ if the next character is in c -> return self.pos+1
        #   else -> return None
        #-]
        if c.has(self.line[self.pos]):
            return self.pos + 1
        return None

# - - -   S c a n . t a b A n y   - - -

    def tabAny(self, c):
        """
        [ if c is a Cset ->
            if the current line of self starts with a character
            found in c ->
              self  :=  self advanced by one character
              return the (old!) first character of self
            else -> return None ]
        """
        #-- 1 --
        #-[ if self starts with a character in cset c ->
        #     i  :=  position after that character
        #   else ->
        #     i  :=  None
        #-]
        i = self.any(c)

        #-- 2 --
        if i is None:
            return None

        #-- 3 --
        #-[ self  :=  self advanced to position i
        #   return the characters between the old position and i
        #-]
        return self.tab(i)

# - - -   S c a n . m a n y   - - -

    def many(self, c):
        """
        [ if c is a Cset ->
            if the current line of self starts with one or more
            characters found in c ->
              return the position of the first character that
              is not in c, or end of line, whichever comes first
            else -> return None ]
        """
        #-- 1 --
        #-[ if (self is at end of line)
        #   or (the next character in self is not in c) ->
        #     return None
        #   else ->
        #     i  :=  self.pos
        #-]
        if self.any(c) is None:
            return None
        i = self.pos

        #-- 2 --
        #-[ i  :=  i advanced past all characters in c
        #-]
        while i < len(self.line) and c.has(self.line[i]):
            i = i + 1

        #-- 3 --
        return i

# - - -   S c a n . t a b M a n y   - - -

    def tabMany(self, c):
        """
        [ if c is a Cset ->
            if the current line of self starts with one or more
            characters found in c ->
              self  :=  self advanced past all such characters
              return the matching characters from self (a string)
            else -> return None ]
        """
        #-- 1 --
        #-[ if the current line of self starts with one or more
        #   characters found in c ->
        #     i  :=  the position of the first character that is not
        #            in c, or end of line, whichever comes first
        #   else ->
        #     i  :=  None
        #-]
        i = self.many(c)

        #-- 2 --
        if i is None:
            return None
        return self.tab(i)

# - - -   S c a n . m a t c h   - - -

    def match(self, s):
        """
        [ if s is a string ->
            if the current line of self starts with s ->
              return the position after the matching part
            else -> return None ]
        """
        #-- 1 --
        #-[ if (remaining line in self is shorter than s) ->
        #     return None
        #   else -> I
        #-]
        endPos = self.pos + len(s)
        if endPos > len(self.line):
            return None

        #-- 2 --
        if s == self.line[self.pos:endPos]:
            return endPos
        return None

# - - -   S c a n . m a t c h A r b   - - -

    def matchArb(self, s):
        """
        [ if s is a string ->
            if the current line of self starts with s, ignoring
            case ->
              return the position just past the matching part
            else -> return None ]
        """
        #-- 1 --
        #-[ if (remaining line in self is shorter than s) ->
        #     return None
        #   else -> I
        #-]
        endPos = self.pos + len(s)
        if endPos > len(self.line):
            return None

        #-- 2 --
        #-[ supper  :=  s, uppercased
        #   tupper  :=  next len(s) characters from self, uppercased
        #-]
        supper = s.upper()
        tupper = self.line[self.pos:endPos].upper()

        #-- 3 --
        if supper == tupper:
            return endPos
        return None

# - - -   S c a n . t a b M a t c h   - - -

    def tabMatch(self, s):
        """
        [ if s is a string ->
            if the current line of self starts with s ->
              self  :=  self advanced past that
              return s
            else -> return None ]
        """
        #-- 1 --
        #-[ if line in self starts with s ->
        #     i  :=  position after matching part
        #   else ->
        #     i  :=  None
        #-]
        i = self.match(s)

        #-- 2 --
        if i is None:
            return None
        return self.tab(i)

# - - -   S c a n . t a b M a t c h A r b   - - -

    def tabMatchArb(self, s):
        """
        [ if s is a string ->
            if the current line of self starts with s, ignoring
            case ->
              self  :=  self advanced past that
              return the part that matched
            else -> return None ]
        """
        #-- 1 --
        #-[ if line in self starts with s, ignoring case ->
        #     i  :=  position after matching part
        #   else ->
        #     i  :=  None
        #-]
        i = self.matchArb(s)

        #-- 2 --
        if i is None:
            return None
        return self.tab(i)

# - - -   S c a n . r e M a t c h   - - -

    def reMatch(self, r):
        """Regular expression version of .match()

        [ if (r is a string or a regular expression object as
          returned by re.compile()) ->
            if r, interpreted as a regular expression, matches the
            input starting at the current position ->
              return the MatchObject from the match
            else -> return None ]

        NOTE: To advance the position past a MatchObject m, use
        scan.moveMatch(m).  Do not rely on m.end() alone: when r
        is a string the match is made against the remainder of the
        line, so m.start() is 0 and m.end() is relative to the
        current position; when r is a compiled pattern the match is
        made against the whole line starting at self.pos, so
        m.start() and m.end() are absolute line positions.
        """
        #-- 1 --
        #-[ if r is a string ->
        #     m  :=  the result of matching r, as a regular
        #            expression, at the start of self.line[self.pos:]
        #   else ->
        #     m  :=  the result of matching compiled pattern r
        #            against self.line starting at self.pos
        #-]
        if isinstance(r, str):
            target = self.line[self.pos:]
            m = re.match(r, target)
        else:
            m = r.match(self.line, self.pos)

        #-- 2 --
        return m

# - - -   S c a n . t a b R e M a t c h   - - -

    def tabReMatch(self, r):
        """Like .tabMatch(), but uses a regular expression.

        [ if (r is a string or a regular expression object as
          returned by re.compile()) ->
            if r, interpreted as a regular expression, matches the
            input starting at the current position ->
              self  :=  self advanced past the matching part
              return the MatchObject for the match
            else -> return None ]
        """
        #-- 1 --
        #-[ if r matches a string at the start of scan ->
        #     m  :=  the MatchObject from that match
        #   else ->
        #     m  :=  None
        #-]
        m = self.reMatch(r)

        #-- 2 --
        #-[ if m is None -> return None
        #   else ->
        #     self  :=  self advanced past MatchObject m
        #     return m
        #-]
        if m is None:
            return None
        self.moveMatch(m)
        return m

# - - -   S c a n . m o v e M a t c h   - - -

    def moveMatch(self, m):
        """Advance past the string represented by a MatchObject m

        [ if (m is a MatchObject) ->
            self  :=  self advanced by the length of the match
                      from m ]
        """
        # Using the match length keeps this correct whether m's
        # offsets are relative to self.pos or absolute.
        self.move(m.end() - m.start())

# - - -   S c a n . i n t e g e r   - - -

    def integer(self, maxLen=None):
        """
        [ if (maxLen is an integer or None) ->
            if the line in self starts with a digit, and (maxLen)
            is None or there are not more than (maxLen) digits ->
              self  :=  self advanced past all leading digits
              return those digits, as integer type
            else if (maxLen) is given and the line in self
            starts with more than (maxLen) digits ->
              self   :=  self advanced past all leading digits
              Log()  +:=  error, integer too long
              return the first (maxLen) digits from self, as
              integer type
            else -> return None ]
        """
        #-- 1 --
        #-[ if self starts with one or more digits ->
        #     i  :=  position after those digits
        #   else ->
        #     i  :=  None
        #-]
        # many() yields a position; tabMany() would yield the digit
        # string itself, which is not a valid argument for tab().
        i = self.many(self.DIGIT_CSET)

        #-- 2 --
        #-[ if i is None -> return None
        #   else ->
        #     self  :=  self advanced to position i
        #     text  :=  characters from the old position to i
        #-]
        if i is None:
            return None
        text = self.tab(i)

        #-- 3 --
        #-[ if (maxLen is None) or (len(text) <= maxLen) -> I
        #   else ->
        #     Log()  +:=  error, integer too long
        #     text   :=  the first (maxLen) characters of text
        #-]
        if maxLen is not None and len(text) > maxLen:
            self.error("This integer is too long, the maximum is ",
                       str(maxLen), ".")
            text = text[0:maxLen]

        #-- 4 --
        #-[ return text, converted to integer type
        #-]
        return int(text)

# - - -   S c a n . f i x e d   - - -

    def fixed(self):
        """
        [ let a fixed-point number have the syntax
              [ "-" ] digit ... [ "." [ digit ... ] ]
          in:
            if self starts with a fixed-point number ->
              self  :=  self advanced past that number
              return that number, as a string
            else -> return None ]
        """
        #-- 1 --
        #-[ if self starts with "-" ->
        #     self  :=  self advanced one
        #     sign  :=  "-"
        #   else ->
        #     sign  :=  ""
        #-]
        startPos = self.pos     # Restored if no number is found
        sign = self.tabMatch("-")
        if not sign:
            sign = ""

        #-- 2 --
        #-[ if self starts with a digit ->
        #     self  :=  self advanced past all leading digits
        #     text  :=  sign + (all leading digits from self)
        #   else ->
        #     self  :=  self returned to its starting position
        #     return None
        #-]
        mantissa = self.tabMany(self.DIGIT_CSET)
        if not mantissa:
            # Do not leave a lone "-" consumed when the parse fails.
            self.pos = startPos
            return None
        text = sign + mantissa

        #-- 3 --
        #-[ if self starts with "." ->
        #     self  :=  self advanced past the "." and any digits
        #               that immediately follow it
        #     text  :=  text + "." + (any digits following the ".")
        #   else -> I
        #-]
        if self.tabMatch(".") is not None:
            text = text + "."
            trail = self.tabMany(self.DIGIT_CSET)
            if trail is not None:
                text = text + trail

        #-- 4 --
        return text

# - - -   S c a n . f l a t C s e t   - - -

    def flatCset(self, n, c):
        """
        [ if n is a positive integer and c is a Cset ->
            if self begins with n characters found in c ->
              self  :=  self advanced by n characters
              return the next n characters from self
            else -> return None ]
        """
        #-- 1 --
        #-[ if self starts with one or more characters in c ->
        #     i  :=  the position after the last such character
        #   else ->
        #     i  :=  None
        #-]
        i = self.many(c)

        #-- 2 --
        if i is None or i - self.pos < n:
            return None

        #-- 3 --
        #-[ self  :=  self advanced by n characters
        #   return the next n characters from self
        #-]
        return self.move(n)

# - - -   S c a n . f l a t I n t   - - -

    def flatInt(self, n):
        """
        [ if n is a positive integer ->
            if self starts with an integer of size n, left-padded
            with blanks, and with a possible "-" sign located
            anywhere within those blanks ->
              self  :=  self advanced by n characters
              return that signed integer as integer type
            else -> return None
          else -> return None ]
        """
        #-- 1 --
        #-[ if n is positive and self starts with at least n
        #   characters ->
        #     text  :=  those characters
        #   else ->
        #     return None
        #-]
        if n < 1 or self.pos + n > len(self.line):
            return None
        text = self.line[self.pos:self.pos + n]

        #-- 2 --
        #-[ digits  :=  text with leading spaces, an optional "-",
        #               and any spaces after the "-" removed
        #   sign    :=  -1 if a "-" was removed, else 1
        #-]
        sign = 1                # 1 for positive, -1 for negative
        digits = text.lstrip(" ")
        if digits[:1] == "-":
            sign = -1
            digits = digits[1:].lstrip(" ")

        #-- 3 --
        #-[ if digits is nonempty and contains only digits -> I
        #   else -> return None
        #-]
        # An all-blank field or a lone "-" leaves nothing to convert.
        if not digits:
            return None
        for ch in digits:
            if not self.DIGIT_CSET.has(ch):
                return None

        #-- 4 --
        self.pos = self.pos + n
        return sign * int(digits)