# scan.icn: Generic scanning & error reporting functions for Icon. #-- # Copyright (C) 1995 by John W. Shipman, 507 Fitch Avenue NW, Socorro, # NM 87801. All rights reserved. #-- $ifndef __SCAN_ICN__ $define __SCAN_ICN__ $define SCAN_DATE "$Date: 1999/10/02 19:00:29 $" $define SCAN_REVISION "$Revision: 1.7 $" #-- See below for revision history #================================================================ # EXPORTED METHODS: BASIC METHODS #================================================================ # Scan_Open ( # Open a stream for scanning # fileName, # Stream file name, or &null for &input # log, # Error logging object; see log.icn # commentChar, # Comment char, or &null for none # callback ): # Localization callback procedure (see Scan_Error) # [ if fileName is readable -> # returns a scan object pointing at the start of that file, # logging to (log), using (commentChar) as the line comment # character if given, and using (locProc) as the callback if given # else -> # fail # ] #-- # Scan_Close ( self ): Close stream self # [ closes self's file, if it isn't already closed # returns &null # ] #-- # Scan_End_File ( self ): # [ if self is at end of file -> # return &null # else -> fail # ] #-- # Scan_End_Line ( self ): # [ if self is at the end of the current line -> # return &null # else -> fail # ] #-- # Scan_Error ( self, text, ... ): # [ if the (text) arguments are all strings -> # same intended function as Scan_Error_Kind but with a # default value for `kind' # ] #-- # Scan_Error_Count ( self, kind ) # [ if (kind) is a string or &null -> # returns the count of errors of kind (kind) from self's log object, # where (kind) may be &null for the default error kind # ] #-- # Scan_Error_Kind ( self, kind, text, ... ); # [ if (kind) is a string or &null, and the (text) arguments are # all strings -> # if this is the first error for the current line -> # self's log ||:= a message showing the file name, line number, # and the result of self's locProc(self) if any, # followed by an echo of self's current line # In any case: # self's log ||:= message showing a carat at the position # corresponding to &pos, followed by an # error of class `kind' whose message text # is the concatenation of all `text' arguments # ] #-- # Scan_File_Name ( self ) # [ returns self's fileName # ] #-- # Scan_Gen_Lines ( self ) # [ generates the lines from self ] #-- # Scan_Line_Number ( self ) # [ returns the line number of the current line in self, counting from 1 # ] #-- # Scan_Log ( self ) # [ returns the Log object associated with self # ] #-- # Scan_Message ( self, text, ...) # [ if the (text) arguments are all strings -> # self's log ||:= message containing the concatenation # of all text args, prefixed by self's prefix # ] #-- # Scan_Next_Line ( self ): # [ if self is at end of file -> fail # else if no lines remain in self -> # close self's file # fail # | else -> # &subject := the next line from self's file # return &null # ] #-- # Scan_Prime (self ) # [ if self is at end of file -> # fail # else if self is positioned on the last line -> # self := self advanced to end of file # fail # else -> # self := self advanced to the next nonempty line # ] #-- # Scan_Version ( self ): Returns our version ID string. #-- # Scan_Write ( self, text, ...) # [ if the (text) arguments are all strings -> # self's log ||:= message containing the concatenation # of all text args, without self's prefix # ] #---------------------------------------------------------------- # UTILITY SCANNING FUNCTIONS #---------------------------------------------------------------- # Scan_Deblank ( self, wset ) # [ let whitespace = an element of wset, if given, or ' \t' # (that is, space and tab) if it is &null # in: # if wset is a cset or &null -> # if self is not at EOF and contains at least one non-whitespace # character -> # self := self advanced past any leading whitespace characters, # skipping lines if necessary until a non-whitespace # character is found # return self # else -> # fail # ] # Note: to deblank without skipping to the next line, use this construct: # tab(many(wset)) #-- # Scan_Equal_Arb ( self, s ) # [ if self starts with (s), ignoring case -> # self := self advanced by the length of (s) # return the next (length of s) characters # ] # Note: This function is like Icon's unary "=" operator, only it's # case-insensitive. #-- # Scan_Find_Arb ( self, s ) # [ if s is a nonempty string -> # generate the positions in the current line of self where s # occurs (case-insensitive) # ] # Note: This is like the Icon function find(), only case-insensitive. #-- # Scan_Fixed ( self ) # [ let have the syntax # { ... [ "." [ ] ... ]} # in: # if self starts with a -> # self := self advanced past the leading # return the , represented as a real. # else -> fail # ] # Note: This routine is for scanning fixed-point numbers. Note that it # does not allow a number to start with a decimal. I find it too hard # to distinguish ".1" from a "1" preceded by a flyspeck, so I require # that it be typed is "0.1". #-- # Scan_Integer ( self, maxLen ) # [ if maxLen is an integer or &null -> # if (maxLen) is &null, or the line in self starts with a digit # but not more than (maxLen) of them -> # self := self advanced past all leading digits # return those digits, as type integer # else if (maxLen) is given and the line in self starts with more # than (maxLen) digits -> # self := self advanced past all leading digits # self ||:= error, integer too long, maximum is (maxLen) # return the first (maxLen) digits from self # ] #-- # Scan_Match_Arb ( self, s ) # [ if s is a nonempty string -> # if string (s) is a prefix of the current line in self, using # case-insensitive comparison -> # return the position in that line where that string ends # else -> fail # ] # Note: Like the Icon match() function, only case-insensitive. #---------------------------------------------------------------- # FLAT-FILE UTILITY FUNCTIONS #---------------------------------------------------------------- # Scan_Flat_Cset ( self, n, c ) # [ if n is a positive integer and c is a cset -> # if self begins with n characters in c -> # self := self advanced by n characters # return the next n characters of self # else -> fail # ] #-- # Scan_Flat_Int ( self, n ) # [ if n is a positive integer -> # if self starts with an integer of size n, left-padded # with blanks, and with a possible "-" sign located # anywhere within those blanks -> # self := self advanced by n characters # return that signed integer as type integer # else -> # fail # ] #-- # - - - State - - - record scanTag ( # Holds all instance variables for the stream name, # Name of the file file, # The open file handle line, # The current line lineNo, # The current line number, origin 1 echoed, # True if the current line has been echoed already eof, # True if the stream is exhausted log, # Error logging object (see log.icn) locProc, # Callback procedure to supply localization info on errors comment ) # Comment character # - - - Invariants - - - #-- # This object is highly unusual in that the Icon scanning variables # &subject and &pos are part of the state. Consequently, a critical # invariant is that there is only one Scan object active at a time # in a given scope. #-- # .file == the file handle, positioned after the last character # of the current line #-- # &subject == the current line, up to but not including # scan.commentChar, the newline if there is one, or the end of # the line, whichever comes first #-- # &pos == the current position in the current line #-- # .line == the current line, up to but not including the newline # if there is one, or the end of the line, whichever comes first #-- # .lineNo == the serial number of the current line, 1 for the first #-- # .echoed == &null if no scan error has ever been logged on the # current line, # == 1 otherwise #-- # .eof == &null if the input has not been exhausted # 1 otherwise #-- # - - - Defines - - - $define SCAN_WHITE_SET ' \t' # Default whitespace definition # - - - Revision history - - - #-- # 1996-10-17 Added new methods: Scan_Flat_Cset() and Scan_Flat_Int(). #-- # 1996-09-24 Added new methods: Scan_Match_Arb(), Scan_Equal_Arb(), # and Scan_Fixed(). #-- # 1996-09-14 Added a new method, Scan_Error_Count(), which # extracts the error count from our log object. #-- # 1996-04-25 Because Kathy Hedges was having a lot of trouble # visually interpreting the error messages from webgen and # webcheck, I changed Log_Error_Kind so that it puts a blank # line out before the echo line. #-- # 1996-02-24 Added two new methods: Scan_Message(), for # multiline error messages, and Scan_Log(), for retrieving # the log attribute. #-- # 1995-12-27 Added a new method, Scan_Gen_Lines(i), using # definite iteration over the lines of a file. #-- # 1995-10-27 Added intended functions for use in Cleanroom code. # Added a function that returns our version ID. Removed old # Scan_Advance(). Scan_Look() and Scan_Look_String() removed # because they duplicate the Icon match() function. Other # minor fixes. #-- # 1995-08-31 Scan_Error() was changed to allow a list of text # arguments. Also, the routine Scan_Error_Kind() was added, # with the error kind as the 2nd argument, while it used to # be the 3rd argument of Scan_Error(). #-- # 1995-08-20 Renamed `scan.icn' from the original `iscan.icn'. # The main reason for this rewrite was to extract the error # logging logic into a separate `log object'. This was inspired # by a particular application (webcheck.icn) that parsed multiple # files but had only a single error stream, so the error stream # logic was separated out. #-- # 1994-10-25 Start date. Written to unify scanning & error message # logic for bandraw.icn, point.icn, and veg.icn. Design goals: # * Use native Icon scanning as much as possible, under # the assumption that line breaks cannot occur in mid-token. # This means that the &subject is set to the current line # whenever a new one is read. The primitive Scan_Prime() # should be called between tokens to bring up a new line # if the current one is exhausted. # * The caller will deal with decisions like whether to stop. #-- # - - - S c a n _ O p e n - - - procedure Scan_Open ( name, # File name to be opened, or &null for &input log, # Error logging object (see log.icn) comment, # Comment character, or &null for no comment char locProc ) # Callback procedure to supply localization string #-- # Opens a new stream, and returns the scanTag for it. #-- local self # scanTag to be returned #-- 1 -- #-[ if (log) is &null -> # log := a new Log object with all arguments defaulted # else -> I #-] if / log then log := Log_Open ( ); # Open an error log if not provided #-- 2 -- #-[ self := a new Scan object with .lineno (0), .eof (&null), # .log (log), .comment (comment), and .locProc (locProc) # #-] self := scanTag ( ); self.lineNo := 0; self.eof := &null; self.log := log; self.comment := comment; self.locProc := locProc; #-- 3 -- #-[ if (name) is given and can be opened -> # self.name := (name) # self.file := (name), opened for reading # else if (name) is given but can't be opened for reading -> # fail # else -> # self.name := "&input" # self.file := &input #-] if \ ( self.name := name ) then # Is there a name? self.file := open ( name ) | fail # Yes, try to open it else { self.name := "&input"; # Name of standard input self.file := &input; # Default is standard input } #-- 4 -- #-[ if there is at least one line in self.file -> # position self at the start of that line # else -> # self.file := # self.eof := 1 #-] Scan_Next_Line ( self ); # Bring up the first line #-- 5 -- return self; end # --- Scan_Open --- # - - - S c a n _ C l o s e - - - procedure Scan_Close ( self ) close ( \ self.file ); self.eof := 1; self.file := &null; end # --- Scan_Close --- # - - - S c a n _ D e b l a n k - - - procedure Scan_Deblank ( self, whiteSet ) #-- # Skip all characters in wset (default ' \t') and all empty # lines until we get to a nonblank character. If successful, # return &null, else fail. #-- / whiteSet := SCAN_WHITE_SET; while not Scan_End_File ( self ) do { #-- Bypass exhausted lines tab ( many ( whiteSet ) ); # Skip its initial blanks if not pos(0) then return; # This line has a nonblank char. Scan_Next_Line ( self ); # This line is blank, try again } #-- Bypass exhausted lines fail; end # --- Scan_Deblank --- # - - - S c a n _ E r r o r _ C o u n t - - - procedure Scan_Error_Count ( self, kind ) return Log_Error_Count ( self.log, kind ); end # - - - S c a n _ E r r o r _ K i n d - - - procedure Scan_Error_Kind ( # General syntax error/warning reporting self, # The stream msgClass, # Defaults to "Error" L[] ) # One or more text strings #-- # Writes lines (1-4) for the first error on each line; writes # only lines (3-4) for additional errors on the same line. # (1) --- File `foo' Line 43 # (2) # (3) Blank except for a "^" under the current &pos # (4) : # If the stream has an associated `callback', we call it to get the # . Using a callback is necessary because some errors are # generated inside this package, and only the caller knows what # locality information to supply. The callback is called by: # ( self ) #-- local where # String indicating file name, line number, etc local text # Concatenation of all L[] elements text := ""; # Build up the error text string every text ||:= ! L; if / self.echoed then { #-- Echo the line self.echoed := 1; # Prevent it from being echoed again where := ""; if \self.name then # Append file name, if any where ||:= "File `" || self.name || "', line " || self.lineNo else where ||:= "Line " || self.lineNo; # Append line number if \ self.locProc then where ||:= " [" || self.locProc(self) || "]"; Log_Write ( self.log, "\n--- ", where ); # Identify the source location Log_Message ( self.log, self.line ); # Echo the input line } #-- Echo the line #-- # Now display a carat at the position corresponding to &pos, and # write the . #-- Log_Message ( self.log, left ( "", &pos - 1 ) || "^" ); # Scan pointer Log_Error_Kind ( self.log, msgClass, text ); #-- # This routine always fails. This allows the caller to use a construct # like `return Scan_Error ( )' as shorthand for {Scan_Error(...);fail} #-- fail; end # --- Scan_Error_Kind --- # - - - S c a n _ E r r o r - - - procedure Scan_Error ( self, L[] ) #-- # Generates an error, using the 2nd and succeeding arguments as # error text, and sends it to the log object associated with self. #-- local text text := ""; # Build up the text string every text ||:= ! L; Scan_Error_Kind ( self, "Error", text ); end # --- Scan_Error --- # - - - S c a n _ E n d _ F i l e - - - procedure Scan_End_File ( self ) #-- # Returns &null if stream self is exhausted, else fails. #-- return \ self.eof; end # --- Scan_End_File --- # - - - S c a n _ E n d _ L i n e - - - procedure Scan_End_Line ( self ) #-- # Returns &null if stream self is at the end of the line, # else fails. #-- if \ self.eof then return; # Treat EOF as end of line also if &pos > ( * &subject ) then return; # Yes, at end of line fail; # No, not at end of line end # --- Scan_End_Line --- # - - - S c a n _ E q u a l _ A r b - - - procedure Scan_Equal_Arb ( self, s ) return tab ( Scan_Match_Arb ( self, s ) ); end # --- Scan_Equal_Arb --- # - - - S c a n _ F i l e _ N a m e - - - procedure Scan_File_Name ( self ) #-- # Returns the file name of the file we're currently in. #-- if ( / self ) | ( / self.name ) then fail; return self.name; end # --- Scan_File_Name --- # - - - S c a n _ F i n d _ A r b - - - procedure Scan_Find_Arb ( self, s ) local mapS local mapSubject #-- 1 -- #-[ mapS := (s), lowercased # mapSubject := &subject, lowercased #-] mapS := map ( s ); mapSubject := map ( &subject ); #-- 2 -- #-[ generate the positions of mapS in mapSubject, starting at &pos, # through the end of &subject #-] every suspend ( find ( mapS, mapSubject, &pos ) ); #-- 3 -- fail; end # --- Scan_Find_Arb --- # - - - S c a n _ F i x e d - - - procedure Scan_Fixed ( self ) local text # The text of the number #-- 1 -- #-[ if self starts with a digit -> # text ||:= all leading digits from self # self := self advanced past all leading digits # else -> # fail #-] if not ( text := tab ( many ( &digits ) ) ) then fail; #-- 2 -- #-[ if self starts with "." -> # text ||:= "." || (any digits following the ".") # else -> I #-] if = "." then text ||:= "." || tab ( many ( &digits ) ); #-- 3 -- #-[ return (text), as a real #-] return real ( text ); end # --- Scan_Fixed --- # - - - S c a n _ F l a t _ C s e t - - - procedure Scan_Flat_Cset ( self, n, c ) #-- 1 -- #-[ if (the position of the next character in self that is # NOT in c) < (&pos + n ) -> # fail # else -> I #-] if ( not any ( c ) ) | ( many ( c ) < ( &pos + n ) ) then fail; #-- 2 -- #-[ if at least n characters remain in the subject -> # self := self advanced by n characters # return the next n characters of self #-] return move ( n ); end # --- Scan_Flat_Cset --- # - - - S c a n _ F l a t _ I n t - - - procedure Scan_Flat_Int ( self, n ) #-- # Syntax: # [ " " ... ] [ "-" ] [ " " ... ] digit ... #-- local text # The next n characters of self local sign # The sign, +1 or -1 local magnitude # The unsigned portion #-- 1 -- sign := 1; #-- 2 -- #-[ if self starts with at least n characters -> # text := those characters # else -> # fail #-] if not ( text := &subject [ &pos +: n ] ) then fail; # NB: fails unless &subject has all those characters #-- 3 -- #-[ &subject := text # &pos := 1 #-] text ? { #-- Dissect text #-- 4 -- #-[ if the subject starts with any leading spaces -> # &pos := &pos advanced past those spaces # else -> I #-] tab ( many ( ' ' ) ); #-- 5 -- #-[ if the subject starts with "-" -> # &pos := &pos advanced past the "-" # sign := -1 # else -> I #-] if = "-" then sign := -1; #-- 6 -- #-[ if the subject starts with any leading spaces -> # &pos := &pos advanced past those spaces # else -> I #-] tab ( many ( ' ' ) ); #-- 7 -- #-[ if subject starts with a digit -> # &pos := &pos advanced past all leading digits # magnitude := all leading digits, as type integer # else -> fail #-] if not ( magnitude := integer ( tab ( many ( &digits ) ) ) ) then fail; #-- 8 -- #-[ if the subject has been consumed -> I # else -> fail {NB: failure to right-justify the number} #-] if not pos ( 0 ) then fail; } #-- Dissect text #-- 9 -- #-[ self := self advanced by n characters # return sign * magnitude #-] move ( n ); return sign * magnitude; end # --- Scan_Flat_Int --- # - - - S c a n _ G e n _ L i n e s - - - procedure Scan_Gen_Lines ( self ) #-- # Generates the lines from scan object self. # NB: Has not been verified, but derived from the logic of # several routines in webgen.icn that have been verified. #-- while not Scan_End_File ( self ) do { suspend &subject; Scan_Next_Line ( self ); } fail; end # --- Scan_Gen_Lines --- # - - - S c a n _ I n t e g e r - - - procedure Scan_Integer ( self, maxLen ) #-- # Tries to match and return an integer. If its length exceeds # maxLen (default is unlimited length), it gives an error and # returns &null. Fails if no integer there. #-- local text # Returned result if not any ( &digits ) then fail; text := tab ( many ( &digits ) ); if ( \ maxLen ) & ( maxLen < ( * text ) ) then { #-- Misconstructed integer Scan_Error ( self, "This integer is too long; the maximum is " || maxLen ); text := text[1+:maxLen]; # Crop it } #-- Misconstructed integer return integer ( text ); end # --- Scan_Integer --- # - - - S c a n _ L i n e _ N u m b e r - - - procedure Scan_Line_Number ( self ) #-- # Returns the line number of the line we're currently on. #-- return self.lineNo; end # --- Scan_Line_Number --- # - - - S c a n _ L o g - - - procedure Scan_Log ( self ) return self.log; end # - - - S c a n _ M a t c h _ A r b - - - procedure Scan_Match_Arb ( self, s ) local mapS local mapPre local result #-- 1 -- #-[ mapS := (s), lowercased #-] mapS := map ( s ); #-- 2 -- #-[ if the remainder of the line in self is shorter than (s) -> # fail # else -> # mapPre := the leading (*s) characters from that line, lowercased #-] if ( ( * &subject ) + 1 - &pos ) < ( *s ) then fail else mapPre := map ( &subject [ &pos +: ( *s ) ] ); #-- 3 -- #-[ if mapS matches mapPre -> # return the position (*s) characters past &pos # else -> # fail #-] if mapS == mapPre then return &pos + (*s) else fail; end # --- Scan_Match_Arb --- # - - - S c a n _ M e s s a g e - - - procedure Scan_Message ( self, L[] ) local text text := ""; every text ||:= ! L; Log_Message ( self.log, text ); end # - - - S c a n _ N e x t _ L i n e - - - procedure Scan_Next_Line ( self ) #-- # Read the next line from stream self, make it the &subject, set &pos to 1, # and return &null. Fail on end of file. #-- local commentPos # Position of comment character, if any if \ self.eof then # Already at end of file? fail; if not ( self.line := read ( self.file ) ) then { #-- End of file self.eof := 1; # Prevent any more read() calls, since we're... close ( self.file ); # ...closing the file. self.file := &null; # Insure that Scan_Close() doesn't reclose it fail; } #-- End of file #-- # Set up the new line as the &subject. If a comment character # is defined for this stream, and that character is found anywhere # on the line, crop the line to that position. #-- self.echoed := &null; # Clear the line-echoed flag self.lineNo +:= 1; # Count input lines if ( \ self.comment ) & ( commentPos := any ( self.comment ) ) then &subject := self.line[1:commentPos] else &subject := self.line; &pos := 1; return; end # --- Scan_Next_Line --- # - - - S c a n _ P r i m e - - - procedure Scan_Prime ( self ) #-- # If the current line is exhausted, try to bring up another one. # If successful, return &null; fail on end of file. #-- if \ self.eof then # Already at end of file? fail; if &pos > * &subject then #-- Loop until we get a nonempty line Scan_Next_Line ( self ); return; end # --- Scan_Prime --- # - - - S c a n _ W r i t e - - - procedure Scan_Write ( self, L[] ) local text text := ""; every text ||:= ! L; Log_Write ( self.log, text ); end # - - - S c a n _ V e r s i o n - - - procedure Scan_Version ( self ) return "Scan " || SCAN_REVISION || " " || SCAN_DATE; end # --- Scan_Version --- $endif