# abbr.icn: Abbreviation functions for the Txny object (q.v.) #-- $define ABBR_REVISION "$Revision: 1.10 $" $define ABBR_DATE "$Date: 1998/07/18 22:50:01 $" #================================================================ # Class Abbr: Each instance is a standard abbreviation derived # from a bird name. # Contrary to the usual practice of using a record for the # object's state, the object returned from Abbr_New will be a # string; see the Invariants section below for other constraints. # This version implements John Shipman's six-character bird # code as used in the Christmas Bird Count database project, # but other implementations are possible. #---------------------------------------------------------------- # EXPORTED METHODS #---------------------------------------------------------------- # Abbr_New ( eng ) # [ if (eng) is a string formatted as an English bird name, e.g., # "Red-tailed Hawk", and containing at least one word -> # return the canonical abbreviation derived from (eng) # ] #-- # Abbr_Length ( ) # [ returns the length of an Abbr's string form # ] #-- # Abbr_Cset ( ) # [ returns the cset including all nonblank characters that can # occur in an Abbr # ] #-- # Abbr_Scan ( scan ) # For parsing abbrs in free-form files # [ if scan is a Scan object -> # if the current line of scan starts with one or more # characters in Abbr_Cset(), but not more than # Abbr_Length() of them -> # scan := scan advanced past those characters # return those characters, uppercased and right blank # padded to size Abbr_Length() # else -> # scan := scan advanced past all initial characters # that are in Abbr_Cset() # scan ||:= error, expected abbr # fail # ] #-- # Abbr_Scan_Flat ( scan ) # For parsing fixed-field abbrs # [ if scan is a Scan object -> # if scan's current line starts with between 1 and # Abbr_Length() characters in Abbr_Cset(), followed # by enough spaces to make a total of Abbr_Length() -> # scan := scan advanced by Abbr_Length() # return the next Abbr_Length() characters of scan, # uppercased # else -> # scan := scan advanced by Abbr_Length() if possible, # else not advanced at all # scan ||:= error, expecting abbr of length (Abbr_Length()) # fail # ] #-- # Abbr_Scan_Look ( scan ) # [ if scan starts with a character in Abbr_Cset() -> # return &null # else -> # fail # ] #-- # - - - Invariants - - - #-- # An Abbr object must be a string of length Abbr_Length() # which consists of 2-6 uppercase characters (or slash, "/") # with right blank padding. #-- # - - - Notational definitions - - - #-- # one-word-rule(word) == word, truncated or blank-padded to # size ABBR_L #-- # two-word-rule(w1,w2) == # if w1 matches one of the confusing color names -> # (the substitute 3-letter color code) || left(w2,3) # else -> # left(w1,3,"-") || left(w2,3) #-- # three-word-rule(w1,w2,w3) == # if w1 matches one of the confusing color names -> # (the substitute 2-letter color code) || # left(w2,1) || left(w3,3) # else -> # left(w1,2) || left(w2,1) || left(w3,3) # # four-word-rule(w1,w2,w3,wLast) == # left(w1,1) || left(w2,1) || left(w3,1) || left(wLast,3) #-- # - - - Defines - - - $define ABBR_L 6 # Length of an Abbr # - - - A b b r _ L e n g t h - - - #-- 1996-08-20: Verified with Stavely procedure Abbr_Length ( ) return ABBR_L; end # - - - A b b r _ C s e t - - - #-- 1996-08-20: Verified with Stavely procedure Abbr_Cset ( ) static abbrCset initial { abbrCset := &letters ++ '/'; # Letters plus slash (e.g., B/CTEA) } return abbrCset; end # - - - A b b r _ S c a n _ L o o k - - - procedure Abbr_Scan_Look ( scan ) if any ( Abbr_Cset ( ) ) then return &null else fail; end # - - - A b b r _ S c a n - - - #-- 1996-08-20: Verified with Stavely procedure Abbr_Scan ( scan ) local result #-- 1 -- #-[ if scan starts with a character in Abbr_Cset() -> # result := that and all adjacent characters in Abbr_Cset() # scan := scan advanced past all those characters # | else -> # scan ||:= error, expecting abbr # fail #-] if not ( result := tab ( many ( Abbr_Cset ( ) ) ) ) then return Scan_Error ( scan, "Expecting a bird abbreviation of length 1 to ", ABBR_L, " characters." ); #-- 2 -- #-[ if result has more than ABBR_L characters -> # scan ||:= error, abbr too long # fail # | else -> I #-] if ABBR_L < * result then return Scan_Error ( scan, "This bird abbreviation <", result, "> is too long, only ", ABBR_L, " characters are permitted." ); #-- 3 -- #-[ return result, right-blank-filled to size ABBR_L and uppercased #-] return left ( map ( result, &lcase, &ucase ), ABBR_L ); end # --- Abbr_Scan --- # - - - A b b r _ S c a n _ F l a t - - - #-- 1996-08-20: Verified with Stavely procedure Abbr_Scan_Flat ( scan ) local result # Result to be returned local validLen # Size of valid part #-- 1 -- #-[ if scan starts with at least ABBR_L characters -> # scan := scan advanced by ABBR_L characters # result := those characters, upshifted # | else -> # scan ||:= error, expecting abbr of size ABBR_L # fail #-] if not ( result := map ( move ( ABBR_L ), &lcase, &ucase ) ) then return Scan_Error ( scan, "Expecting a bird abbreviation of size ", ABBR_L, "." ); #-- 2 -- #-[ &subject := result # &pos := 1 #-] result ? { #-- Dissect result #-- 3 -- #-[ &pos := &pos advanced past all contiguous characters # in Abbr_Cset() #-] tab ( many ( Abbr_Cset ( ) ) ); #-- 4 -- #-[ &pos := &pos advanced past all spaces #-] tab ( many ( ' ' ) ); #-- 5 -- #-[ validLen := number of characters scanned in #-] validLen := &pos - 1; } #-- Dissect result #-- 6 -- #-[ if validLen < ABBR_L -> # scan ||:= error, character result[validLen+1] is not a # valid abbr char # fail # | else -> I #-] if validLen < ABBR_L then return Scan_Error ( scan, "Character `", result[validLen+1], "' is not valid in a bird abbreviation." ); #-- 7 -- return result; end # --- Abbr_Scan_Flat --- # - - - A b b r _ N e w - - - #-- 1996-08-20: Verified with Stavely procedure Abbr_New ( eng ) local words # String without hyphens or spaces local wordList # List of words in (words) local result # Result to be returned #-- 1 -- #-[ words := (eng) uppercased and trimmed at both ends and # with all hyphens changed to spaces #-] words := trim ( map ( eng, "-" || &lcase, " " || &ucase ) ); words := reverse ( trim ( reverse ( words ) ) ); #-- 2 -- #-[ wordList := (words) broken into a list on spaces, with # spaces omitted #-] wordList := Abbr_Split ( words ); #-- 3 -- #-[ if wordList has 1 or more elements -> # if wordList has 4 or more elements -> # result := four-word-rule(wordList[1], wordList[2], # wordList[3], wordList[-1]) # if wordList has 3 elements -> # result := three-word-rule(wordList[1], wordList[2], wordList[3]) # if wordList has 2 elements -> # result := two-word-rule(wordList[1], wordList[2]) # if wordList has 1 element -> # result := one-word-rule(wordList[1]) #-] case * wordList of { 1: result := left ( wordList[1], ABBR_L ) 2: result := Abbr_Two_Rule ( wordList[1], wordList[2] ) 3: result := Abbr_Three_Rule ( wordList[1], wordList[2], wordList[3] ) default: result := Abbr_Four_Rule ( wordList[1], wordList[2], wordList[3], wordList[-1] ) } #-- 4 -- return result; end # --- Abbr_New --- # - - - A b b r _ S p l i t - - - #-- 1996-08-22: Verified with Stavely #-[ if text is a nonempty string without leading or trailing spaces -> # return a list of the words in text that are separated by one # or more spaces #-] procedure Abbr_Split ( text ) local result # Resulting list to be returned local spacePos # Position of the next space #-- 1 -- #-[ result := an empty list # &subject := text # &pos := 1 #-] result := []; text ? { #-- Dissect text #-- 2 -- #-[ if contains any spaces -> # result ||:= one string for each word that is followed by a space # &pos := &pos advanced after the last space # | else -> I #-] while ( spacePos := find ( ' ' ) ) do { #-- 2 body -- #-[ if spacePos points to the next space in -> # result := result with a new string added, consisting # of the characters between &pos and spacePos # &pos := &pos advanced to the next nonblank character # after spacePos, or to the end, whichever is first #-] put ( result, tab ( spacePos ) ); tab ( many ( ' ' ) ); } #-- 2 body -- #-- 3 -- #-[ result ||:= a string made from the rest of #-] put ( result, tab ( 0 ) ); } #-- Dissect text #-- 4 -- return result; end # --- Abbr_Split --- # - - - A b b r _ T w o _ R u l e - - - #-- 1996-10-16: Verified, Stavely # [ return two-word-rule(w1, w2) # ] procedure Abbr_Two_Rule ( w1, w2 ) local head # First 3 letters #-- 1 -- #-[ head := first three letters of w1, right-padded with hyphens # to 3 #-] head := left ( w1, 3, "-" ); #-- 2 -- #-[ if w1 matches a confusing color name -> # head := corresponding 3-letter substitute # | else -> I #-] head := Color_Sub_3 ( Color_Sub_Check ( w1 ) ); #-- 3 -- return head || left ( w2, 3 ); end # --- Abbr_Two_Rule --- # - - - A b b r _ T h r e e _ R u l e - - - #-- 1996-10-16: Verified, Stavely # [ if w1 is a string whose length is >=2, # and w2 is a string whose length is >=1, # and w3 is a string -> # returns three-word-rule(w1, w2, w3) # ] procedure Abbr_Three_Rule ( w1, w2, w3 ) local head # Color-dependent part, first 2 chars local tail # Non-color-dependent part, last 4 #-- 1 -- #-[ head := first 2 characters of w1 # tail := (first char. of w2) || (first 3 chars. of w3 ) #-] head := left ( w1, 2 ); tail := left ( w2, 1 ) || left ( w3, 3 ); #-- 2 -- #-[ if w1 matches a confusing color name -> # head := the corresponding 2-letter substitute code # | else -> I #-] head := Color_Sub_2 ( Color_Sub_Check ( w1 ) ); #-- 3 -- return head || tail; end # --- Abbr_Three_Rule --- # - - - A b b r _ F o u r _ R u l e - - - #-- 1996-08-22: Verified with Stavely # [ if w1, w2, w3, and wLast are all nonempty strings -> # return abbr-four-rule(w1, w2, w3, wLast) # ] procedure Abbr_Four_Rule ( w1, w2, w3, wLast ) return left(w1,1) || left(w2,1) || left(w3,1) || left(wLast,3); end # --- Abbr_Four_Rule --- #================================================================ # Class ColorSub: An object, internal to Abbr, that represents # substitutes for confusing color names. #---------------------------------------------------------------- record colorSubTag ( sub2, # Substitute code, exactly 2 letters sub3 ) # Substitute code, exactly 3 letters # - - - C o l o r _ S u b _ C h e c k - - - # [ if name is one of the confusing color names -> # return a ColorSub giving substitutes for that color # | else -> fail # ] procedure Color_Sub_Check ( name ) static colorSubMap # Table: maps lowercased name |-> ColorSub initial { colorSubMap := table ( ); colorSubMap [ "black" ] := colorSubTag ( "BK", "BLK" ); colorSubMap [ "blue" ] := colorSubTag ( "BU", "BLU" ); colorSubMap [ "brown" ] := colorSubTag ( "BN", "BRN" ); colorSubMap [ "gray" ] := colorSubTag ( "GY", "GRY" ); colorSubMap [ "grey" ] := colorSubTag ( "GY", "GRY" ); colorSubMap [ "green" ] := colorSubTag ( "GN", "GRN" ); } return \ colorSubMap[map(name)]; # Fails if not in the map end # --- Color_Sub_Check --- # - - - C o l o r _ S u b _ 3 - - - procedure Color_Sub_3 ( self ) return self.sub3; end # - - - C o l o r _ S u b _ 2 - - - procedure Color_Sub_2 ( self ) return self.sub2; end