# txny.icn: An object to represent a complete taxonomy (phylogenetic # arrangement of biological classifications). #-- $define TXNY_REVISION "$Revision: 1.14 $" $define TXNY_DATE "$Date: 1998/08/03 03:08:49 $" #================================================================ # Class Txny: An instance represents a complete taxonomic # arrangement of a set of organisms, structuring a set of taxa # (singular: taxon, describing one category) into a hierarchical # tree. Also supported is a complete system for encoding taxa # by an abbreviation-based code system, such as the six-letter # system used in the Christmas Bird Count project. # A number of related objects are used to represent the taxa, # the codes, and other entities. See the external documentation # for this system for full details. #---------------------------------------------------------------- # EXPORTED METHODS #---------------------------------------------------------------- # Txny_New ( log, ranksFileName, baseName ) # [ if log is a Log object or &null, and ranksFileName, stdFileName, # and altFileName are each a string or &null -> # if eff-ranks-name ( ranksFileName ) names a valid ranks file, # and eff-std-name ( baseName ) names a valid std file, # and eff-alt-name ( baseName ) names a valid alt file, # and these files are all consistent with each other -> # return a Txny object representing all those files # else -> # log ||:= error message(s) # fail # ] #-- # Txny_Root ( self ) # [ returns the Taxon object describing the root category # ] #-- # Txny_Hier ( self ) # [ returns the Hier object describing the hierarchy of taxonomic # ranks used in this arrangement # ] #-- # Txny_Lookup_Abbr ( self, abbr ) # [ if abbr matches (case-insensitive) a form code used in self -> # return an AbSym object describing abbr's use # else -> fail # ] #-- # Txny_Ab_Tab ( self ) # [ returns self's AbTab object # ] #-- # Txny_Lookup_Sci ( self, sci ) # [ if sci is a string -> # if sci matches the scientific name of a taxon in self -> # return a Taxon object representing that taxon # else -> fail # ] #-- # Txny_Lookup_Eng ( self, eng ) # [ if eng is a string -> # if eng matches (case-insensitive) the English name of a # taxon in self -> # return a Taxon object representing that taxon # else -> fail # ] #-- # Txny_Lookup_Tx_Key ( self, txKey ) # [ if txKey is a string -> # if txKey matches the taxonomic key of a taxon in self -> # return a Taxon object representing that taxon # else -> fail # ] #-- # Txny_Log ( self ) # [ returns the Log object associated with self # ] #-- # Txny_Derive_Tx_Key ( self, parent, child ) # [ child.txKey := derived-tx-key ( self, parent, child ) # ] #-- # Txny_Abbr_To_Taxon ( self, abbr ) # [ if abbr is bound to a specific taxon in self -> # return a Taxon object representing that taxon # else -> fail # ] #-- # Txny_Abbr_Scan_Error ( self, abbr, scan ) # [ if abbr is unknown (or unbound) in self -> # scan ||:= error message, abbr is unknown (or unbound) # fail # else -> # scan ||:= error message, abbr has binding (its binding) # fail # ] #-- # - - - Notational definitions - - - #---------------------------------------------------------------- # bindings-for-taxon ( txny, taxon, canon, disamb ) == # if canon is &null -> no bindings # if canon is not &null and disamb is &null -> # one StdBind binding relating canon to taxon # if canon is not &null and disamb is not &null -> # one StdBind binding relating disamb to taxon, plus # one CollBind binding relating canon to the singleton set {disamb} #-- # This definition describes the set of AbBind objects that # result from the presence or absence of the `canon' (canonical) # and `disamb' (disambiguated) abbreviations. #---------------------------------------------------------------- # derived-tx-key ( txny, parent, child ) == # if parent is &null -> "" # else -> # ( Taxon_Tx_Key ( parent ) ) || # ( a string of "0" characters, whose length is the sum # of all key lengths of ranks whose depths are in the open # interval ( depth ( parent ), depth ( child ) ) ) || # ( the birth order of child, left-zero-padded to the key length # of the child's rank ) #-- # This function is used to derive the taxonomic key of a child # from its parent. For example, if the key of the parent is # "0104", and the child is the 7rd child of the # parent at a rank that has 3 digits in its key, the child's # key will be "0104007". The middle term of the definition # ("a string of "0" characters...") handles the case where an # optional rank is omitted. For example, suppose a family has # key "0702", and the subfamily rank is optional and contributes # two digits, and the genus contributes two digits also. Then # the 14th genus under that family would have key "07020014"; # the 5th and 6th digits are a placeholder for the missing subfamily. #---------------------------------------------------------------- # eff-alt-name ( baseName ) == # eff-file-name ( baseName, DEFAULT_BASE_NAME ) || ALT_EXTENSION #-- # The effective name of the alt file. #---------------------------------------------------------------- # eff-file-name ( baseName, defaultName ) # if argName is &null -> # path-file-name ( defaultName ) # else -> # path-file-name ( argName ) #-- # This function describes the defaulting of optional filenames. #---------------------------------------------------------------- # eff-ranks-name ( fileName ) == # if fileName is &null -> # path-file-name ( DEFAULT_RANKS_FILE_NAME ) # else -> # path-file-name ( fileName ) #---------------------------------------------------------------- # eff-std-name ( baseName ) == # eff-file-name ( baseName, DEFAULT_BASE_NAME ) || STD_EXTENSION #-- # The effective name of the std file. #---------------------------------------------------------------- # path-file-name ( fileName ) == # if the environmental variable named by PATH_ENVI is defined -> # (value of that environmental variable) || "/" || fileName # else -> # DEFAULT_PATH || "/" || fileName #---------------------------------------------------------------- # path-to-rank ( txny, rank ) == # the sequence of Taxon objects T1, T2, ..., TN such that: # (a) T1 is txny.root {NB: if .root is &null, path-to-rank is empty} # (b) T[i] is the last child of T[i-1] # (c) TN either has no children, or has a last child CN # such that depth(CN) >= depth(rank) #-- # This function represents the nodes visited while walking down # the tree from the root, always taking the rightmost path, that # is, the youngest child. It is used in slotting new taxa into # the tree. #---------------------------------------------------------------- # rank-can-occur ( txny, rank ) == # if txny.root is &null -> # if rank is the root rank of txny.hier -> T # else -> F # else if rank is the root rank of txny.hier -> F # else if rank-parent ( txny, rank ) has no children -> # if there exists a non-optional rank M in txny.hier such that # depth ( rank-parent ( txny, rank ) ) < M < depth ( rank ) -> # F # else -> T # else # if depth ( last child of rank-parent ) ~= depth ( rank ) -> F # else -> T #-- # This is a predicate used to test whether a Taxon of the given # (rank) is legal given the current state of the tree. For # example, a species cannot follow a family if there are # required ranks between family and species. #---------------------------------------------------------------- # rank-parent ( txny, rank ) == # if txny.root is &null -> &null # else -> # the last element of path-to-rank ( txny, rank ) #-- # Used to determine the parent taxon of a new taxon about to # be added. #---------------------------------------------------------------- # - - - Syntactic definitions - - - #-- # ::= # ::= a string of length Hier_Rank_Code_Len(self.hier) # which is either blank or defined by # Hier_Rank_Code_Lookup(self.hier) # ::= a character in STATUS_CSET # ::= "/" # ::= ... # ::= "/" [ "/" ] # ::= ["("")"] # ::= one or more characters in WHITE_CSET # ::= ... # ::= ... # ::= ... # ::= see the Abbr object # ::= any character string not containing "/" #-- # - - - State - - - record txnyTag ( log, # A Log object for error logging during input hier, # The Hier object representing the ranks used root, # A Taxon object rooting a tree of taxa abTab, # An AbTab object containing the abbrevations sciMap, # Table: maps Taxon's lowercased sci |-> that Taxon engMap, # Table: maps Taxon's lowercased eng |-> that Taxon txKeyMap, # Table: maps Taxon's txKey |-> that Taxon scan ) # A Scan object used during input # - - - Invariants - - - #-- # .log == a Log object #-- # .hier == a Hier object representing the ranks file #-- # .root == # if the tree is empty -> &null # else -> # a Taxon object representing the root taxon of the tree, # such that every node has zero or more children that are # at ranks deeper than the parent, and all children of # a single parent have the same depth #-- # .abTab == # an AbTab object representing bindings of Abbrs for taxa # in self #-- # .sciMap == # a table with a &null default value such that for every # taxon T, self.sciMap[T's sci, lowercased] === T #-- # .engMap == # a table with a &null default value such that for every # taxon T at or deeper than the species rank in self.hier, # self.engMap[T's eng, lowercased] === T; if self.hier # does not have a species rank, this table is empty. #-- # .txKeyMap == # a table with a &null default value such that for every # taxon T, self.txKeyMap[T's txKey] === T #-- # - - - Defines - - - $define PATH_ENVI "TXNYPATH" # Envi. var. for changing path $define DEFAULT_PATH "." # Default path: current directory $define DEFAULT_RANKS_FILE_NAME "ranks" $define DEFAULT_BASE_NAME "aou7" # AOU, 7th ed, no supplements $define STD_EXTENSION ".std" $define ALT_EXTENSION ".alt" $define WARNING_KIND "Warning" # Warning class for Log_Error_Kind() # - - - T x n y _ R o o t - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_Root ( self ) return self.root; end # - - - T x n y _ H i e r - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_Hier ( self ) return self.hier; end # - - - T x n y _ L o o k u p _ A b b r - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_Lookup_Abbr ( self, abbr ) return Ab_Tab_Lookup_Abbr ( self.abTab, abbr ); end # - - - T x n y _ A b _ T a b - - - procedure Txny_Ab_Tab ( self ) return self.abTab; end # - - - T x n y _ L o o k u p _ S c i - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_Lookup_Sci ( self, sci ) return \ self.sciMap [ map ( sci ) ]; end # - - - T x n y _ L o o k u p _ E n g - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_Lookup_Eng ( self, eng ) return \ self.engMap [ map ( eng ) ]; end # - - - T x n y _ L o o k u p _ T x _ K e y - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_Lookup_Tx_Key ( self, txKey ) return \ self.txKeyMap[txKey]; end # - - - T x n y _ L o g - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_Log ( self ) return self.log; end # - - - T x n y _ N e w - - - #-- Verified 1996-09-18, A. Stavely procedure Txny_New ( log, ranksFileName, baseName ) local errCount # Error count of (log) upon entry local effRanksFileName # Effective ranks file name local effStdFileName # Effective std file name local effAltFileName # Effective alt file name local self # Result to be returned #-- 1 -- #-[ self := a new txnyTag with field values: # .log := log, defaulting to a new log object # .hier := &null # .root := &null # .abTab := a new, empty AbTab # .sciMap := a new table with &null default value # .engMap := a new table with &null default value # .txKeyMap := a new table with &null default value # .scan := &null # effRanksFileName := eff-ranks-name ( ranksFileName ) # effStdFileName := eff-std-name ( baseName ) # effAltFileName := eff-alt-name ( baseName ) # errCount := error count from (log) #-] self := txnyTag ( ); self.log := ( \ log ) | Log_Open ( ); self.abTab := Ab_Tab_New ( ); self.sciMap := table ( ); self.engMap := table ( ); self.txKeyMap := table ( ); effRanksFileName := Txny_Eff_File_Name ( ranksFileName, DEFAULT_RANKS_FILE_NAME ); effStdFileName := Txny_Eff_File_Name ( baseName, DEFAULT_BASE_NAME ) || STD_EXTENSION; effAltFileName := Txny_Eff_File_Name ( baseName, DEFAULT_BASE_NAME ) || ALT_EXTENSION; errCount := Log_Error_Count ( log ); #-- 2 -- #-[ if effRanksFileName names a valid ranks file -> # self.hier := a Hier object representing that file # | else -> # self.log ||:= error message(s) # fail #-] if not ( self.hier := Hier_New ( effRanksFileName, self ) ) then fail; #-- 3 -- #-[ if effStdFileName names a std file that is valid and # consistent with self.hier -> # self.root := a Taxon rooting a tree of taxa from that file # self.abTab +:= entries for the bindings of all abbrs used # in that file # self.sciMap +:= entries mapping the scientific names of the taxa # from that file to the corresponding Taxon objects # self.engMap +:= entries mapping lowercased English names of the # taxa at or deeper than species rank (if there # is a species rank) from that file |-> the # corresponding Taxon objects # self.txKeyMap +:= entries mapping the taxonomic keys of the taxa # from that file to the corresponding Taxon objects # | else -> # self.log ||:= error message(s) # fail #-] if not Txny_Read_Std ( self, effStdFileName ) then fail; #-- 4 -- #-[ if effAltFileName names an alt file that is valid and consistent # with the scientific names in self.sciMap, the bindings in self.abTab, # the ranks in self.hier, and the tree of standard taxa rooted at # self.root -> # self.root := self.root with new Taxon objects added for # forms deeper than species from that file # self.abTab +:= new bindings for abbrs from that file # self.engMap +:= entries mapping lowercased new English names # from that file to the corresponding Taxon objects, # for taxa at or deeper than species rank, if there # is a species rank in self.hier # self.txKeymap +:= entries mapping new taxonomic keys from # that file to the corresponding Taxon objects # | else -> # self.log ||:= error message(s) # fail #-] if not Txny_Read_Alt ( self, effAltFileName ) then fail; #-- 5 -- #-[ if self.abTab is self-consistent -> I # | else -> # self.log ||:= error message(s) #-] Txny_Final_Check ( self ); #-- 6 -- #-[ if errCount < (count of errors from (log)) -> fail # | else -> return self #-] if errCount < Log_Error_Count ( self.log ) then fail else return self; end # --- Txny_New --- # - - - T x n y _ E f f _ F i l e _ N a m e - - - #-- Verified 1996-09-18, A. Stavely # [ if fileName is a string or &null, and defaultName is a string -> # return eff-file-name ( fileName, defaultName ) # ] procedure Txny_Eff_File_Name ( fileName, defaultName ) local path #-- 1 -- #-[ if the environmental variable named by PATH_ENVI is defined -> # path := its value # | else -> # path := DEFAULT_PATH #-] path := getenv ( PATH_ENVI ) | DEFAULT_PATH; #-- 2 -- #-[ if fileName is not &null -> # return path || "/" || fileName # | else -> # return path || "/" || defaultName; #-] return path || "/" || ( ( \ fileName ) | defaultName ); end # --- Txny_Eff_File_Name --- # - - - T x n y _ F i n a l _ C h e c k - - - #-- Verified 1996-09-19, A. Stavely # [ if self.abTab is self-consistent -> I # | else -> # self.log ||:= error message(s) # ] procedure Txny_Final_Check ( self ) local errCount # self.log's error count on entry local abSym # Holds each AbSym object in self.abTab #-- 1 -- #-[ errCount := error count from self.log #-] errCount := Log_Error_Count ( self.log ); #-- 2 -- #-[ if all entries in self.abTab have bindings -> I # | else -> # self.log ||:= error message(s) #-] every abSym := Ab_Tab_Gen_Ab_Syms ( self.abTab ) do { #-- 2 body -- #-[ if abSym has a binding -> I # | else -> # self.log ||:= error, undefined abbreviation (abSym's abbr) #-] if not Ab_Sym_Binding ( abSym ) then Log_Error ( self.log, "Undefined abbreviation <", Ab_Sym_Abbr ( abSym ), ">." ); } #-- 2 body -- #-- 3 -- #-[ if (self.log's error count) > errCount -> fail # | else -> return &null #-] if Log_Error_Count ( self.log ) > errCount then fail else return &null; end # --- Txny_Final_Check --- # - - - T x n y _ D e r i v e _ T x _ K e y - - - #-- Unverified # [ if parent is a Taxon object or &null, # and child is a Taxon object -> # child.txKey := derived-tx-key ( self, parent, child ) # ] procedure Txny_Derive_Tx_Key ( self, parent, child ) local missx #-- 1 -- child.txKey := ""; #-- 2 -- #-[ if parent is &null -> # return &null # else -> I #-] if / parent then return &null; #-- 3 -- #-[ child.txKey := parent's txKey #-] child.txKey := Taxon_Short_Tx_Key ( parent ); #-- 4 -- #-[ child.txKey ||:= ( for every txLevel which is deeper than the # parent but shallower than the child, # N copies of the character "0" where N is # that level's key length ) #-] every missx := ( Taxon_Depth ( parent ) + 1 ) to ( Taxon_Depth ( child ) - 1 ) do child.txKey ||:= left ( "", Rank_Key_Len ( Hier_Nth_Rank ( self.hier, missx ) ), "0" ); #-- 4 -- #-[ child.txKey := child's birth order, left zero padded to # child's rank's key length #-] child.txKey ||:= right ( Taxon_Birth_Order ( child ), Rank_Key_Len ( Taxon_Rank ( child ) ), "0" ); end # --- Txny_Derive_Tx_Key --- # - - - T x n y _ A b b r _ T o _ T a x o n - - - procedure Txny_Abbr_To_Taxon ( self, abbr ) local abSym # The symbol table entry for abbr, if any local abBind # The bind for abSym, if any local taxon # The taxon for abBind, if any #-- 1 -- #-[ if self contains an AbSym entry for abbr -> # abSym := that AbSym # else -> # fail #-] if not ( abSym := Txny_Lookup_Abbr ( self, abbr ) ) then fail; #-- 2 -- #-[ if abSym has a binding -> # abBind := that binding, as an AbBind object # else -> # fail #-] if not ( abBind := Ab_Sym_Binding ( abSym ) ) then fail; #-- 3 -- #-[ if abBind refers to specific taxon -> # return that taxon # else -> # scan ||:= error message # fail #-] if not ( taxon := Ab_Bind_Lookup ( abBind ) ) then fail else return taxon; end # --- Txny_Abbr_To_Taxon --- # - - - T x n y _ A b b r _ S c a n _ E r r o r - - - procedure Txny_Abbr_Scan_Error ( self, abbr, scan ) #-- # This routine is intended for use in reporting errors on # undefined or ambiguous codes; it should be called only # when Txny_Abbr_To_Taxon() has failed. #-- local abSym # The symbol table entry for abbr in self, if any local abBind # The binding for abSym, if any #-- 1 -- #-[ if self contains an AbSym entry for abbr -> # abSym := that AbSym # else -> # fail #-] if not ( abSym := Txny_Lookup_Abbr ( self, abbr ) ) then return Scan_Error ( scan, "Form code `", abbr, "' is unknown." ); #-- 2 -- #-[ if abSym has a binding -> # abBind := that binding, as an AbBind object # else -> # scan ||:= error message # fail #-] if not ( abBind := Ab_Sym_Binding ( abSym ) ) then return Scan_Error ( scan, "Form code `", abbr, "' is an unknown forward reference." ); #-- 3 -- #-[ scan ||:= a description of abBind # fail #-] return Scan_Error ( scan, "Species code `", abbr, "' is invalid: ", Ab_Bind_Show ( abBind ), "." ); end # --- Txny_Abbr_Scan_Error ---