#!/usr/local/bin/python #================================================================ # nombuild.py: Write flat files for the six-letter bird codes. # $Revision: 1.6 $ $Date: 2008/09/21 22:41:28 $ # Documentation: http://www.nmt.edu/~shipman/z/cbc/nomo.html #================================================================ # Command line arguments: # nombuild.py [-x] [basename] # where: # -x Write XML output; default is to flat files # basename is the base filename for the standard forms # file and the alternate forms file, e.g., # "aou7" for the 7th edition, or "aou743" for the # 7th edition with supplements through the 43rd #================================================================ # CONTENTS # 1. Overall intended function # 2. Specification functions # 3. Imports # 4. Manifest constants # 5. Classes and functions # class Args: Represents the command line arguments. # class AbTab: A symbol table for Abbr objects. # class AbSym: Symbol table entry object for AbTab # class AbBind: Base class for symbol bindings # class StdBind(AbBind): Symbol is a standard taxon # class CollBind(AbBind): Symbol is a collision # class EqBind(AbBind): Symbol is a cross-reference # class HtBind(AbBind): Symbol is a higher taxon than species # class Txny: Represents the entire taxonomy. # .__readStd(): Process standard forms file # .__parseStd(): Parse one line of the standard forms file # .__addRawStd(): Add a RawStd object to the taxonomy # .__addRawNonSp(): Add non-species form # .__addTaxon(): Add a Taxon object to the tree # .__findParent(): Where should the taxon go? # .__rankParent(): Whose kid is this? # .__addTaxonToMaps(): Add taxon to dictionaries # .__addEngMap(): Add to English name map # .__addSciMap(): Add to scientific name map # .__addTxKeyMap(): Add to taxonomic key no. 
map # .__addAbMap(): Add to bird-codes map # .__addAbMapCanon(): Add canonical bird code # .__addAbMapDisamb(): Add collision resolution # .__addRawSpGroup(): Add taxa from species line # .__addRawGenus(): Add genus from species line # .__hasTaxonChanged(): Is this a new genus? # .__addRawSubgenus(): Add subgenus from species line # .__addRawSpecies(): Add species from species line # .__readAlt(): Process alternate forms file # .__parseAlt(): Parse one alternate form line # .__parseHigherAlt(): Parse a higher-taxon (" ") alt form # .__bindHigherAlt(): Bind a bird code to a higher taxon # .__parseEquivalent(): Parse a cross-reference ("=") code # .__parseSubspecific(): Parse a subspecific ("<") form # .__bindSubspecific(): Bind a bird code to a form # .__addForm(): Create a subspecific taxon # .__parseCollisionCluster(): Parse a collision ("?") line # .__buildCollisionSet(): Collect disambiguations # .dispatchTable: Case table for alternate form type codes # .__finalCheck(): Check for overall validity # class Taxon: Represents one taxon (species, genus, family, etc.) # class RawStd: Represents the raw line from the std-file. # class StdHead: Represents the rank and status parts of a line. # class SpTail: Represents the rest of the line for species. # class NonSpTail: Represents the rest of a non-species line. # writeTreeFile(): Write .tre file # writeAbbrFile(): Write .ab6 file # writeCollFile(): Write .col file # 6. 
Main #================================================================ # Overall intended function #---------------------------------------------------------------- # [ if ( ( sys.argv contains valid command line arguments ) and # ( ranks-file is a readable, valid ranks file ) and # ( std-file(base-name(sys.argv)) is a readable, valid # standard forms file conforming to that ranks file ) and # ( alt-file(base-name(sys.argv)) is a readable, valid # alternate forms file consistent with the std-file ) ) -> # tree-file(base-name(sys.argv)) := tree-data(std-file, # alt-file) # abbr-file(base-name(sys.argv)) := abbr-data(std-file, # alt-file) # coll-file(base-name(sys.argv)) := coll-data(std-file, # alt-file) #---------------------------------------------------------------- #================================================================ # Specification functions #---------------------------------------------------------------- # abbr-file(basename) == # basename + ABBR_EXTENSION #-- # The name of the abbreviations file. #---------------------------------------------------------------- # abbr-data(std, alt) == # abbreviations file contents derived from std and alt #-- # std is the contents of the standard forms file, and alt the # contents of the alternate forms file. #---------------------------------------------------------------- # alt-file(basename) == # basename + ALT_EXTENSION #-- # The name of the alternate forms file. 
#---------------------------------------------------------------- # base-name(argv) == # if argv has a basename argument -> # that argument # else -> # DEFAULT_BASENAME #-- # argv is sys.argv, and basename is the positional argument # that specifies the base file name for std-file and alt-file #---------------------------------------------------------------- # bindings-for-taxon ( txny, taxon, canon, disamb ) == # if canon is None -> no bindings # if canon is not None and disamb is None -> # one StdBind binding relating canon to taxon # if canon is not None and disamb is not None -> # one StdBind binding relating disamb to taxon, plus # one CollBind binding relating canon to the singleton # set {disamb} #-- # This definition describes the set of AbBind objects that # result from the presence or absence of the `canon' (canonical) # and `disamb' (disambiguated) abbreviations. #---------------------------------------------------------------- # coll-file(basename) == # basename + COLL_EXTENSION #-- # The name of the collisions file. #---------------------------------------------------------------- # coll-data(std, alt) == # collisions file contents derived from std and alt #---------------------------------------------------------------- # derived-tx-key ( txny, parent, child ) == # if parent is None -> "" # else -> # (parent.txKey + (a string of "0" characters, whose length # is the sum of all key lengths of ranks whose depths are in # the open interval (parent.depth, child.depth)) + # (the birth order of child, left-zero-padded to the key length # of the child's rank)) #-- # This function is used to derive the taxonomic key of a child # from its parent. For example, if the key of the parent is # "0104", and the child is the 7th child of the # parent at a rank that has 3 digits in its key, the child's # key will be "0104007". The middle term of the definition # ("a string of "0" characters...") handles the case where an # optional rank is omitted. 
For example, suppose a family has # key "0702", and the subfamily rank is optional and contributes # two digits, and the genus contributes two digits also. Then # the 14th genus under that family would have key "07020014"; # the 5th and 6th digits are a placeholder for the missing subfamily. #---------------------------------------------------------------- # path-to-rank ( txny, rank ) == # the sequence of Taxon objects T1, T2, ..., TN such that: # (a) T1 is txny.root {NB: if .root is None, path-to-rank is empty} # (b) T[i] is the last child of T[i-1] # (c) TN either has no children, or has a last child CN # such that depth(CN) >= depth(rank) #-- # This function represents the nodes visited while walking down # the tree from the root, always taking the rightmost path, that # is, the youngest child. It is used in slotting new taxa into # the tree. #---------------------------------------------------------------- # rank-can-occur ( txny, rank ) == # if txny.root is None -> # if rank is the root rank of txny.hier -> 1 # else -> 0 # else if rank is the root rank of txny.hier -> 0 # else if rank-parent ( txny, rank ) has no children -> # if there exists a non-optional rank M in txny.hier such that # depth ( rank-parent ( txny, rank ) ) < M < depth ( rank ) -> # 0 # else -> 1 # else # if depth ( last child of rank-parent ) != depth ( rank ) -> 0 # else -> 1 #-- # This is a predicate used to test whether a Taxon of the given # (rank) is legal given the current state of the tree. For # example, a species cannot follow a family if there are # required ranks between family and species. #---------------------------------------------------------------- # rank-parent ( txny, rank ) == # if txny.root is None -> None # else -> # the last element of path-to-rank ( txny, rank ) #-- # Used to determine the parent taxon of a new taxon about to # be added. 
#---------------------------------------------------------------- # ranks-file == # the file with the default ranks file name from hier.py #---------------------------------------------------------------- # std-file(basename) == # basename + STD_EXTENSION #-- # The name of the standard forms file. #---------------------------------------------------------------- # tree-file(basename) == # basename + TREE_EXTENSION #---------------------------------------------------------------- # tree-data(std, alt) == # tree file contents derived from std and alt #---------------------------------------------------------------- # xml-file(basename) == # basename + XML_EXTENSION #---------------------------------------------------------------- # xml-tree-data(txny) == # txny represented as an XML tree conforming to txny.rnc schema #---------------------------------------------------------------- #================================================================ # Imports #---------------------------------------------------------------- #-- # Python standard modules #-- from __future__ import generators # Allow generators import sys # Standard command lines and streams import re # Standard regular expression package #-- # Shipman standard Python modules #-- from log import * # Error logging module from scan import * # Stream scanning module from set import * # Set of immutable objects from tree import * # Generic tree node object from sysargs import * # Command line argument checker import xmlcreate as xc # XML document creation module #-- # Modules specific to this application #-- from hier import * # Taxonomic hierarchy object #-- # Currently (2008-09-21) there is a completely new and different # 'abbr.py' that is incompatible with the one used by this # program, which lives in ~/www/nomo/abbr.py (NOT xnomo). # Today I made a copy of the latter here called 'oldabbr' # so that if I ever remove the ~/www/nomo directory, this # program will still work. 
#-- from oldabbr import * # Standard bird codes or `abbrs' from txny_schema import * # XML element and attribute names #================================================================ # Manifest constants #---------------------------------------------------------------- PROGRAM_NAME = "nombuild" EXTERNAL_VERSION = "2.0" #-- # File names and extensions #-- LOG_FILE = "nombuild.log" # Error logging file STD_EXTENSION = ".std" # Extension for standard forms file ALT_EXTENSION = ".alt" # Extension for alternate forms file TREE_EXTENSION = ".tre" # Extension for tree file ABBR_EXTENSION = ".ab6" # Extension for abbreviations file COLL_EXTENSION = ".col" # Extension for collisions file XML_EXTENSION = ".xml" # Extension for XML output DEFAULT_BASENAME = "aou744" # Default taxonomy file set #-- # Command line arguments #-- XML_SWITCH = "x" switchSpecs = [ SwitchArg ( XML_SWITCH, [ "Generate XML output" ] ) ] BASENAME_KEY = "basename" posSpecs = [ # Positional argument descriptions PosArg ( BASENAME_KEY, [ "Base file name for taxonomy files" ] ) ] #-- # Format of the standard forms (input) file #-- L_STATUS = 1 # Status code length statusRe = re.compile ( # Regular expression for the status code r'[ +\?]' ) # ' '=normal, '?'=questionable occurrence, '+'=extinct htNameRe = re.compile ( # Regex for names of higher taxa r'[A-Z][a-z]+' ) # Capital followed by one or more lowercase SUBGENUS_GROUP = "s" # Group name in following r.e. 
subgenusRe = re.compile (       # Subgenus in parentheses
    r'\('                       # Matches a literal left parenthesis
    r'(?P<%s>[A-Z][a-z]+)'      # Matches the subgenus name
    r'\)'                       # Matches a literal right parenthesis
    % SUBGENUS_GROUP )
speciesRe = re.compile (        # Species name (without genus)
    r'[a-z]+' )                 # Matches one or more lowercase letters
slashCset = Cset("/")           # Cset for the slash character

#--
# Binding types for AbBind.className attributes
#--
AB_BIND_CLASS = "AbBind"
STD_BIND_CLASS = "StdBind"
COLL_BIND_CLASS = "CollBind"
EQ_BIND_CLASS = "EqBind"
HT_BIND_CLASS = "HtBind"

#--
# Format of the tree (output) file
#--
L_SCI = 36      # Capacity of scientific name field
L_ENG = 56      # Capacity of English name field

#--
# Constants for the XML representations.  Keep in sync with the
# txny.rnc schema.  Constants ending with _N are nodes, _A are attributes.
#--

#================================================================
# Classes and functions
#----------------------------------------------------------------


# - - - - -   c l a s s   A r g s   - - - - -

class Args:
    """Represents the command line arguments.

      Exports:
        Args():
          [ if sys.argv contains valid command line arguments for
            this program ->
              return a new Args object representing those arguments
            else ->
              sys.stderr  +:=  (usage message) + (error message)
              stop execution ]
        .xmlOutput:     [ true if XML output option asserted ]
        .basename:      [ the base-name argument from sys.argv ]
    """

    def __init__ ( self ):
        "Constructor for Args"
        #-- 1 --
        # [ if sys.argv contains only switches in switchSpecs and
        #   positional arguments conforming to posSpecs ->
        #     sysArgs  :=  a SysArgs object representing those
        #                  arguments
        #   else ->
        #     sys.stderr  +:=  (usage message) + (error message)
        #     stop execution ]
        sysArgs = SysArgs ( switchSpecs, posSpecs )

        #-- 2 --
        # [ self.xmlOutput  :=  XML_SWITCH option from sysArgs
        #   self.basename   :=  BASENAME_KEY positional argument
        #                       from sysArgs ]
        self.xmlOutput = sysArgs.switchMap[XML_SWITCH]
        self.basename = sysArgs.posMap[BASENAME_KEY]


# - - - - -   c l a s s   A b T a b   - - - - -

class AbTab:
    """A container for AbSym objects describing abbr bindings.

      Exports:
        AbTab():        [ return a new, empty AbTab ]
        .lookupAbbr ( abbr ):
          [ abbr is a string ->
              if abbr is in self ->
                return the AbSym object for that abbr
              else -> raise KeyError ]
        .addAbbr ( abbr ):
          [ abbr is a string ->
              if abbr is in self ->
                return the AbSym object for that abbr
              else ->
                self  :=  self with a new AbSym object for that abbr
                return that new AbSym object ]
        .genAbSyms():
          [ generate the AbSym objects in self in ascending order
            by abbr ]

      State/Invariants:
        .__abbrMap:
          [ a dictionary mapping A |-> T for A in all abbrs in
            self, uppercased, and T the corresponding AbSym object ]
    """

# - - -   A b T a b . _ _ i n i t _ _   - - -

    def __init__ ( self ):
        "Constructor for AbTab"
        self.__abbrMap = {}

# - - -   A b T a b . l o o k u p A b b r   - - -

    def lookupAbbr ( self, abbr ):
        "Find the symbol table entry for a given abbr."
        # Keys are stored uppercased, so the lookup is case-insensitive.
        return self.__abbrMap[abbr.upper()]

# - - -   A b T a b . a d d A b b r   - - -

    def addAbbr ( self, abbr ):
        "Return or create a symbol table entry for abbr."
        #-- 1 --
        key = abbr.upper()

        #-- 2 --
        # [ if self.__abbrMap has a key=(key) ->
        #     result  :=  the corresponding value
        #   else ->
        #     self  :=  self with a new entry mapping key |->
        #               a new AbSym with no binding
        #     result  :=  that new AbSym ]
        try:
            result = self.__abbrMap[key]
        except KeyError:
            self.__abbrMap[key] = result = AbSym ( key )

        #-- 3 --
        return result

# - - -   A b T a b . g e n A b S y m s   - - -

    def genAbSyms ( self ):
        "Generate all entries in ascending order by abbr"
        #-- 1 --
        # [ keyList  :=  list of all keys in self.__abbrMap in
        #                ascending order ]
        # list() makes this work whether .keys() returns a list
        # (Python 2) or a view (Python 3, which has no .sort()).
        keyList = list ( self.__abbrMap.keys() )
        keyList.sort()

        #-- 2 --
        # [ generate all values from self.__abbrMap in order
        #   using the elements of keyList ]
        # The generator ends by falling off the end of the function.
        # An explicit `raise StopIteration' here would be converted
        # to RuntimeError under PEP 479 (Python 3.7+).
        for key in keyList:
            yield self.__abbrMap[key]


# - - - - -   c l a s s   A b S y m   - - - - -

class AbSym:
    """Each instance represents one abbr, and its current binding if any.

      An AbTab is basically a symbol table, and we have to deal
      with the `forward reference' problem, that is, the case where
      we may encounter a reference to a symbol before that symbol
      has been defined.  When we do, we create an AbSym with no
      binding.  Later, when we encounter the definition of that
      symbol, we attach a binding (in the form of an AbBind object)
      that describes its definition.

      Normally, each symbol should have exactly one binding.  If
      there is no binding for a symbol after we've read all the
      input files, that's an error---a symbol was referred to but
      never defined.

      Also, in MOST cases, if we try to attach a binding to a
      symbol that already has one, that's an error too: the symbol
      is multiply defined.

      However, there is a particular problem with clusters of names
      that all abbreviate to the same code (e.g., Blackburnian
      Warbler and Blackpoll Warbler would both be BLAWAR by the
      rules).  There is a special type of binding called a CollBind
      that tracks these collisions.  In this one case, if the symbol
      has an existing CollBind binding and we are trying to attach
      a new CollBind binding, rather than reporting a multiple
      definition, we COMBINE the two CollBind bindings into one new
      CollBind binding that includes all the disambiguations
      (substitute, non-colliding abbreviations) in both bindings.
      See the AbBind object for more details of the process of
      combining bindings.

      Exports:
        AbSym ( abbr ):
          [ abbr is a string ->
              return a new AbSym for abbr=abbr and no binding ]
        .abbr:      [ as passed to constructor ]
        .binding:
          [ if self has no binding -> None
            else -> an AbBind object with self's current binding ]
        .bind ( abBind ):
          [ abBind is an AbBind object ->
              if self has no binding ->
                self  :=  self with binding abBind
              else if abBind can be combined with the existing
              binding of self ->
                self  :=  self with its binding combined with abBind
              else -> raise ValueError ]
    """

# - - -   A b S y m . _ _ i n i t _ _   - - -

    def __init__ ( self, abbr ):
        "Constructor for AbSym"
        self.abbr = abbr
        self.binding = None

# - - -   A b S y m . b i n d   - - -

    def bind ( self, abBind ):
        "Try to add a binding to self."
        #-- 1 --
        # [ if self has no binding ->
        #     self  :=  self with binding abBind
        #     return
        #   else -> I ]
        if self.binding is None:
            self.binding = abBind
            return

        #-- 2 --
        # [ if self's binding combines with abBind ->
        #     combo  :=  self's binding combined with abBind
        #   else ->
        #     raise ValueError ]
        # The existing binding decides whether combination is legal;
        # self.binding is left untouched if .combine() raises.
        combo = self.binding.combine ( abBind )

        #-- 3 --
        self.binding = combo


# - - - - -   c l a s s   A b B i n d   - - - - -

class AbBind:
    """Class to represent bindings of an abbreviation.

      See the narrative in class AbSym about the relationship
      between AbSym objects and their bindings as AbBind objects.

      This is one of the rare cases in which the author has
      actually needed polymorphism.  Below are declared several
      subclasses of AbBind that define the different kinds of
      bindings.  Each must supply the virtual methods listed below.

      Exports:
        AbBind ( abbr ):
          [ abbr is a valid abbr ->
              create a new AbBind object that describes a binding
              for abbr ]
        .abbr:      [ as passed to constructor ]

      Virtual members/methods:
        .className:
          [ a string that is the same for all bindings of the
            same type ]
        .show():
          [ return self as a string for error message purposes ]
        .combine ( other ):
          [ other is an AbBind object ->
              if self and other can be combined ->
                return a new AbBind object representing the
                combination
              else -> raise ValueError ]
        .lookup():
          [ if self is unambiguously associated with a specific
            Taxon object ->
              return that Taxon
            else -> return None ]
        .eng():
          [ if self describes a binding based on an English name ->
              return that name as a string
            else -> return None ]
        .xmlWrite ( parentNode ):
          [ parentNode is a DOM Element node ->
              parentNode  :=  parentNode with a new child node added
                              representing self ]
    """

# - - -   A b B i n d . _ _ i n i t _ _   - - -

    def __init__ ( self, abbr ):
        "Constructor for AbBind."
        self.abbr = abbr
        self.className = AB_BIND_CLASS

# - - -   A b B i n d . s h o w   - - -

    def show ( self ):
        "Virtual: subclasses return self as a string."
        raise NotImplementedError

# - - -   A b B i n d . c o m b i n e   - - -

    def combine ( self, other ):
        "Virtual: subclasses combine self with other or raise ValueError."
        raise NotImplementedError

# - - -   A b B i n d . l o o k u p   - - -

    def lookup ( self ):
        "Virtual: subclasses return the bound Taxon, or None."
        raise NotImplementedError

# - - -   A b B i n d . e n g   - - -

    def eng ( self ):
        "Virtual: subclasses return the English name, or None."
        raise NotImplementedError
x m l W r i t e - - - def xmlWrite(self, parentNode): """Generate an XML subtree representing self (for valid codes) """ #-- 1 -- # [ parentNode := parentNode with a new ABBR_N node added # abbrNode := that new node ] abbrNode = xc.Element ( parentNode, ABBR_N ) #-- 2 -- # [ eng := English name for self # sci := scientific name for self ] eng = self.eng() taxon = self.lookup() sci = taxon.sci #-- 3 -- # [ abbrNode := abbrNode with attributes and children # added representing self ] abbrNode[CODE_A] = self.abbr.rstrip() abbrNode[SCI_A] = sci xc.Text ( abbrNode, eng ) # - - - - - c l a s s S t d B i n d - - - - - class StdBind(AbBind): """Represents a standard taxon from std-file or alt-file. Exports: StdBind ( abbr, taxon ): [ (abbr is an abbr) and (taxon is a Taxon) -> return a new StdBind representing a binding of that abbr to that taxon ] .abbr: [ as passed to constructor ] .taxon: [ as passed to constructor ] """ # - - - S t d B i n d . _ _ i n i t _ _ - - - def __init__ ( self, abbr, taxon ): "Constructor for StdBind" #-- 1 -- # [ self := self with parent class invariants in place ] AbBind.__init__ ( self, abbr ) #-- 2 -- self.className = STD_BIND_CLASS self.taxon = taxon # - - - S t d B i n d . s h o w - - - def show ( self ): "Display self as a string" return "%s -> %s" % ( self.abbr, str(self.taxon) ) # - - - S t d B i n d . c o m b i n e - - - def combine ( self, other ): "Standard bindings do not combine." raise ValueError, "Standard bindings do not combine." # - - - S t d B i n d . l o o k u p - - - def lookup ( self ): "Return self's taxon." return self.taxon # - - - S t d B i n d . e n g - - - def eng ( self ): "Return the English name for this binding." return self.taxon.eng # - - - - - c l a s s C o l l B i n d - - - - - class CollBind(AbBind): """Represents a cluster of names sharing a collision form. 
Exports: CollBind ( abbr, collSet ): [ (abbr is an abbr string) and (collSet is a Set of disambiguated abbrs) -> return a new CollBind object representing those values ] .abbr: [ as passed to constructor ] .collSet: [ as passed to constructor ] """ # - - - C o l l B i n d . _ _ i n i t _ _ - - - def __init__ ( self, abbr, collSet ): "Constructor for CollBind" #-- 1 -- # [ self := self with parent class invariants in place ] AbBind.__init__ ( self, abbr ) #-- 2 -- self.className = COLL_BIND_CLASS self.collSet = collSet # - - - C o l l B i n d . s h o w - - - def show ( self ): "Display self as a string." L = [ "Invalid code `%s', use one of: " % self.abbr ] self.collSet.sort() for coll in self.collSet: L.append ( " %s" % coll ) return "".join(L) # - - - C o l l B i n d . c o m b i n e - - - def combine ( self, other ): "Form the union of two collision sets." #-- 1 -- if self.className != other.className: raise ValueError, ( "Bindings (%s, %s) do not combine" % ( self.className, other.className ) ) #-- 2 -- # [ return a new CollBind with abbr=self.abbr and # collSet=(union of self.collSet and other.collSet) ] newSet = self.collSet.union(other.collSet) return CollBind ( self.abbr, newSet ) # - - - C o l l B i n d . l o o k u p - - - def lookup ( self ): "Fails because a collision binding is not associated w/a Taxon." return None # - - - C o l l B i n d . e n g - - - def eng ( self ): "Fails because a collision binding is not based on an English name." return None # - - - C o l l B i n d . 
x m l W r i t e - - - def xmlWrite ( self, parentNode ): """Create a COLLISION_N node representing self.""" #-- 1 -- # [ parentNode := parentNode with a new COLLISION_N node added # with BAD_ABBR_A set to self.abbr # collNode := that new node ] collNode = xc.Element ( parentNode, COLLISION_N ) collNode[BAD_ABBR_A] = self.abbr #-- 2 -- # [ collNode := collNode with one GOOD_CODE_N added # representing each disambiguation of self ] for goodAbbr in self.collSet.genSorted(): #-- 2 body -- # [ collNode := collNode with a GOOD_CODE_N added # representing goodAbbr ] goodNode = xc.Element ( collNode, GOOD_ABBR_N ) xc.Text ( goodNode, goodAbbr ) # - - - - - c l a s s E q B i n d - - - - - class EqBind(AbBind): """Represents a deprecated abbr referenced to some other preferred abbr. Exports: EqBind ( abbr, prefSym, engName ): [ (abbr is an abbr string) and (prefSym is the preferred abbr as an AbSym) and (engName is the English name on which self is based) -> return a new EqBind representing that binding ] .abbr: [ as passed to constructor ] .prefSym: [ as passed to constructor ] .engName: [ as passed to constructor ] """ # - - - E q B i n d . _ _ i n i t _ _ - - - def __init__ ( self, abbr, prefSym, engName ): "Constructor for EqBind" #-- 1 -- # [ self := self with parent class invariants in place ] AbBind.__init__ ( self, abbr ) #-- 2 -- self.className = EQ_BIND_CLASS self.prefSym = prefSym self.engName = engName # - - - E q B i n d . s h o w - - - def show ( self ): "Display self as a string." return ( "Deprecated code `%s', prefer %s." % ( self.abbr, self.prefSym ) ) # - - - E q B i n d . c o m b i n e - - - def combine ( self, other ): "Equivalence bindings do not combine." raise ValueError, "Equivalence bindings do not combine." # - - - E q B i n d . l o o k u p - - - def lookup ( self ): """Find the taxon to which self refers. 
[ if self leads to a reference loop -> return None else -> return eq-bind-chain-closure ( Set(self.abbr), self.abSym ] The `eq-bind-chain-closure' specification function is used to find the taxon to which a particular EqBind refers. The problem is that the data may contain reference loops. For example, suppose the data says that CATBIR=GRYCAT and GRYCAT=CATBIR. If we just use straight recursion to chase these references, the program will loop. So, when we start chasing a chain of EqBind bindings in hopes of finding one that isn't another EqBind, we do so while maintaining a set of all the bindings we've seen so far, and if we get to another EqBind that is already in that set, we know we've detected a reference loop. The set is symbolized as S in this specification function: eq-bind-chain-closure ( S, prefSym ) == if prefSym has no binding -> raise ValueError else if prefSym's binding B is not an EqBind -> prefSym.binding.lookup() else if prefSym.abbr is in set S -> raise ValueError else if prefSym.binding is an EqBind -> eq-bind-chain-closure ( union ( S, Set(S.abbr) ), prefSym.binding.prefSym ) """ try: return self.chainClosure ( Set ( self.abbr ), self.prefSym ) except ValueError: return None # - - - E q B i n d . c h a i n C l o s u r e - - - def chainClosure ( self, abbrSet, prefSym ): """Try to find a non-EqBind binding for self. [ (abbrSet is a Set of abbrs) and (prefSym is an AbSym) -> return eq-bind-chain-closure ( abbrSet, prefSym ) ] """ #-- 1 -- # [ if prefSym is unbound -> # raise ValueError # else -> # symBind := prefSym's binding ] symBind = prefSym.binding if symBind is None: raise ValueError, "Abbreviation `%s' is unbound." 
% self.abbr #-- 2 -- # [ if symBind is not an EqBind -> # return symBind.lookup() # else -> # symAbbr := symBind.abbr ] if symBind.className != "EqBind": return symBind.lookup() else: symAbbr = symBind.abbr #-- 3 -- # [ if symAbbr is in abbrSet -> # raise ValueError (because this reference chain is a circuit) # else -> # abbrSet := union ( abbrSet, Set ( symAbbr ) ) ] if symAbbr in abbrSet: raise ValueError, ( "Abbrevation `%s' leads to a circuit of " "references." % self.abbr ) else: abbrSet.add ( symAbbr ) #-- 4 -- # [ return eq-bind-chain-closure ( abbrSet, symBind.prefSym ) ] return self.chainClosure ( abbrSet, symBind.prefSym ) # - - - E q B i n d . e n g - - - def eng ( self ): "Return self's English name." return self.engName # - - - - - c l a s s H t B i n d - - - - - class HtBind(AbBind): """Represents an abbr that refers to a standard taxon above species rank. Exports: HtBind ( abbr, taxon, engName, tex ): [ (abbr is an abbr string) and (taxon is a Taxon) and (engName is an English name string) and (tex is a TeX string) -> return a new HtBind representing that binding .abbr: [ as passed to constructor ] .taxon: [ as passed to constructor ] .engName: [ as passed to constructor ] .tex: [ as passed to constructor ] """ # - - - H t B i n d . _ _ i n i t _ _ - - - def __init__ ( self, abbr, taxon, engName, tex ): "Constructor for HtBind" #-- 1 -- # [ self := self with parent class invariants in place ] AbBind.__init__ ( self, abbr ) #-- 2 -- self.className = HT_BIND_CLASS self.taxon = taxon self.engName = engName self.tex = tex # - - - H t B i n d . s h o w - - - def show ( self ): "Display self as a string." return "%s (%s) -> %s" % ( self.abbr, self.engName, self.taxon ) # - - - H t B i n d . l o o k u p - - - def lookup ( self ): "Return self's taxon." return self.taxon # - - - H t B i n d . e n g - - - def eng ( self ): "Return self's English name." return self.engName # - - - H t B i n d . 
x m l W r i t e - - - def xmlWrite(self, parentNode): """Generate an XML subtree representing self (for valid codes) """ #-- 1 -- # [ parentNode := parentNode with a new ABBR_N node added # abbrNode := that new node ] abbrNode = xc.Element ( parentNode, ABBR_N ) #-- 2 -- # [ eng := English name for self # sci := scientific name for self ] eng = self.eng() taxon = self.lookup() sci = taxon.sci #-- 3 -- # [ abbrNode := abbrNode with attributes and children # added representing self ] abbrNode[CODE_A] = self.abbr.rstrip() abbrNode[SCI_A] = sci xc.Text ( abbrNode, eng ) #-- 4 -- # [ if self.tex is not None -> # abbrNode := abbrNode with a TEX_NAME_N child # added with content self.tex # else -> I ] if self.tex is not None: texNode = xc.Element ( abbrNode, TEX_NAME_N ) xc.Text ( texNode, self.tex ) # - - - - - c l a s s T x n y - - - - - class Txny: """Represents the taxonomy from the standard & alternate forms files Exports: Txny ( hier, basename=None ): [ ( hier is a Hier object ) and ( basename is a string, defaulting to DEFAULT_BASE_NAME ) -> if ( ( std-file(basename) is a readable, valid standard forms file conforming to hier ) and ( alt-file(basename) is a readable, valid alternate forms file conforming to hier and std-file(basename) ) ) -> return a new Txny object representing those files else -> Log() +:= error message(s) stop execution ] .hier: [ as passed to constructor ] .root: [ the root Taxon object of self ] .abTab: [ self's AbTab object, describing the bindings of Abbrs for taxa in self ] .lookupAbbr(abbr): [ abbr is a string -> if abbr matches (case-insensitive) a form code in self -> return an AbSym object describing abbr's use else -> raise KeyError ] .lookupSci(sci): [ sci is a string -> if sci matches the scientific name of a taxon in self -> return the Taxon object for that name else -> raise KeyError ] .lookupEng(eng): [ eng is a string -> if eng matches (case-insensitive) the English name of a taxon in self -> return the Taxon object for that name 
else -> raise KeyError ] .lookupTxKey(txKey): [ txKey is a string -> if txKey matches the taxonomic key of a taxon in self -> return the Taxon object for that key else -> raise KeyError ] .deriveTxKey(parent, child): [ (parent is a Taxon or None) and (child is a Taxon) -> return derived-tx-key(self, parent, child) ] .abbrToTaxon(abbr): [ abbr is a string -> if abbr is bound to a specific taxon in self -> return a Taxon object representing that taxon else -> raise KeyError ] .scanError(abbr, scan): [ (abbr is a string) and (scan is a Scan object) -> if abbr is unknown or unbound in self -> scan +:= error message, abbr is unknown/unbound else -> scan +:= error message, "abbr has binding: " + (binding of abbr in self) State/Invariants: .__engMap: [ dictionary mapping E.lower() |-> T such that for all English names E in self, T is the corresponding Taxon ] .__sciMap: [ dictionary mapping S |-> T such that for all scientific names S in self, T is the corresponding Taxon ] .__txKeyMap: [ dictionary mapping K |-> T such that for all taxonomic keys K in self, T is the corresponding Taxon ] """ # - - - T x n y . l o o k u p A b b r - - - def lookupAbbr ( self, abbr ): "Lookup a bird code" return self.abTab.lookupAbbr ( abbr ) # - - - T x n y . l o o k u p S c i - - - def lookupSci ( self, sci ): "Lookup a scientific name" return self.__sciMap[sci] # - - - T x n y . l o o k u p E n g - - - def lookupEng ( self, eng ): "Lookup an English name, case-insensitive" return self.__engMap[eng.lower()] # - - - T x n y . l o o k u p T x K e y - - - def lookupTxKey ( self, txKey ): "Look up a taxon by its key number." return self.__txKeyMap[txKey] # - - - T x n y . d e r i v e T x K e y - - - def deriveTxKey ( self, parent, child ): "Construct the taxonomic key for a child." 
#-- 1 -- if parent is None: return "" #-- 2 -- result = [] #-- 3 -- # [ result +:= parent's short taxonomic key ] result.append(parent.shortTxKey) #-- 4 -- # [ result +:= N zeroes, where N is the sum of the key # length for all ranks in self.hier between # parent.rank and child.rank ] for deepx in range ( parent.depth + 1, child.depth ): #-- 4 body -- # [ result := K zeroes, where K is the key length # of the rank in self.hier with depth deepx ] result.append ( "0" * self.hier.nthRank(deepx).keyLen ) #-- 5 -- # [ result +:= child's birth order+1, as a string, # left zero padded to child's rank's key length ] result.append ( str(child.birthOrder+1).zfill(child.rank.keyLen) ) #-- 6 -- # [ return elements of result, concatenated ] return "".join(result) # - - - T x n y . a b b r T o T a x o n - - - def abbrToTaxon ( self, abbr ): "Return the Taxon for a given abbr, if any." #-- 1 -- # [ if abbr matches (case-insensitive) an abbreviation in self -> # abSym := the AbSym object for that abbrevation # else -> raise KeyError ] abSym = self.lookupAbbr ( abbr ) #-- 2 -- # [ if abSym has a binding -> # abBind := that binding, as an AbBind object # else -> # raise KeyError ] abBind = abSym.binding if abBind is None: raise KeyError, "Code `%s' has no binding." % abbr #-- 3 -- # [ if abBind refers to a specific taxon -> # return that taxon # else -> raise KeyError ] taxon = abBind.lookup() if taxon is None: raise KeyError, ( "Code `%s' does not refer " "to a specific taxon." % abbr ) else: return taxon # - - - T x n y . s c a n E r r o r - - - def scanError ( self, abbr, scan ): "Issue an error message related to a given stream scan." #-- 1 -- # [ if abbr matches (case-insensitively) an abbr in self -> # abSym := the AbSym for abbr # else -> # scan +:= error message # return ] try: abSym = self.lookupAbbr ( abbr ) except KeyError: scan.error ( "Form code `%s' is unknown." 
% abbr ) return #-- 2 -- # [ if abSym has a binding -> # abBind := that binding as an AbBind object # else -> # scan +:= error message # return ] abBind = abSym.binding if abBind is None: scan.error ( "Form code `%s' is an unknown forward " "reference." % abbr ) return #-- 3 -- # [ scan +:= a description of abBind ] scan.error ( "Species code `%s' is invalid: %s." % ( abbr, abBind ) ) # - - - T x n y . _ _ i n i t _ _ - - - def __init__ ( self, hier, basename=None ): "Constructor for Txny." #-- 1 -- if basename is None: effBasename = DEFAULT_BASENAME else: effBasename = basename #-- 2 -- self.hier = hier self.abTab = AbTab() # A new, empty AbTab self.root = None self.__sciMap = {} self.__engMap = {} self.__txKeyMap = {} stdFileName = effBasename + STD_EXTENSION altFileName = effBasename + ALT_EXTENSION errCount = Log().count() # Initial error count #-- 3 -- # [ if stdFileName names a readable, valid standard forms # file -> # self.root := a Taxon rooting the tree of taxa from # that file # self.abTab +:= entries for the bindings of all abbrs # used in that file # self.__sciMap +:= entries mapping S |-> T for S in # the scientific names from that file and T the taxon # for that scientific name # self.__engMap +:= entries mapping E.lower() |-> T for # E in the English names from that file and T the # taxon for that English name # self.__txKeyMap +:= entries mapping K |-> T for K # the set of taxonomic keys derived from that file # and T the corresponding taxon # else -> # Log() +:= error message(s), if any # stop execution ] self.__readStd ( stdFileName ) #-- 4 -- # [ if altFileName names a readable, valid alternate forms # file that is consistent with the tree rooted in self.root -> # self.root := self.root with new Taxon objects added # for forms deeper than species from that file # self.abTab +:= new bindings for abbrs from that file # self.__engMap +:= entries mapping lowercased new # English names from that file to the corresponding # Taxon objects, for taxa at 
or deeper than species # rank, if there is a species rank in self.hier # self.__txKeyMap +:= entries mapping new taxonomic keys # from that file to the corresponding Taxon objects # else -> # Log() +:= error message(s), if any # stop execution ] self.__readAlt ( altFileName ) #-- 5 -- # [ if self.abTab is self-consistent -> # I # else -> # Log() +:= error message(s), if any ] self.__finalCheck() #-- 6 -- # [ if errCount < Log().count() -> # Log() +:= error message # stop execution # else -> # return ] if errCount < Log().count(): Log().fatal ( "Execution terminated due to errors in " "taxonomic input files." ) # - - - T x n y . _ _ r e a d S t d - - - def __readStd ( self, stdFileName ): """Build the basic taxonomic tree from the standard forms file. [ stdFileName is a string -> if stdFileName names a readable, valid standard forms file -> self.root := a Taxon rooting the tree of taxa from that file self.abTab +:= entries for the bindings of all abbrs used in that file self.__sciMap +:= entries mapping S |-> T for S in the scientific names from that file and T the taxon for that scientific name self.__engMap +:= entries mapping E.lower() |-> T for E in the English names from that file and T the taxon for that English name self.__txKeyMap +:= entries mapping K |-> T for K the set of taxonomic keys derived from that file and T the corresponding taxon else -> Log() +:= error message(s), if any stop execution ] """ #-- 1 -- # [ errCount := current error count from Log() ] errCount = Log().count() #-- 2 -- # [ if stdFileName names a file that can be opened for reading -> # scan := a new Scan object positioned at the start of that file # else -> # Log() +:= error message # stop execution ] try: scan = Scan ( stdFileName ) except IOError, detail: Log().fatal ( "Cannot open standard forms file `%s' " "for reading." 
% stdFileName ) #-- 3 -- # [ self := self with all valid taxa and abbreviations added # from scan # self.scan +:= error messages from scan, if any # self.scan := self.scan advanced to end of file ] while not scan.atEndFile: #-- 3 body -- # [ if line in scan is a valid std line -> # self := self with any valid taxa and abbreviations # added from that line # self.scan := self.scan advanced to the next line # else -> # self.scan +:= error message # self.scan := self.scan advanced to the next line ] self.__parseStd ( scan ) scan.nextLine() #-- 4 -- # [ if errCount < (current error count from Log()) -> # Log() +:= error message # stop execution # else -> I ] scan.close() if errCount < Log().count(): Log().fatal ( "Execution terminated due to errors in the " "standard forms file." ) # - - - T x n y . _ _ p a r s e S t d - - - def __parseStd ( self, scan ): """Parse one line from the standard forms file. [ scan is a Scan object -> if line in scan is valid -> self := self with any valid taxa and abbreviations added from that line self.scan := self.scan advanced to end of line else -> self.scan +:= error message self.scan := self.scan advanced no further than end of line ] """ #-- 1 -- # [ if the current line in scan is a syntactically valid # std line representing a level in self.hier -> # scan := scan advanced to end of line # rawStd := a RawStd object representing that line # else if the current line in scan is syntactically valid # but represents a level not in self.hier -> # return # else -> # scan := scan advanced to end of line # scan +:= error message(s) # raise ValueError ] rawStd = RawStd.parse(self.hier, scan) if rawStd is None: return #-- 2 -- # [ self := self with all valid taxa and abbrs added from rawStd # scan +:= error message(s), if any, from trying to add # rawStd's taxa or abbrs ] self.__addRawStd ( scan, rawStd ) # - - - T x n y . 
_ _ a d d R a w S t d - - - def __addRawStd ( self, scan, rawStd ): """Add a std line in raw form to self [ (scan is a Scan object) and (rawStd is a RawStd object) -> self := self with all valid taxa and abbrs added from rawStd scan +:= error message(s), if any, from trying to add rawStd's taxa or abbrs ] """ #-- 1 -- # [ if rawStd represents a non-species line -> # self := self with the taxon from rawStd added # scan +:= error message(s) from adding rawStd, if any # else -> # self := self with all valid taxa and addrs added # from rawStd # scan +:= error message(s) from adding rawStd, if any ] if rawStd.head.rank is not None: self.__addRawNonSp ( scan, rawStd ) else: self.__addRawSpGroup ( scan, rawStd ) # - - - T x n y . _ _ a d d R a w N o n S p - - - def __addRawNonSp ( self, scan, rawStd ): """Add a non-species line to self [ (scan is a Scan object) and (rawStd is a RawStd object with a specific rank) -> self := self with the taxon from rawStd added scan +:= error message(s) from adding rawStd, if any ] """ #-- 1 -- stdHead = rawStd.head nonSpTail = rawStd.nonSpTail() #-- 2 -- # [ self := self with a new Taxon added, if valid, with # parent=(rank-parent(self,stdHead.rank)), # status=stdHead.status, # rank=(stdHead.rank), sci=nonSpTail.sci, # eng=nonSpTail.eng, tex=nonSpTail.tex, # and no abbreviations # scan +:= error message(s) from adding this taxon, if any ] self.__addTaxon ( scan, stdHead.rank, # rank nonSpTail.sci, # sci nonSpTail.eng, # eng nonSpTail.eng, # tex, same as eng stdHead.status, # status None, None ) # canon, disamb # - - - T x n y . _ _ a d d T a x o n - - - def __addTaxon ( self, scan, rank, sci, eng, tex, status, canon, disamb ): """Add a new taxon to self. 
[ (rank is a Rank object) and (sci is a scientific name as a string) and (eng is an English name as a string) and (tex is a TeX-encoded English name as a string) and (status is the status as a string or None) and (canon is the canonical abbr as a string, or None) and (disamb is the disambiguated adbr as a string, or None) -> self := self with a new Taxon added, if valid, with parent=(rank-parent(self,rank)), status=status, rank=rank, sci=sci, eng=eng, tex=tex, canon=canon, and disamb=disamb scan +:= error message(s) from adding this taxon, if any ] """ #-- 1 -- # [ if rank-can-occur(self, rank) -> # parent := rank-parent(self, rank) # else -> # scan +:= error message # return ] parent = self.__findParent(scan, rank) #-- 2 -- # [ if parent is rank-parent(self,rank) -> # taxon := a new Taxon made from txny=self, # parent=parent, sci=sci, eng=eng, tex=tex, # status=status, canon=canon, and disamb=disamb ] taxon = Taxon ( self, parent, rank, sci, eng, tex, status, canon, disamb ) #-- 3 -- if parent is None: self.root = taxon #-- 4 -- # [ self.__engMap +:= entry mapping taxon.eng.lower() # |-> taxon for taxa at or deeper than species rank # self.__sciMap +:= entry mapping taxon.sci |-> taxon # self.__txKeyMap +:= entry mapping taxon.txKey() |-> taxon # self.abTab +:= bindings-for-taxon(self, taxon, # taxon.canon, taxon.disamb) # self.scan +:= errors from duplications, if any ] self.__addTaxonToMaps ( scan, taxon ) # - - - T x n y . _ _ f i n d P a r e n t - - - def __findParent ( self, scan, rank ): """Find the parent of the new rank to be added. [ (scan is a Scan object) and (rank is a Rank object) -> if rank-can-occur(self, rank) -> return rank-parent(self, rank) else -> scan +:= error message(s) return None ] """ #-- 1 -- if self.root is None: #-- 1.1 -- # [ if rank is the root rank of self.hier -> # return None # else -> # scan +:= error message # return None ] if rank.depth == 0: return None else: scan.error ( "The first taxon must have rank %s." 
% hier.nthRank(0) ) return None else: #-- 1.2 -- # [ if rank-can-occur(self, rank) -> # return rank-parent(self, rank) # else -> # scan +:= error message # return None ] return self.__rankParent ( scan, rank ) # - - - T x n y . _ _ r a n k P a r e n t - - - def __rankParent ( self, scan, rank ): """Compute the rank-parent() function for a given rank. [ (self.root is not None) and (scan is a Scan object) and (rank is a Rank object) -> if rank-can-occur(self, rank) -> return rank-parent(self, rank) else -> scan +:= error message return None ] """ #-- 1 -- # [ self.root is not None -> # parent := rank-parent(self, rank) # child := last child of rank-parent(self, lank), # or None if parent is childless ] parent = self.root child = parent.lastChild() while ( ( child is not None ) and ( child.depth < rank.depth ) ): parent, child = child, parent.lastChild() #-- 2 -- # [ if rank-can-occur(self, rank) -> # return parent # else -> # scan +:= error message # return None ] if child is None: if self.hier.canParentHaveChild(parent.rank, rank): return parent else: scan.error ( "There are missing required ranks " "between this taxon and %s." % parent ) return None else: if child.depth == rank.depth: return parent else: scan.error ( "Taxa may not have children of " "different ranks." ) scan.message ( " Parent: %s" % parent ) scan.message ( " Sibling: %s" % child ) return None # - - - T x n y . _ _ a d d T a x o n T o M a p s - - - def __addTaxonToMaps ( self, scan, taxon ): """Maintain invariants for self.__engMap, etc. 
[ (scan is a Scan object) and (taxon is a Taxon) -> self.__engMap +:= entry mapping taxon.eng.lower() |-> taxon for taxa at or deeper than species rank self.__sciMap +:= entry mapping taxon.sci |-> taxon self.__txKeyMap +:= entry mapping taxon.txKey() |-> taxon self.abTab +:= bindings-for-taxon(self, taxon, taxon.canon, taxon.disamb) self.scan +:= errors from duplications, if any ] """ #-- 1 -- # [ self.__engMap +:= an entry mapping taxon.eng.lower() |-> # taxon for taxa at or deeper than species rank # scan +:= errors from duplication, if any ] self.__addEngMap ( scan, taxon ) #-- 2 -- # [ self.__sciMap +:= an entry mapping taxon.sci |-> taxon # scan +:= errors from duplication, if any ] self.__addSciMap ( scan, taxon ) #-- 3 -- # [ self.__txKeyMap +:= an entry mapping taxon.txKey() |-> taxon # scan +:= errors from duplication, if any ] self.__addTxKeyMap ( scan, taxon ) #-- 4 -- # [ self.abTab +:= bindings-for-taxon ( self, taxon, # taxon.canon, taxon.disamb ) ] # scan +:= errors from duplication, if any ] self.__addAbMap ( scan, taxon ) # - - - T x n y . _ _ a d d E n g M a p - - - def __addEngMap ( self, scan, taxon ): """Add taxon to self.__engMap [ (scan is a Scan object) and (taxon is a Taxon) -> self.__engMap +:= an entry mapping taxon.eng.lower() |-> taxon for taxa at or deeper than species rank scan +:= errors from duplication, if any ] """ #-- 1 -- key = taxon.eng.lower() #-- 2 -- # [ if taxon is at a shallower depth than # self.hier.speciesRank() -> # return # else -> I ] if taxon.depth < self.hier.speciesRank().depth: return #-- 3 -- # [ if self.__engMap has no entry for key -> # self.__engMap +:= an entry mapping key |-> taxon # else -> # scan +:= warning, duplicate English names ] try: other = self.__engMap[key] scan.warning ( "Duplicate English name; the other is " "%s." % other ) except KeyError: self.__engMap[key] = taxon # - - - T x n y . 
_ _ a d d S c i M a p - - - def __addSciMap ( self, scan, taxon ): """Add a new entry to self.__sciMap [ (scan is a Scan object) and (taxon is a Taxon) -> self.__sciMap +:= an entry mapping taxon.sci |-> taxon scan +:= errors from duplication, if any ] """ try: other = self.__sciMap[taxon.sci] scan.warning ( "Duplicate scientific name; the other " "is %s." % other ) except KeyError: self.__sciMap[taxon.sci] = taxon # - - - T x n y . _ _ a d d T x K e y M a p - - - def __addTxKeyMap ( self, scan, taxon ): """Add a new entry to self.__txKeyMap [ (scan is a Scan object) and (taxon is a Taxon) -> self.__txKeyMap +:= an entry mapping taxon.txKey() |-> taxon scan +:= errors from duplication, if any ] """ #-- 1 -- key = taxon.txKey() #-- 2 -- # [ if key is a key in self.__txKeyMap -> # scan +:= error message # else -> # self.__txKeyMap[key] := taxon ] try: other = self.__txKeyMap[key] scan.error ( "Duplicate taxonomic key `%s'; the other " "is %s." % ( key, other ) ) except KeyError: self.__txKeyMap[key] = taxon # - - - T x n y . 
_ _ a d d A b M a p - - - def __addAbMap ( self, scan, taxon ): """Add new bindings for taxon [ (scan is a Scan object) and (taxon is a Taxon) -> self.abTab +:= bindings-for-taxon ( self, taxon, taxon.canon, taxon.disamb ) ] scan +:= errors from duplication, if any ] """ #-- 1 -- if taxon.canon is None: return #-- 2 -- # [ taxon.canon is not None -> # self.abMap +:= bindings-for-taxon ( self, taxon, # taxon.canon, taxon.disamb ) # scan +:= error messages about incompatible bindings, # if any ] if taxon.disamb is None: #-- 2.1 -- # [ (taxon.canon is not None) and (taxon.disamb is None) -> # self.abMap +:= bindings-for-taxon ( self, taxon, # taxon.canon, taxon.disamb ) # scan +:= errors due to incompatible bindings, if any ] self.__addAbMapCanon ( scan, taxon, taxon.canon ) else: #-- 2.2 -- # [ (taxon.canon is not None) and (taxon.disamb is not None) -> # self.abMap +:= bindings-for-taxon ( self, taxon, # taxon.canon, taxon.disamb ) # scan +:= errors due to incompatible bindings, if any ] self.__addAbMapDisamb ( scan, taxon, taxon.canon, taxon.disamb ) # - - - T x n y . 
_ _ a d d A b M a p C a n o n - - - def __addAbMapCanon ( self, scan, taxon, canon ): """Add bindings for canonical abbr with no disambiguation [ (scan is a Scan object) and (taxon is a Taxon) and (canon is an abbr or None) -> self.abMap +:= bindings-for-taxon ( self, taxon, canon, None ) scan +:= errors due to incompatible bindings, if any ] """ #-- 1 -- # [ stdBind := a new StdBind relating canon to taxon ] stdBind = StdBind ( canon, taxon ) #-- 2 -- # [ if self.abTab has a binding for canon -> # stdSym := that binding as an AbSym # else -> # stdSym := a new AbSym for canon with no binding ] stdSym = self.abTab.addAbbr ( canon ) #-- 3 -- # [ if stdSym has no binding -> # stdSym := stdSym with binding stdBind added # else if stdSym's binding can be combined with stdBind -> # stdSym := stdSym with binding stdBind combined # else -> # scan +:= error message ] try: stdSym.bind ( stdBind ) except ValueError: scan.error ( "This use of `%s' conflicts with `%s'." % ( canon, stdSym.binding.show() ) ) # - - - T x n y . 
_ _ a d d A b M a p D i s a m b - - - def __addAbMapDisamb ( self, scan, taxon, canon, disamb ): """Add bindings for canonical abbr with disambiguation [ (scan is a Scan object) and (taxon is a Taxon) and (canon is not None) and (disamb is None) -> self.abMap +:= bindings-for-taxon ( self, taxon, taxon.canon, taxon.disamb ) scan +:= errors due to incompatible bindings, if any ] """ #-- 1 -- # [ self.abMap +:= bindings-for-taxon ( self, taxon, # disamb, None ) # scan +:= error(s) from incompatible bindings, if any ] self.__addAbMapCanon ( scan, taxon, disamb ) #-- 2 -- # [ collBind := a new CollBind relating canon to the # singleton set {disamb} ] collBind = CollBind ( canon, Set(disamb) ) #-- 3 -- # [ if self.abTab has no binding for (canon) -> # collSym := a new AbSym for disamb with no binding # else -> # collSym := that binding as an AbSym ] collSym = self.abTab.addAbbr ( canon ) #-- 4 -- # [ if collSym has no binding -> # collSym := collSym with binding collBind added # else if collSym's binding can be combined with collBind -> # collSym := collSym with binding collBind combined # else -> # scan +:= error message ] try: collSym.bind ( collBind ) except ValueError: scan.error ( "This use of `%s' conflicts with `%s'." % ( canon, collSym.binding ) ) # - - - T x n y . 
_ _ a d d R a w S p G r o u p - - - def __addRawSpGroup ( self, scan, rawStd ): """Add the taxa from a species line to self [ (scan is a Scan object) and (rawStd is a RawStd object with no given rank) -> self := self with all valid taxa and addrs added from rawStd scan +:= error message(s) from adding rawStd, if any ] """ #-- 1 -- spTail = rawStd.spTail() #-- 2 -- # [ if (self.hier omits genus rank) or # (genus rank can be added now, and the last child of # rank-parent(self, self.hier.genusRank()) is a genus # with the same name as rawStd.spTail.genus -> # I # else -> # self := self with genus added from rawStd.spTail # self.scan +:= error(s) from duplicate genus, if any ] self.__addRawGenus ( scan, spTail ) #-- 2 -- # [ if (self.hier omits subgenus rank) or # (subgenus can be added now, and the last child of # rank-parent(self, self.hier.subgenusRank()) is a subgenus # with the same name as rawStd.spTail's genus) -> # I # else -> # self := self with rawStd.spTail's genus added # scan +:= error(s) from duplicate subgenus, if any ] self.__addRawSubgenus ( scan, spTail ) #-- 3 -- # [ self := self with species added from spTail with # status=rawStd.stdHead.status # scan +:= error(s) from duplications, if any ] self.__addRawSpecies ( scan, rawStd.head.status, spTail ) # - - - T x n y . _ _ a d d R a w G e n u s - - - def __addRawGenus ( self, scan, spTail ): """Add the genus if it is new. 
[ (scan is a Scan object) and (spTail is an SpTail object) -> if (self.hier omits genus rank) or (genus rank can be added now, and the last child of rank-parent(self, self.hier.genusRank()) is a genus with the same name as spTail.genus -> I else -> self := self with genus added from spTail self.scan +:= error(s) from duplicate genus, if any ] """ #-- 1 -- genus = spTail.genus genusRank = self.hier.genusRank() #-- 2 -- # [ if not rank-can-occur ( self, genusRank ) -> # scan +:= error message # return # else if rank-parent(self, genusRank) exists and # has a last child with the same name as (genus) -> # return # else -> # self := self with genus added from spTail.genus # scan +:= error(s), if any ] if self.__hasTaxonChanged ( scan, genusRank, genus ): #-- 2.1 -- # [ self := self with genus added from spTail.genus # scan +:= error(s), if any ] self.__addTaxon ( scan, genusRank, # rank genus, # sci genus, # eng == same as sci r"\itc{%s}" % genus, # TeX None, None, None ) # status, canon, disamb # - - - T x n y . _ _ h a s T a x o n C h a n g e d - - - def __hasTaxonChanged ( self, scan, rank, sci ): """Determine whether the genus or subgenus name has changed. [ (scan is a Scan object) and (rank is a Rank) and (sci is a scientific name as a string) -> if not rank-can-occur ( self, rank ) -> scan +:= error message return 0 else if rank-parent(self, rank) exists and has a last child of rank=(rank) and the same name as (sci) -> return 0 else -> return 1 ] """ #-- 1 -- # [ if rank-can-occur ( self, rank ) -> # parent := rank-parent ( self, rank ) # else -> # scan +:= error message # return 0 ] parent = self.__findParent ( scan, rank ) if parent is None: return 0 #-- 2 -- # [ if (parent has children) and # (parent's last child's sci matches (sci) -> # return 0 # else -> # return 1 ] prevChild = parent.lastChild() if ( ( prevChild is not None ) and ( prevChild.sci == sci ) ): return 0 else: return 1 # - - - T x n y . 
_ _ a d d R a w S u b g e n u s - - - def __addRawSubgenus ( self, scan, spTail ): """Same as __addRawGenus, only for subgenera. [ (scan is a Scan object) and (spTail is an SpTail) -> if (self.hier omits subgenus rank) or (subgenus can be added now, and the last child of rank-parent(self, self.hier.subgenusRank()) is a subgenus with the same name as rawStd.spTail's genus) -> I else -> self := self with rawStd.spTail's subgenus added scan +:= error(s) from duplicate subgenus, if any ] Note: The usual convention for subgenus names is to place them in parentheses after the genus name, e.g., "Cygnus (Olor)" is subgenus Olor of genus Cygnus. In the .tex field, the genus and subgenus names are italicized with the TeX \itc{} macro, making the .tex name "\itc{Genus} (\itc{Subgenus})". """ #-- 1 -- # [ if self.hier omits the subgenus rank -> # return # else -> # subgenusRank := the subgenus rank from self.hier ] subgenusRank = self.hier.subgenusRank() if subgenusRank is None: return #-- 2 -- # [ if spTail contains no subgenus -> # return # else -> # I ] if spTail.subgenus is None: return #-- 3 -- # [ sci := the subgenus name from spTail ] sci = ( "%s (%s)" % ( spTail.genus, spTail.subgenus ) ) #-- 4 -- # [ if not rank-can-occur ( self, subgenusRank ) -> # scan +:= error message # return # else if rank-parent(self, subgenusRank) exists and has a # last child of rank (subgenusRank) and the same name as (sci) -> # return # else -> # self := self with rawStd.spTail.subgenus added # scan +:= error message, if any ] if self.__hasTaxonChanged ( scan, subgenusRank, sci ): #-- 4.1 -- # [ self := self with rawStd.spTail.subgenus added # scan +:= error message, if any ] self.__addTaxon ( scan, subgenusRank, # rank sci, # sci sci, # eng, same as sci r"\itc{%s} (\itc{%s})" % # tex (spTail.genus, spTail.subgenus), None, None, None ) # status, canon, disamb # - - - T x n y . 
_ _ a d d R a w S p e c i e s - - - def __addRawSpecies ( self, scan, status, spTail ): """Add a new species taxon to self. [ (scan is a Scan object) and (status is a status code or None) and (spTail is an SpTail) -> self := self with species added from spTail with status=rawStd.stdHead.status scan +:= error(s) from duplications, if any ] Note: A species name does not exist independently; it must always be given with a genus name. Hence, "Lanius excubitor", never just "excubitor". """ #-- 1 -- # [ sci := (spTail.genus) + " " + (spTail.species) ] sci = "%s %s" % ( spTail.genus, spTail.species ) #-- 2 -- # [ self := self with a new species added with txny=self, # rank=self.hier.speciesRank(), sci=sci, eng=spTail.eng, # status=status, canon=spTail.canon, disamb=spTail.disamb # scan +:= error message(s), if any ] self.__addTaxon ( scan, self.hier.speciesRank(), # rank sci, # sci spTail.eng, # eng spTail.eng, # tex, same as eng status, # status spTail.canonical, # canon spTail.disamb ) # disamb # - - - T x n y . _ _ r e a d A l t - - - def __readAlt ( self, altFileName ): """Read and process the alternate forms file. 
[ altFileName is a string -> if altFileName names a readable, valid alternate forms file that is consistent with the tree rooted in self.root -> self.root := self.root with new Taxon objects added for forms deeper than species from that file self.abTab +:= new bindings for abbrs from that file self.__engMap +:= entries mapping lowercased new English names from that file to the corresponding Taxon objects, for taxa at or deeper than species rank, if there is a species rank in self.hier self.__txKeyMap +:= entries mapping new taxonomic keys from that file to the corresponding Taxon objects else -> Log() +:= error message(s), if any stop execution ] """ #-- 1 -- # [ errCount := count of errors in Log() ] errCount = Log().count() #-- 2 -- # [ if altFileName names a file that can be opened for reading -> # scan := a new Scan object positioned at the start of that file # else -> # Log() +:= error message # stop execution ] try: scan = Scan ( altFileName ) except IOError, detail: Log().fatal ( "Cannot open alternate forms file `%s' " "for reading." % altFileName ) #-- 3 -- # [ self := self with all valid names and abbreviations # added from scan # scan +:= errors from scan, if any # scan := scan advanced to end of file ] while not scan.atEndFile: #-- 3 body -- # [ self := self with all valid names and abbrs # added from scan # scan +:= errors from scan, if any # scan := scan advanced to next line ] self.__parseAlt ( scan ) scan.nextLine() #-- 4 -- # [ if errCount < (current error count from Log()) -> # Log() +:= error message # stop execution # else -> I ] scan.close() if errCount < Log().count(): Log().fatal ( "Execution terminated due to errors in the " "alternate forms file." ) # - - - T x n y . _ _ p a r s e A l t - - - def __parseAlt ( self, scan ): """Parse one line from the alternate forms file. 
[ scan is a Scan object -> self := self with all valid names and abbrs added from scan scan +:= errors from scan, if any scan := scan advanced to next line ] """ #-- 1 -- # [ if scan's current line starts with a bird abbreviation, # right-padded to the maximum size with spaces -> # scan := scan advanced by the size of an abbr # abbr := those characters # else -> # scan +:= error message # return ] abbr = abbrScanFlat ( scan ) if abbr is None: return #-- 2 -- # [ if the line scan has at least one character -> # scan := scan advanced 1 # recordType := that character # else -> # scan +:= error message # return ] try: recordType = scan.move(1) except IndexError: scan.error ( "Expecting a record type code." ) return #-- 3 -- # [ if ( ( recordType is valid ) and # ( the line in scan is valid for a record # with type code recordType ) ) -> # scan := scan advanced to end of line # self := self with binding(s) added from line in scan # else -> # scan := scan advanced no further than end of line # scan +:= error message(s) ] #-- # NB: dispatchTable follows all the parse functions, below. #-- if self.dispatchTable.has_key ( recordType ): self.dispatchTable[recordType] ( self, scan, abbr ) else: scan.error ( "Expecting a record type code." ) #-- # Generic I/F for all alt scan functions: # [ (scan is a Scan object) and # (recordType is a string) -> # if the line in scan is a valid alt tail for type # (recordType) -> # scan := scan advanced to end of line # self := self with binding(s) for abbr added from that tail # else -> # scan +:= error message(s) # scan := scan advanced no further than end of line ] #-- # - - - T x n y . 
_ _ p a r s e H i g h e r A l t - - - def __parseHigherAlt ( self, scan, abbr ): """Process a higher-taxon abbr binding; for recordType=" " [ (scan is a Scan object) and (abbr is a bird code) -> if the line in scan is a valid higher-alt tail -> scan := scan advanced to end of line self := self with a binding added from that tail else -> scan +:= error message(s) scan := scan advanced no further than end of line ] """ #-- 1 -- # [ if there is a "/" in the line in scan -> # scan := scan advanced past the first "/" and # any trailing whitespace # sci := characters up to the first "/", minus # leading and trailing whitespace # else -> # scan +:= error message(s) # scan := scan advanced no further than end of line # return ] slashPos = scan.upto ( slashCset ) if slashPos is None: scan.error ( "There must be a `/' after the scientific " "name." ) return else: sci = scan.tab(slashPos).strip() scan.move(1) scan.deblankLine() #-- 2 -- # [ if there is another "/" on this line -> # scan := scan advanced to end of line # eng := characters up to that "/", minus leading and # trailing whitespace # tex := characters between that "/" and end of line, # minus leading and trailing whitespace # else -> # scan := scan advanced to end of line # eng := characters from scan to end of line, minus # leading and trailing whitespace # tex : = None ] slashPos = scan.upto ( slashCset ) if slashPos is None: eng = scan.tab(-1).strip() tex = None else: eng = scan.tab(slashPos).strip() scan.move(1) # Skip the slash tex = scan.tab(-1).strip() #-- 3 -- # [ if eng contains a comma -> # eng := (text after comma, deblanked) + " " + # (text before comma, deblanked) # else -> I ] eng = engSwap ( eng ) #-- 4 -- # [ if sci is defined in self.__sciMap -> # self := self with a higher-taxon binding for abbr # with eng=(eng), sci=(sci), tex=(tex) # else -> # scan +:= error message(s) ] self.__bindHigherAlt ( scan, abbr, sci, eng, tex ) # - - - T x n y . 
_ _ b i n d H i g h e r A l t - - - def __bindHigherAlt ( self, scan, abbr, sci, eng, tex ): """Create a binding from abbr to a higher taxon. [ (scan is a Scan object) and (abbr is a bird code as a string) and (sci is a scientific name as a string) and (eng is an English name as a string) and (tex is a TeX English name or None) -> if sci is defined in self.__sciMap -> self := self with a higher-taxon binding for abbr to eng=(eng), sci=(sci), tex=(tex) else -> scan +:= error message(s) ] """ #-- 1 -- # [ if sci is a key in self.__sciMap -> # taxon := the corresponding Taxon object # else -> # scan +:= error message # return ] try: taxon = self.lookupSci ( sci ) except KeyError: scan.error ( "Scientific name `%s' is undefined." % sci ) return #-- 2 -- # [ htBind := a new HtBind made from (abbr, taxon, eng, tex) # sym := self.abTab's entry for (abbr) ] htBind = HtBind ( abbr, taxon, eng, tex ) sym = self.abTab.addAbbr ( abbr ) #-- 3 -- # [ if htBind is compatible with the binding of abSym, if any -> # sym := sym with htBind added # else -> # scan +:= error message(s) ] try: sym.bind ( htBind ) except ValueError: scan.error ( "Code `%s' conflicts with %s." % ( abbr, sym.binding.show() ) ) # - - - T x n y . 
# _ _ p a r s e E q u i v a l e n t   - - -
# (__parseEquivalent and __parseSubspecific are methods of class Txny;
#  the class header lies outside this chunk.)

def __parseEquivalent ( self, scan, abbr ):
    """Process a cross-reference, e.g., "amewid=amewig Widgeon, American"

      [ (scan is a Scan object) and
        (abbr is a bird code) ->
          if the line in scan is a valid cross-reference tail ->
            scan  :=  scan advanced to end of line
            self  :=  self with a binding added for that tail
          else ->
            scan  +:=  error message(s)
            scan  :=  scan advanced no further than end of line ]
    """
    #-- 1 --
    # [ if scan starts with a bird code in flat-field form ->
    #     scan  :=  scan advanced past that code
    #     prefAbbr  :=  that code
    #   else ->
    #     return ]
    # NOTE(review): nothing is logged here on failure, so abbrScanFlat()
    # is presumably responsible for the error message -- confirm.
    prefAbbr = abbrScanFlat ( scan )
    if prefAbbr is None:
        return

    #-- 2 --
    # [ scan  :=  scan advanced to end of line
    #   eng  :=  the rest of the line, stripped, with any
    #            "generic, specific" form swapped by engSwap() ]
    eng = engSwap ( scan.tab(-1).strip() )

    #-- 3 --
    # [ prefSym  :=  self.abTab's entry for prefAbbr (as returned
    #                by addAbbr)
    #   eqBind  :=  a new EqBind for abbr=(abbr), sym=(prefSym),
    #               eng=(eng)
    #   sym  :=  self.abTab's entry for abbr (as returned by addAbbr) ]
    prefSym = self.abTab.addAbbr ( prefAbbr )
    eqBind = EqBind ( abbr, prefSym, eng )
    sym = self.abTab.addAbbr ( abbr )

    #-- 4 --
    # [ if sym accepts eqBind ->
    #     sym  :=  sym with eqBind bound or combined
    #   else ->
    #     scan  +:=  error message ]
    try:
        sym.bind ( eqBind )
    except ValueError:
        # Describe the existing binding with .show(), as the parallel
        # message in __bindHigherAlt does, rather than formatting the
        # binding object itself.
        scan.error ( "Code `%s' conflicts with %s." %
                     ( abbr, sym.binding.show() ) )

# - - -   T x n y . _ _ p a r s e S u b s p e c i f i c   - - -

def __parseSubspecific ( self, scan, abbr ):
    """Process a subspecific form ("<" record).

      The tail handled here is a flat-field standard bird code
      followed by an English name and an optional "/TeX name".

      [ (scan is a Scan object) and
        (abbr is a bird code) ->
          if the line in scan is a valid subspecific tail ->
            scan  :=  scan advanced to end of line
            self  :=  self with a subspecific form added and a
                      binding for abbr to that form
          else ->
            scan  +:=  error message(s)
            scan  :=  scan advanced no further than end of line ]
    """
    #-- 1 --
    # [ if the line in scan starts with a bird code in flat-field
    #   form ->
    #     scan  :=  scan advanced past that code and any trailing
    #               whitespace
    #     stdAbbr  :=  that code
    #   else ->
    #     return ]
    stdAbbr = abbrScanFlat ( scan )
    if stdAbbr is None:
        return
    scan.deblankLine()

    #-- 2 --
    # [ scan  :=  scan advanced to end of line
    #   if there is a "/" on the rest of the line ->
    #     rawEng  :=  the text before that "/", stripped
    #     tex  :=  the text after that "/", stripped
    #   else ->
    #     (rawEng, tex)  :=  the rest of the line, stripped ]
    # NOTE(review): without a "/", tex keeps the unswapped
    # "generic, specific" text while eng is swapped below -- confirm
    # that this is what consumers of tex expect.
    slashPos = scan.upto ( slashCset )
    if slashPos is None:
        rawEng = tex = scan.tab(-1).strip()
    else:
        rawEng = scan.tab(slashPos).strip()
        scan.move(1)                        # Step over the slash
        tex = scan.tab(-1).strip()

    #-- 3 --
    # [ eng  :=  rawEng with any "generic, specific" form swapped ]
    eng = engSwap ( rawEng )

    #-- 4 --
    # [ self  :=  self with a form-rank taxon and a binding for abbr
    #             added, if stdAbbr names a standard species
    #   scan  +:=  error message(s), if any ]
    self.__bindSubspecific ( scan, abbr, stdAbbr, eng, tex )

# - - -   T x n y .
# _ _ b i n d S u b s p e c i f i c   - - -
# (__bindSubspecific and __addForm are methods of class Txny; the
#  class header lies outside this chunk.)

def __bindSubspecific ( self, scan, abbr, stdAbbr, eng, tex ):
    """Add a sub-specific form and bind abbr to it.

      [ (scan is a Scan object) and
        (abbr is a bird code) and
        (stdAbbr is a bird code) and
        (eng is an English name) and
        (tex is a TeX English name) ->
          if self.hier is missing the species or form level ->
            I
          else if stdAbbr is in self.abTab and bound to a StdBind
          of species rank ->
            self  :=  self with a new form-rank taxon added as the
                      next child of that species
            self.abTab  :=  self.abTab with a standard binding added
                for abbr to that new taxon
            scan  +:=  error message(s), if any
          else ->
            scan  +:=  error message(s) ]
    """
    #-- 1 --
    # [ if self.hier has both species and form ranks ->
    #     spRank  :=  self.hier's species rank
    #     formRank  :=  self.hier's form rank
    #   else ->
    #     return ]
    spRank = self.hier.speciesRank()
    formRank = self.hier.formRank()
    if ( spRank is None ) or ( formRank is None ):
        return

    #-- 2 --
    # [ if stdAbbr is in self.abTab and has a binding ->
    #     spBind  :=  that binding
    #   else ->
    #     scan  +:=  error message
    #     return ]
    # Only the lookup itself is expected to raise KeyError, so only
    # the lookup is inside the try.
    try:
        spSym = self.lookupAbbr ( stdAbbr )
    except KeyError:
        scan.error ( "Code `%s' is unknown." % stdAbbr )
        return
    spBind = spSym.binding
    if spBind is None:
        scan.error ( "Code `%s' is undefined." % stdAbbr )
        return

    #-- 3 --
    # [ if spBind is a StdBind leading to a species-rank taxon ->
    #     spTaxon  :=  that taxon
    #   else ->
    #     scan  +:=  error message
    #     return ]
    if not isinstance ( spBind, StdBind ):
        scan.error ( "Code %s is not a standard taxon." % stdAbbr )
        return
    spTaxon = spBind.lookup()
    if spTaxon is None:
        # Reported separately: the rank message below dereferences
        # spTaxon.rank, which would raise AttributeError on None.
        scan.error ( "Code `%s' does not lead to a taxon." % stdAbbr )
        return
    if spTaxon.rank.depth != spRank.depth:
        scan.error ( "Subspecies must be referred to standard "
                     "species, but %s is a %s." %
                     ( spTaxon, spTaxon.rank.name ) )
        return

    #-- 4 --
    # [ self  :=  self with a new form-rank taxon added under
    #             spTaxon and a StdBind from abbr to it
    #   scan  +:=  error message(s), if any ]
    self.__addForm ( scan, abbr, spTaxon, eng, tex, formRank )

# - - -   T x n y . _ _ a d d F o r m   - - -

def __addForm ( self, scan, abbr, spTaxon, eng, tex, formRank ):
    """Add a new subspecific (form) taxon under a species.

      [ (scan is a Scan object) and
        (abbr is a bird code) and
        (spTaxon is a Taxon) and
        (eng is an English name) and
        (tex is a TeX English name) and
        (formRank is the form rank from self.hier) ->
          self  :=  self with a new form-rank taxon added under
              spTaxon, with txny=(self), parent=(spTaxon),
              sci=(spTaxon.sci+" "+(form's child number)),
              eng=(eng), tex=(tex), status=None, canon=(abbr),
              disamb=None
          self's name maps and self.abTab  :=  updated for that
              taxon by self.__addTaxonToMaps()
          scan  +:=  error message(s), if any ]
    """
    #-- 1 --
    # [ taxon  :=  a new form-rank Taxon under spTaxon whose
    #       scientific name is spTaxon's name followed by the
    #       1-based number this form will have among spTaxon's
    #       children ]
    formSci = "%s %d" % ( spTaxon.sci, spTaxon.nChildren()+1 )
    taxon = Taxon ( self, spTaxon, formRank, formSci, eng, tex,
                    None, abbr, None )

    #-- 2 --
    # [ self  :=  self with taxon entered in its lookup maps
    #   scan  +:=  errors from duplications, if any ]
    self.__addTaxonToMaps ( scan, taxon )

# - - -   T x n y .
# _ _ p a r s e C o l l i s i o n C l u s t e r   - - -
# (__parseCollisionCluster and __buildCollisionSet are methods of
#  class Txny; the class header lies outside this chunk.)

def __parseCollisionCluster ( self, scan, abbr ):
    """Process the tail of a collision line, e.g. "colba :colbid:colbin"

      [ (scan is a Scan object) and
        (abbr is a bird code) ->
          if the line in scan is a valid collision tail ->
            scan  :=  scan advanced to end of line
            self  :=  self with a collision binding for abbr to the
                      set of disambiguations from that tail
          else ->
            scan  +:=  error message(s)
            scan  :=  scan advanced no further than end of line ]
    """
    #-- 1 --
    # [ if the line in scan consists of one or more bird codes
    #   separated by ":" ->
    #     scan  :=  scan advanced to end of line
    #     collSet  :=  a Set containing those codes
    #   else ->
    #     scan  +:=  error message(s)
    #     return ]
    collSet = self.__buildCollisionSet ( scan )
    if collSet is None:
        return

    #-- 2 --
    # [ collBind  :=  a new CollBind binding abbr to collSet
    #   collSym  :=  self.abTab's entry for abbr (as returned by
    #                addAbbr) ]
    collBind = CollBind ( abbr, collSet )
    collSym = self.abTab.addAbbr ( abbr )

    #-- 3 --
    # [ if collSym accepts collBind ->
    #     collSym  :=  collSym combined with collBind
    #   else ->
    #     scan  +:=  error message ]
    try:
        collSym.bind ( collBind )
    except ValueError:
        # .binding is read as a plain attribute everywhere else in
        # this file, so it must not be called; describe it with
        # .show() as __bindHigherAlt does.
        scan.error ( "This use of `%s' is incompatible with the "
                     "existing use, %s." %
                     ( abbr, collSym.binding.show() ) )

# - - -   T x n y . _ _ b u i l d C o l l i s i o n S e t   - - -

def __buildCollisionSet ( self, scan ):
    """Parse the "code1:code2:...:coden" tail of a collision line.

      [ scan is a Scan object ->
          if the line in scan consists of one or more bird codes
          separated by ":" ->
            scan  :=  scan advanced to end of line
            return a Set containing those codes as returned by
            abbrScan()
          else ->
            scan  +:=  error message(s)
            return None ]
    """
    #-- 1 --
    # [ scan  :=  scan advanced past any whitespace on the line ]
    scan.deblankLine()

    #-- 2 --
    # [ if scan starts with a bird code in free-field form ->
    #     scan  :=  scan advanced past that code and any trailing
    #               whitespace
    #     collSet  :=  a Set made from that code
    #   else ->
    #     return None ]
    # NOTE(review): no message is logged here on failure, so
    # abbrScan() is presumably responsible for it -- confirm.
    abbr = abbrScan ( scan )
    if abbr is None:
        return None
    # NOTE(review): if Set() iterates its argument, this would add
    # the code's characters rather than the code -- confirm against
    # the Set class this file uses.
    collSet = Set ( abbr )
    scan.deblankLine()

    #-- 3 --
    # [ scan  :=  scan advanced past all leading ":code" groups
    #   collSet  :=  collSet with the codes from those groups added
    #   if a ":" is not followed by a bird code ->
    #     return None ]
    while scan.match(":") is not None:
        #-- 3.1 --
        # [ scan starts with ":" ->
        #     scan  :=  scan advanced one, then past any whitespace ]
        scan.move(1)
        scan.deblankLine()

        #-- 3.2 --
        # [ if scan starts with a bird code ->
        #     scan  :=  scan advanced past that code and any
        #               trailing whitespace
        #     collSet  :=  collSet with that code added
        #   else ->
        #     return None ]
        abbr = abbrScan ( scan )
        if abbr is None:
            return None
        collSet.add ( abbr )
        scan.deblankLine()

    #-- 4 --
    # [ if scan is at end of line ->
    #     return collSet
    #   else ->
    #     scan  +:=  error message
    #     return None ]
    if scan.atEndLine():
        return collSet
    scan.error ( "Expecting one or more `:abbr'... groups. " )
    return None

# - - -   T x n y .
# d i s p a t c h T a b l e   - - -
# (dispatchTable and __finalCheck are members of class Txny; the
#  class header lies outside this chunk.)

#--
# This dictionary maps the alt record type codes to parsers for
# the corresponding tail.  Each tail-parser function must conform
# to the generic intended function given after Txny.__parseAlt().
#--
dispatchTable = {      # Parse functions for alt tails
    " ": __parseHigherAlt,
    "=": __parseEquivalent,
    "<": __parseSubspecific,
    "?": __parseCollisionCluster }

# - - -   T x n y . _ _ f i n a l C h e c k   - - -

def __finalCheck ( self ):
    """Make sure all symbols are defined.

      [ if all entries in self.abTab have bindings ->
          I
        else ->
          Log()  +:=  an error message for each unbound entry ]
    """
    # The former "errCount := Log().count()" step was dropped: its
    # result was never read.
    for abSym in self.abTab.genAbSyms():
        if abSym.binding is None:
            Log().error ( "Undefined bird code `%s'." % abSym.abbr )


# - - - - -   c l a s s   T a x o n   - - - - -

class Taxon:
    """Represents one taxon---a species, a family, whatever.

      Exports:
        Taxon ( txny, parent, rank, sci, eng, tex, status,
                canon, disamb ):
          [ (txny is a Txny object) and
            (parent is the new taxon's parent as Taxon or None) and
            (rank is the new taxon's rank as a Rank object) and
            (sci is a scientific name) and
            (eng is an English name) and
            (tex is a TeX English name) and
            (status is a string or None) and
            (canon is the canonical bird code or None) and
            (disamb is the disambiguated bird code or None) ->
              return a new Taxon object with those values ]
        .txny:      [ as passed to constructor ]
        .parent:    [ as passed to constructor ]
        .rank:      [ as passed to constructor ]
        .sci:       [ as passed to constructor ]
        .eng:       [ as passed to constructor, after engSwap() ]
        .tex:       [ as passed to constructor ]
        .status:    [ as passed to constructor ]
        .canon:     [ as passed to constructor ]
        .disamb:    [ as passed to constructor ]
        .depth:     [ self's rank's depth ]
        .shortTxKey:
          [ if self's parent is None -> None
            else -> self's taxonomic key string without trailing
                    zeroes ]
        .birthOrder:
          [ if self.parent is None -> 0
            else -> self's birth order relative to its parent ]
        .txKey():
          [ return self's full taxonomic key string, a string of
            digits that will sort self in phylogenetic order, with
            the length given by self.txny.hier.txKeyLen ]
        .abbr():
          [ if self has a disambiguated bird code ->
              return that code
            else if self has a canonical bird code ->
              return that code
            else -> return None ]
        .nChildren(): [ return the number of self's children, >= 0 ]
        .nthChild(childx):
          [ if self has at least (childx+1) children ->
              return the (childx)th child
            else -> return None ]
        .genChildren():
          [ generate self's children in birth order, if any ]
        .lastChild():
          [ if self has any children ->
              return the last child by birth order
            else -> return None ]
        .show():      [ return self as a string ]
        .xmlWriteSubtree(parentNode):
          [ parentNode is a DOM Element node ->
              parentNode  :=  parentNode with a new TAXON_N child
                  added representing self and self's descendants ]

      State/Invariants:
        .__tree:    [ a Tree object linking us to our relatives ]
    """

# - - -   T a x o n . _ _ i n i t _ _   - - -

    def __init__ ( self, txny, parent, rank, sci, eng, tex, status,
                   canon, disamb ):
        "Constructor for Taxon."
        #-- 1 --
        self.txny = txny
        self.parent = parent
        self.rank = rank
        self.depth = rank.depth
        self.sci = sci
        self.eng = engSwap(eng)
        self.tex = tex
        self.status = status
        self.canon = canon
        self.disamb = disamb

        #-- 2 --
        # [ if parent is None ->
        #     self.__tree  :=  a new Tree node with no parent
        #                      and value=self
        #     self.birthOrder  :=  0
        #   else ->
        #     self.__tree  :=  a new Tree node with
        #         parent=(parent.__tree) and value=self
        #     self.birthOrder  :=  that node's birth order ]
        if self.parent is None:
            self.birthOrder = 0
            self.__tree = Tree ( None, self )
        else:
            parentTree = parent.__tree
            self.__tree = Tree ( parentTree, self )
            self.birthOrder = self.__tree.birthOrder

        #-- 3 --
        # [ self.shortTxKey  :=  as invariant ]
        self.shortTxKey = self.txny.deriveTxKey ( parent, self )

# - - -   T a x o n . t x K e y   - - -

    def txKey ( self ):
        "Return self's full-length taxonomic key."
        # The class invariant allows shortTxKey to be None for a
        # parentless taxon; treat that as an empty prefix so the key
        # is all padding instead of failing in len().
        shortKey = self.shortTxKey or ""
        padLength = self.txny.hier.txKeyLen - len(shortKey)
        return "%s%s" % ( shortKey, "0" * padLength )

# - - -   T a x o n . a b b r   - - -

    def abbr ( self ):
        "Return self's bird code if any, preferring the disambiguation."
        if self.disamb is not None:
            return self.disamb
        # .canon is either the canonical code or None
        return self.canon

# - - -   T a x o n . n C h i l d r e n   - - -

    def nChildren ( self ):
        "Return number of self's children."
        return self.__tree.nChildren()

# - - -   T a x o n . n t h C h i l d   - - -

    def nthChild ( self, childx ):
        "Return self's (childx)th child, counting from 0, or None."
        if childx < self.nChildren():
            return self.__tree.nthChild(childx).value
        return None

# - - -   T a x o n . g e n C h i l d r e n   - - -

    def genChildren ( self ):
        "Generate self's children in birth order."
        # Falling off the end finishes the generator.  An explicit
        # "raise StopIteration" here becomes a RuntimeError on
        # interpreters that implement PEP 479.
        for childx in range(self.nChildren()):
            yield self.nthChild(childx)

# - - -   T a x o n . l a s t C h i l d   - - -

    def lastChild ( self ):
        "Return self's last child, if any, else None."
        kidCount = self.nChildren()
        if kidCount == 0:
            return None
        return self.nthChild(kidCount-1)

# - - -   T a x o n . s h o w   - - -

    def show ( self ):
        "Return self as a string."
        return str(self)

# - - -   T a x o n . _ _ s t r _ _   - - -

    def __str__ ( self ):
        "Return self as a string: English name, then [scientific name]."
        return "%s [%s]" % ( self.eng, self.sci )

# - - -   T a x o n . x m l W r i t e S u b t r e e   - -

    def xmlWriteSubtree ( self, parentNode ):
        """Build an XML subtree representing self and descendants."""
        #-- 1 --
        # [ parentNode  :=  parentNode with a new TAXON_N Element
        #                   child added representing self
        #   newNode  :=  that new child ]
        newNode = self.xmlWriteNode ( parentNode )

        #-- 2 --
        # [ newNode  :=  newNode with children added representing
        #                self's descendants ]
        for subTaxon in self.genChildren():
            subTaxon.xmlWriteSubtree ( newNode )

    # - - -   T a x o n .
# x m l W r i t e N o d e   - - -
# (xmlWriteNode is a method of class Taxon, whose header is on
#  earlier lines.)

def xmlWriteNode ( self, parentNode ):
    """Build a new TAXON_N node for self under parentNode.

      [ parentNode is an XML element ->
          parentNode  :=  parentNode with a new TAXON_N child added
              carrying self's rank code, scientific name, taxonomic
              key, English name, and (when present) status, bird
              code and TeX name
          return that new child ]
    """
    #-- 1 --
    # [ parentNode  :=  parentNode with a new TAXON_N node added
    #   newNode  :=  that new node ]
    newNode = xc.Element ( parentNode, TAXON_N )

    #-- 2 --
    # [ newNode  :=  newNode with attributes set from self ]
    newNode[RANK_A] = self.rank.code
    if self.status:
        newNode[STATUS_A] = self.status
    newNode[SCI_A] = self.sci
    newNode[TX_KEY_A] = self.txKey()

    engNode = xc.Element ( newNode, ENG_N )
    xc.Text ( engNode, self.eng )

    stdAbbr = self.abbr()
    if stdAbbr:
        newNode[STD_ABBR_A] = stdAbbr

    # The TeX name is written only when it adds information.
    if self.tex and ( self.tex != self.eng ):
        texNode = xc.Element ( newNode, TEX_NAME_N )
        xc.Text ( texNode, self.tex )

    #-- 3 --
    return newNode


# - - -   e n g S w a p   - - -

def engSwap ( rawEng ):
    """Handle conversion of 'robin, American' -> 'American robin'

      [ rawEng is a string ->
          if rawEng contains a comma ->
            return (text following the first comma, stripped) + " " +
                   (text preceding the first comma, stripped)
          else -> return rawEng unchanged ]
    """
    #-- 1 --
    # [ if rawEng contains no comma ->
    #     return rawEng
    #   else ->
    #     commaPos  :=  position of the first comma ]
    commaPos = rawEng.find ( "," )
    if commaPos < 0:
        return rawEng

    #-- 2 --
    # [ generic  :=  rawEng before the comma, stripped
    #   specific  :=  rawEng after the comma, stripped ]
    generic = rawEng[:commaPos].strip()
    specific = rawEng[commaPos+1:].strip()

    #-- 3 --
    # [ return (specific + " " + generic) ]
    return "%s %s" % ( specific, generic )


# - - - - -   c l a s s   R a w S t d   - - - - -

class RawStd:
    """Represents a roughly-parsed line from the standard forms file.

      Exports:
        RawStd ( head, tail ):
          [ (head is a StdHead object representing the rank and
            status parts of a standard forms line) and
            (tail is an SpTail or NonSpTail object representing the
            rest of the line) ->
              return a new RawStd object representing that line ]
        .head:      [ as passed to constructor ]
        .spTail():
          [ if self represents a species line ->
              return an SpTail object representing that line's fields
            else -> raise ValueError ]
        .nonSpTail():
          [ if self represents a non-species line ->
              return a NonSpTail object representing that line's
              fields
            else -> raise ValueError ]
        .parse ( hier, scan ):     # Static method
          [ (hier is a Hier object) and
            (scan is a Scan object) ->
              if the current line in scan is a syntactically valid
              std line representing a level in hier ->
                scan  :=  scan advanced to end of line
                return a RawStd object representing that line
              else if the current line in scan is syntactically
              valid but represents a level not in hier ->
                scan  :=  scan advanced no further than end of line
                return None
              else ->
                scan  :=  scan advanced to end of line
                scan  +:=  error message(s)
                raise ValueError ]

      Invariants:
        .tail:      [ as passed to constructor ]
    """

# - - -   R a w S t d . _ _ i n i t _ _   - - -

    def __init__ ( self, head, tail ):
        "Constructor for RawStd"
        self.head = head
        self.tail = tail

# - - -   R a w S t d . s p T a i l   - - -

    def spTail ( self ):
        "Return self's SpTail if it has one, else raise ValueError."
        # A head with no rank marks a species line.
        if self.head.rank is None:
            return self.tail
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError ( "RawStd.spTail() called on non-species." )

# - - -   R a w S t d . n o n S p T a i l   - - -

    def nonSpTail ( self ):
        "Return self's NonSpTail if it has one, else raise ValueError."
        if self.head.rank is None:
            raise ValueError ( "RawStd.nonSpTail() called on species." )
        return self.tail

    # - - -   R a w S t d .
# p a r s e   - - -
# (This first parse() is the static method RawStd.parse; the header
#  of class RawStd is on earlier lines.)

def parse ( hier, scan ):    # Static method
    """Try to parse a standard forms line.

      [ (hier is a Hier object) and (scan is a Scan object) ->
          if StdHead.parse() returns None for the line's head ->
            return None
          else ->
            return a new RawStd made from that head and from the
            tail parsed by SpTail.parse() (head has no rank) or
            NonSpTail.parse() (head has a rank)
          ValueError raised by those parsers propagates ]
    """
    #-- 1 --
    # [ head  :=  the parsed head, or return None if there is none ]
    head = StdHead.parse ( hier, scan )
    if head is None:
        return None

    #-- 2 --
    # [ tailParser  :=  the tail parser that matches head's kind ]
    if head.rank is None:
        tailParser = SpTail.parse         # Species line
    else:
        tailParser = NonSpTail.parse      # Non-species line

    #-- 3 --
    # [ return a new RawStd object made from head and the tail ]
    return RawStd ( head, tailParser ( scan ) )

parse = staticmethod(parse)


# - - - - -   c l a s s   S t d H e a d   - - - - -

class StdHead:
    """Represents the parts of a standard forms line that are universal.

      Exports:
        StdHead ( rank, status ):
          [ (rank is a Rank object or None) and
            (status is a status code as a string, or None) ->
              return a new StdHead object with those values ]
        .rank:      [ as passed to constructor ]
        .status:    [ as passed to constructor ]
        .parse ( hier, scan ):      # Static method
          [ (hier is a Hier object) and (scan is a Scan object) ->
              if the line in scan starts with a valid head
              containing a rank code in hier or no rank code ->
                scan  :=  scan advanced past the head
                return a new StdHead representing the head
              else if the line in scan starts with a valid head
              containing a rank code not in hier ->
                scan  :=  scan advanced past the head
                return None
              else ->
                scan  +:=  error message(s)
                scan  :=  scan advanced no further than end of line
                raise ValueError ]
    """

# - - -   S t d H e a d . _ _ i n i t _ _   - - -

    def __init__ ( self, rank, status ):
        "Constructor for StdHead"
        self.rank, self.status = rank, status

# - - -   S t d H e a d . p a r s e   - - -

    def parse ( hier, scan ):
        "Parse the head section of a standard forms line."
        #-- 1 --
        # [ if scan can be advanced by L_RANK_CODE characters ->
        #     scan  :=  scan so advanced
        #     rawRank  :=  the value returned by that move
        #   else ->
        #     scan  +:=  error message
        #     raise ValueError ]
        try:
            rawRank = scan.move ( L_RANK_CODE )
        except IndexError:
            scan.error ( "Expecting a %d-character rank code field." %
                         L_RANK_CODE )
            raise ValueError

        #-- 2 --
        # [ if the start of scan matches statusRe ->
        #     scan  :=  scan advanced past the matching part
        #     status  :=  the matching part, or None if it is " "
        #   else ->
        #     scan  +:=  error message
        #     raise ValueError ]
        statusMatch = scan.tabReMatch ( statusRe )
        if statusMatch is None:
            scan.error ( "Expecting a %d-character status code field." %
                         L_STATUS )
            raise ValueError
        status = statusMatch.group()
        if status == " ":
            status = None             # Normal status is stored as None

        #-- 3 --
        # [ if rawRank is blank ->
        #     return a StdHead with rank None
        #   else if rawRank, right-stripped, is a rank code in hier ->
        #     return a StdHead with the corresponding Rank object
        #   else ->
        #     return None ]
        if rawRank == ( " " * L_RANK_CODE ):
            return StdHead ( None, status )
        try:
            rank = hier.lookupRankCode ( rawRank.rstrip() )
        except KeyError:
            return None
        return StdHead ( rank, status )

    parse = staticmethod ( parse )


# - - - - -   c l a s s   N o n S p T a i l   - - - - -

class NonSpTail:
    """Represents a non-species standard forms line beyond the head.

      Exports:
        NonSpTail ( sci, eng ):
          [ (sci is a scientific name as a string) and
            (eng is an English name as a string) ->
              return a new NonSpTail with those values ]
        .sci:       [ as passed to constructor ]
        .eng:       [ as passed to constructor ]
        .parse ( scan ):      # Static method
          [ scan is a Scan object ->
              if the line in scan is a valid non-species tail ->
                scan  :=  scan advanced to the end of the line
                return a new NonSpTail object representing the tail
              else ->
                scan  +:=  error message(s)
                raise ValueError ]
    """

# - - -   N o n S p T a i l . _ _ i n i t _ _   - - -

    def __init__ ( self, sci, eng ):
        "Constructor for NonSpTail"
        self.sci, self.eng = sci, eng

# - - -   N o n S p T a i l . p a r s e   - - -

    def parse ( scan ):
        "Parse the tail section of a non-species standard forms line."
        #-- 1 --
        # [ if scan starts with a match for htNameRe ->
        #     scan  :=  scan advanced past the match and any
        #               following whitespace
        #     sci  :=  the matched text
        #   else ->
        #     scan  +:=  error message
        #     raise ValueError ]
        nameMatch = scan.tabReMatch ( htNameRe )
        if nameMatch is None:
            scan.error ( "Expecting a capitalized scientific name, "
                         "e.g., `Gaviiformes'." )
            raise ValueError
        sci = nameMatch.group()
        scan.deblankLine()

        #-- 2 --
        # [ if scan starts with "/" ->
        #     scan  :=  scan advanced past that
        #   else ->
        #     scan  +:=  error message
        #     raise ValueError ]
        if scan.tabMatch ( "/" ) is None:
            scan.error ( "Expecting a `/' after the scientific name." )
            raise ValueError

        #-- 3 --
        # [ scan  :=  scan advanced to end of line
        #   eng  :=  the rest of the line, minus trailing whitespace ]
        eng = scan.tab(-1).rstrip()

        #-- 4 --
        # [ if eng has at least two characters ->
        #     return a new NonSpTail with sci=(sci) and eng=(eng)
        #   else ->
        #     scan  +:=  error message
        #     raise ValueError ]
        if len ( eng ) < 2:
            scan.error ( "The English name `%s' is too short." % eng )
            raise ValueError
        return NonSpTail ( sci, eng )

    parse = staticmethod ( parse )


# - - - - -   c l a s s   S p T a i l   - - - - -

class SpTail:
    """Represents a species standard forms line beyond the head.

      Exports:
        SpTail ( genus, subgenus, species, eng, disamb ):
          [ (genus is a genus name as a string) and
            (subgenus is a subgenus name as a string or None) and
            (species is a species name as a string) and
            (eng is an English name as a string) and
            (disamb is a disambiguation code as a string or None) ->
              return a new SpTail object with those values ]
        .genus:       [ as passed to constructor ]
        .subgenus:    [ as passed to constructor ]
        .species:     [ as passed to constructor ]
        .eng:         [ as passed to constructor ]
        .disamb:      [ as passed to constructor ]
        .canonical:   [ abbreviate() applied to self.eng ]
        .parse ( scan ):       # Static method
          [ scan is a Scan object ->
              if scan is a valid species tail ->
                return a new SpTail object representing that tail
              else ->
                scan  +:=  error message(s)
                raise ValueError ]
    """

# - - -   S p T a i l . _ _ i n i t _ _   - - -

    def __init__ ( self, genus, subgenus, species, eng, disamb ):
        "Constructor for SpTail"
        self.genus, self.subgenus, self.species = (
            genus, subgenus, species )
        self.eng = eng
        self.disamb = disamb
        # The canonical bird code is derived from the English name.
        self.canonical = abbreviate(self.eng)

    # - - -   S p T a i l .
p a r s e S c i - - - def parseSci ( scan ): """Parse `Genus (Subgenus) species', where the subgenus is optional. [ if scan is a Scan object -> if scan starts with a valid species scientific name -> scan := scan advanced past that name and any trailing whitespace genus := genus name subgenus := subgenus name, if any, else None species := species name else -> scan +:= error message raise ValueError ] """ #-- 1 -- # [ if scan starts with a higher taxon name -> # scan := scan advanced past that name and any # trailing whitespace # genus := that name # else -> # scan +:= error message # raise ValueError ] m = scan.tabReMatch ( htNameRe ) if m is None: scan.error ( "Expecting a capitalized genus name, " "e.g., `Fulica'." ) raise ValueError else: genus = m.group() scan.deblankLine() #-- 2 -- # [ if scan starts with a subgenus name in parentheses -> # scan := scan advanced past all that and any # trailing whitespace # subgenus := that name # else -> # subgenus := None ] m = scan.tabReMatch ( subgenusRe ) if m is None: subgenus = None else: subgenus = m.group ( SUBGENUS_GROUP ) scan.deblankLine() #-- 3 -- # [ if scan starts with a lowercase letter -> # scan := scan advanced past all leading lowercase letters # and any trailing whitespace # species := those letters # else -> # scan +:= error message # raise ValueError ] m = scan.tabReMatch ( speciesRe ) if m is None: scan.error ( "Expecting a lowercase species name, " "e.g., `celata'." ) raise ValueError else: species = m.group() scan.deblankLine() #-- 4 -- return (genus, subgenus, species) parseSci = staticmethod(parseSci) # - - - S p T a i l . p a r s e - - - def parse ( scan ): "Parse the tail section of a standard species line." 
#-- 1 -- # [ if scan starts with a valid species scientific name -> # scan := scan advanced past that name and any # trailing whitespace # genus := genus name # subgenus := subgenus name, if any, else None # species := species name # else -> # scan +:= error message # raise ValueError ] genus, subgenus, species = SpTail.parseSci ( scan ) #-- 2 -- # [ if scan starts with "/" -> # scan := scan advanced past that and any trailing whitespace # else -> # scan +:= error message # raise ValueError ] slash = scan.tabMatch ( "/" ) if slash is None: scan.error ( "Expecting the `/' before the English name." ) raise ValueError #-- 3 -- # [ if there is a "/" anywhere on the current line -> # scan := scan advanced past the first slash and # any trailing whitespace # rawEng := text up to the first "/" on the line # else -> # scan := scan advanced to end of line # rawEng := text through end of line, minus any trailing space ] slashPos = scan.upto ( slashCset ) if slashPos is None: # No slash rawEng = scan.tab(-1).rstrip() else: rawEng = scan.tab(slashPos).rstrip() # Move up to "/" scan.move(1) # Move past "/" scan.deblankLine() # Skip any trailing space #-- 4 -- # [ if rawEng contains a comma -> # eng := (text following comma, leading spaces dropped) + # " " + (text preceding comma) # else -> # eng := rawEng ] eng = engSwap ( rawEng ) #-- 5 -- # [ if scan is at end of line -> # disamb := None # else if scan starts with a bird code -> # scan := scan advanced past that code and any # trailing whitespace # disamb := that code # else -> # scan +:= error message # raise ValueError ] if scan.atEndLine(): # End of line, no disambiguation disamb = None else: # Not at end of line disamb = abbrScan ( scan ) # Look for a bird code if disamb is None: raise ValueError else: scan.deblankLine() #-- 6 -- # [ if scan is at end of line -> # return a new SpTail made from genus, subgenus, species, # eng, and disamb # else -> # scan +:= error message # raise ValueError ] if scan.atEndLine(): return 
SpTail ( genus, subgenus, species, eng, disamb ) else: scan.error ( "Unrecognized characters at end of line." ) raise ValueError parse = staticmethod ( parse ) # - - - w r i t e T r e e F i l e - - - def writeTreeFile ( args, txny ): """Write the file with standard taxa [ (args is the Args object) and (txny is a Txny object) -> tree-file(args.basename) := tree-data(txny) ] """ #-- 1 -- # [ if tree-file(args.basename) can be opened new for writing -> # treeFile := that file so opened # else -> # sys.stderr +:= error message # stop execution ] treeFileName = args.basename + TREE_EXTENSION try: treeFile = open ( treeFileName, "w" ) except IOError, detail: Log().fatal ( "Can't open tree file `%s' for writing." % treeFileName ) #-- 2 -- # [ treeFile +:= tree-data(txny) ] writeSubtree ( treeFile, txny.root ) treeFile.close() # - - - w r i t e S u b t r e e - - - def writeSubtree ( treeFile, taxon ): """Write tree data for taxon and all its descendants. [ (treeFile is a writeable file) and (taxon is a Taxon) -> treeFile +:= tree records for taxon and all its descendants ] """ #-- 1 -- # [ treeFile +:= tree record for taxon ] writeTaxon ( treeFile, taxon ) #-- 2 -- # [ treeFile +:= tree records for taxon's descendants ] for child in taxon.genChildren(): writeSubtree ( treeFile, child ) # - - - w r i t e T a x o n - - - def writeTaxon ( treeFile, taxon ): """Write one tree record. [ (treeFile is a writeable file) and (taxon is a Taxon) -> treeFile +:= tree record for taxon ] """ #-- 1 -- # [ if ( ( len ( taxon.sci ) > L_SCI ) or # ( len ( taxon.eng ) > L_ENG ) ) -> # Log() +:= error message # stop execution # else -> I ] if len(taxon.sci) > L_SCI: Log().fatal ( "Scientific name `%s' too long (%d), field " "capacity is %d." % ( taxon.sci, len(taxon.sci), L_SCI ) ) if len(taxon.eng) > L_ENG: Log().fatal ( "English name `%s' too long (%d), field " "capacity is %d." 
# - - -   w r i t e A b b r F i l e s   - - -

def writeAbbrFiles ( args, txny ):
    """Write the files of standard and collision bird codes.

      [ (args is the Args object) and
        (txny is a Txny object) ->
          abbr-file(args.basename)  :=  abbr-data(txny)
          coll-file(args.basename)  :=  coll-data(txny) ]

      Every symbol yielded by txny.abTab.genAbSyms() is written to
      exactly one file: symbols whose binding has className
      "CollBind" go to the collisions file via writeColl(), all
      others go to the abbreviations file via writeAbbr().
    """

    #-- 1 --
    # [ if (args.basename+ABBR_EXTENSION) can be opened new for writing ->
    #     abbrFile  :=  that file so opened
    #   else ->
    #     Log()  +:=  error message
    #     stop execution ]
    abbrFileName = args.basename + ABBR_EXTENSION
    try:
        abbrFile = open ( abbrFileName, "w" )
    except IOError:
        Log().fatal ( "Can't open abbreviations file `%s' for writing." %
                      abbrFileName )

    # From here on abbrFile is open; the outer try/finally closes it
    # even if opening the collisions file or writing a line fails.
    try:
        #-- 2 --
        # [ if (args.basename+COLL_EXTENSION) can be opened new for
        #   writing ->
        #     collFile  :=  that file so opened
        #   else ->
        #     Log()  +:=  error message
        #     stop execution ]
        collFileName = args.basename + COLL_EXTENSION
        try:
            collFile = open ( collFileName, "w" )
        except IOError:
            Log().fatal ( "Can't open collisions file `%s' for writing." %
                          collFileName )

        try:
            #-- 3 --
            # [ abbrFile  +:=  abbr lines for non-collision bird codes
            #                  in txny
            #   collFile  +:=  coll lines for collision bird codes
            #                  in txny ]
            for sym in txny.abTab.genAbSyms():
                #-- 3 body --
                # [ if sym is a collision code ->
                #     collFile  +:=  a coll line for sym
                #   else ->
                #     abbrFile  +:=  an abbr line for sym ]

                #-- 3.1 --
                # [ abBind  :=  sym's binding
                #   abbr    :=  sym.abbr ]
                abBind = sym.binding
                abbr = sym.abbr

                #-- 3.2 --
                # [ if abBind is a collision symbol ->
                #     collFile  +:=  collision line for abbr->abBind
                #   else ->
                #     abbrFile  +:=  abbr line for abbr->abBind ]
                # NOTE(review): xmlAbbrSet() tests className against
                # COLL_BIND_CLASS; presumably that constant equals
                # "CollBind" -- confirm before unifying the two.
                if abBind.className == "CollBind":
                    writeColl ( collFile, abbr, abBind )
                else:
                    writeAbbr ( abbrFile, abbr, abBind )
        finally:
            #-- 4 --
            collFile.close()
    finally:
        abbrFile.close()
# - - -   w r i t e C o l l   - - -

def writeColl ( collFile, abbr, abBind ):
    """Write the collision lines for one collision code.

      [ (collFile is a writeable file handle) and
        (abbr is a bird code) and
        (abBind is a CollBind) ->
          collFile  +:=  collision lines for abbr->abBind ]

      One line is written per code yielded by
      abBind.collSet.genSorted(): abbr immediately followed by that
      code and a newline.
    """

    #-- 1 --
    # [ collFile  +:=  a collision line (abbr, otherAbbr) for each
    #                  otherAbbr in abBind.collSet, in sorted order ]
    collLines = ( "%s%s\n" % ( abbr, rival )
                  for rival in abBind.collSet.genSorted() )
    for collLine in collLines:
        collFile.write ( collLine )


# - - -   w r i t e A b b r   - - -

def writeAbbr ( abbrFile, abbr, abBind ):
    """Write a non-collision abbreviation record.

      [ (abbrFile is a writeable file handle) and
        (abbr is a bird code) and
        (abBind is a non-collision AbBind) ->
          abbrFile  +:=  abbr line for abbr->abBind ]

      The record is abbr, then the taxon's scientific name
      left-justified in L_SCI columns, then the English name.
    """

    #-- 1 --
    # [ engName  :=  English name for abBind ]
    engName = abBind.eng()

    #-- 2 --
    # [ if abBind refers to a specific taxon ->
    #     target  :=  that taxon as a Taxon object
    #   else ->
    #     Log()  +:=  error message
    #     stop execution ]
    target = abBind.lookup()
    if target is None:
        Log().fatal ( "Code `%s' leads to a reference loop." % abbr )

    #-- 3 --
    # [ abbrFile  +:=  abbr line for (abbr, target.sci, engName) ]
    record = "%s%-*s%-s\n" % ( abbr, L_SCI, target.sci, engName )
    abbrFile.write ( record )
# - - -   w r i t e X M L F i l e   - - -

def writeXMLFile ( args, txny ):
    """Generate all output to one XML file using the txny.rnc schema.

      [ (args is an Args object) and
        (txny is a Txny object) ->
          xml-file(args.basename)  :=  xml-tree-data(txny) ]

      The output file name is args.basename + XML_EXTENSION.  The
      document tree is built completely before the file is opened.
    """

    #-- 1 --
    # [ doc  :=  a new XML, empty document tree as an
    #            xc.Document object
    #   outFileName  :=  xml-file(args.basename) ]
    doc = xc.Document()
    outFileName = args.basename + XML_EXTENSION

    #-- 2 --
    # [ doc  :=  doc with an XML representation of txny added,
    #            conforming to txny.rnc ]
    xmlTree ( doc, txny )

    #-- 3 --
    # [ if outFileName can be opened new for writing ->
    #     outFile  :=  that file so opened
    #   else ->
    #     Log()  +:=  error message
    #     stop execution ]
    # Reported through Log().fatal() like the other output writers
    # in this file, rather than escaping as a raw IOError.
    try:
        outFile = open ( outFileName, "w" )
    except IOError:
        Log().fatal ( "Can't open XML file `%s' for writing." %
                      outFileName )

    #-- 4 --
    # [ outFile  :=  doc in XML ]
    # The handle is closed explicitly so the output is flushed even
    # if serialization raises.
    try:
        doc.write ( outFile )
    finally:
        outFile.close()


# - - -   x m l T r e e   - - -

def xmlTree ( doc, txny ):
    """Build the XML document tree.

      [ (doc is a DOM Document node) and
        (txny is a Txny object) ->
          doc  :=  doc with an XML representation of txny added,
                   conforming to txny.rnc ]

      The children of the root element are added in this order:
      rank set, taxonomy, then abbreviation and collision sets.
    """

    #-- 1 --
    # [ doc  :=  doc with a new root TAXONOMY_SYSTEM_N node
    #   root  :=  that node ]
    root = xc.Element ( doc, TAXONOMY_SYSTEM_N )

    #-- 2 --
    # [ root  :=  root with a new RANK_SET_N node added representing
    #             txny.hier ]
    xmlRankSet ( root, txny )

    #-- 3 --
    # [ root  :=  root with a new TAXONOMY_N node added
    #             representing txny.root ]
    xmlTaxonomy ( root, txny )

    #-- 4 --
    # [ root  :=  root with a new ABBR_SET_N node added
    #             representing codes from txny.abTab and a new
    #             COLLISION_SET_N node added representing
    #             collisions from txny.abTab ]
    xmlAbbrSet ( root, txny )


# - - -   x m l R a n k S e t   - - -

def xmlRankSet ( root, txny ):
    """Add the RANK_SET_N subtree.

      [ (root is a TAXONOMY_SYSTEM_N node) and
        (txny is a Txny object) ->
          root  :=  root with a new RANK_SET_N node added representing
                    txny.hier ]
    """

    #-- 1 --
    # [ root  :=  root with a new RANK_SET_N node added
    #   rankSet  :=  that new node ]
    rankSet = xc.Element ( root, RANK_SET_N )

    #-- 2 --
    # [ rankSet  :=  rankSet with RANK_N nodes added representing
    #                the ranks from txny.hier in top-down order ]
    for rank in txny.hier.ranks():
        xmlAddRank ( rankSet, rank )
# - - -   x m l A d d R a n k   - - -

def xmlAddRank ( rankSet, rank ):
    """Add one RANK_N node to rankSet.

      [ (rankSet is an Element node) and
        (rank is a hier.Rank object) ->
          rankSet  :=  rankSet with a new RANK_N object added
                       representing rank ]
    """

    #-- 1 --
    # [ rankSet  :=  rankSet with a new RANK_N node added
    #   node     :=  that node ]
    node = xc.Element ( rankSet, RANK_N )

    #-- 2 --
    # [ node  :=  node with a CODE_A attribute representing
    #             rank.code, a DIGITS_A attribute representing
    #             rank.keyLen, and a DEPTH_A attribute representing
    #             rank.depth, set in that order ]
    requiredAttrs = ( ( CODE_A,   rank.code ),
                      ( DIGITS_A, str(rank.keyLen) ),
                      ( DEPTH_A,  str(rank.depth) ) )
    for attrName, attrValue in requiredAttrs:
        node[attrName] = attrValue

    #-- 3 --
    # [ if rank.isOptional ->
    #     node  :=  node with an OPTIONAL_A attribute of 1
    #   else -> I ]
    if rank.isOptional:
        node[OPTIONAL_A] = '1'

    #-- 4 --
    # [ node  :=  node with a Text child made from rank.name ]
    xc.Text ( node, rank.name )


# - - -   x m l T a x o n o m y   - - -

def xmlTaxonomy ( root, txny ):
    """Add the TAXONOMY_N subtree to root.

      [ (root is a TAXONOMY_SYSTEM_N node) and
        (txny is a Txny object) ->
          root  :=  root with a new TAXONOMY_N node added representing
                    txny.root and its descendants ]
    """

    #-- 1 --
    # [ root  :=  root with a new, empty TAXONOMY_N node added
    #   holder  :=  that new node ]
    holder = xc.Element ( root, TAXONOMY_N )

    #-- 2 --
    # [ holder  :=  holder with txny.root's subtree added ]
    topTaxon = txny.root
    topTaxon.xmlWriteSubtree ( holder )


# - - -   x m l A b b r S e t   - - -

def xmlAbbrSet ( root, txny ):
    """Add the ABBR_SET_N and COLLISION_SET_N subtrees.

      [ (root is a TAXONOMY_SYSTEM_N node) and
        (txny is a Txny object) ->
          root  :=  root with new ABBR_SET_N and COLLISION_SET_N
                    nodes added representing codes from txny.abTab ]
    """

    #-- 1 --
    # [ root  :=  root with a new, empty ABBR_SET_N node and a new,
    #             empty COLLISION_SET_N node added
    #   abbrParent  :=  that ABBR_SET_N node
    #   collParent  :=  that COLLISION_SET_N node ]
    abbrParent = xc.Element ( root, ABBR_SET_N )
    collParent = xc.Element ( root, COLLISION_SET_N )

    #-- 2 --
    # [ abbrParent  :=  abbrParent with all valid abbreviations
    #                   added from txny.abTab
    #   collParent  :=  collParent with all collision abbreviations
    #                   added from txny.abTab ]
    for sym in txny.abTab.genAbSyms():
        #-- 2 body --
        # [ if sym's binding is a collision ->
        #     collParent  :=  collParent with a new node added
        #                     representing sym
        #   else ->
        #     abbrParent  :=  abbrParent with a new node added
        #                     representing sym ]
        binding = sym.binding
        if binding.className == COLL_BIND_CLASS:
            parent = collParent
        else:
            parent = abbrParent
        binding.xmlWrite ( parent )
# - - - - -   m a i n   - - - - -

Log().write("=== %s %s ===" % (PROGRAM_NAME, EXTERNAL_VERSION))

#-- 1 --
# [ if sys.argv contains valid command line arguments ->
#     args  :=  an Args object representing those arguments
#   else ->
#     sys.stderr  +:=  (usage message) + (error message)
#     stop execution ]
Log().addLogFile(LOG_FILE)
args = Args()

#-- 2 --
# [ if ranks-file is a readable, valid ranks file ->
#     hier  =  a new Hier object representing that file
#   else ->
#     Log()  +:=  error message(s)
#     stop execution ]
# NOTE(review): `hier.HierError' is only evaluated when Hier() has
# raised, i.e. before the assignment below rebinds `hier'; this
# presumably relies on `hier' already naming the imported module --
# confirm against the imports section.
# `except ... as' and repr() replace the Python-2-only comma and
# backtick forms; behavior is unchanged (requires Python 2.6+).
try:
    hier = Hier ( )
except hier.HierError as detail:
    Log().fatal ( "Errors in ranks file: %s" % repr(detail) )

#-- 3 --
# [ if ( ( std-file(args.basename) is a readable, valid standard
#          forms file conforming to hier ) and
#        ( alt-file(args.basename) is a readable, valid alternate
#          forms file conforming to hier and
#          std-file(args.basename) ) ) ->
#     txny  :=  a new Txny object representing those files
#   else ->
#     Log()  +:=  error message(s)
#     stop execution ]
txny = Txny ( hier, args.basename )

#-- 4 --
# [ if args.xmlOutput ->
#     xml-file(args.basename)  :=  xml-tree-data(txny)
#   else ->
#     tree-file(args.basename)  :=  tree-data(txny) ]
if args.xmlOutput:
    writeXMLFile ( args, txny )
else:
    writeTreeFile ( args, txny )

#-- 5 --
# [ abbr-file(args.basename)  :=  abbr-data(txny)
#   coll-file(args.basename)  :=  coll-data(txny) ]
# The abbreviation and collision files are written in both output
# modes.
writeAbbrFiles ( args, txny )
Log().write("=== Total errors, %d." % Log().count() )