""" hier.py:  Taxonomic hierarchy objects
    $Revision: 1.1 $  $Date: 2009/10/19 01:10:30 $

    These objects represent the ``ranks file'' used by the `nombuild'
    system; refer to the document ``A system for representing
    taxonomic nomenclature'' for details, online at
        http://www.nmt.edu/~shipman/z/cbc/nomo.html

    Exports:
      class HierError(Exception):  Our generic exception.
        .__str__():  Stringify exception information.
      class Hier:  Represents the selected set of taxonomic ranks
        Hier ( ranksFileName=None ):
          [ if (ranksFileName is a string or None) ->
              if (effective-ranks-file-name(ranksFileName) names a
              readable, valid ranks file) ->
                return a new Hier object representing that file
              else ->
                Log() +:= error message(s)
                raise HierError ]
        .nRanks:    [ number of ranks in self ]
        .txKeyLen:  [ the length of the taxonomic key string ]
        .nthRank ( n ):
          [ if n is an integer ->
              if there are at least n+1 ranks in self ->
                return self's (n)th rank, counting from zero
              else -> raise IndexError ]
        .ranks():
          [ returns a new list of the Rank obs in self, from
            largest to smallest ]
        .lookupRankCode ( c ):
          [ if c is a string ->
              if c is a rank code in self ->
                return the Rank object for that code
              else -> raise KeyError ]
        .genusRank():
          [ if self contains a genus rank ->
              return that Rank
            else -> return None ]
        .subgenusRank():
          [ if self contains a subgenus rank ->
              return that Rank
            else -> return None ]
        .speciesRank():
          [ if self contains a species rank ->
              return that Rank
            else -> return None ]
        .formRank():
          [ if self contains a form rank ->
              return that Rank
            else -> return None ]
        .canParentHaveChild ( p, c ):
          [ if p and c are Rank objects ->
              if p.depth >= c.depth ->
                return 0
              else if there is a rank m such that
              p.depth < m.depth < c.depth and m is not optional ->
                return 0
              else -> return 1 ]
        .scanTxKey ( scan ):
          [ if scan is a Scan object ->
              if scan starts with a valid taxonomic key number
              for self ->
                scan  :=  scan advanced past that number
                return that number
              else ->
                scan  +:=  error message
                return None ]
        .txKeyRank ( txKey ):
          [ if (txKey is a string) ->
              if txKey is all zeroes ->
                return the root Rank object from self
              else ->
                return the Rank object corresponding to the
                rightmost nonzero subfield of txKey ]
        .txKeySubfield ( n ):
          [ returns (x,y) where characters [x:y] of a taxonomic
            key correspond to the rank with depth n ]
        .getField ( txKey, n ):
          [ if (txKey is a TxKey string)
            and (0 <= n < self.nRanks ) ->
              return the (n)th field of txKey, counting from 0,
              as an integer ]
        .clearField ( txKey, n ):
          [ if (txKey is a TxKey string)
            and (0 <= n < self.nRanks ) ->
              return txKey with the (n)th field, counting from 0,
              set to zeroes ]
      class Rank:  Represents one taxonomic rank, e.g., family rank
        Rank ( hier, depth, code, isOptional, keyLen, name )
          [ if (hier is the containing Hier object)
            and (depth is a nonnegative integer representing the
            distance of this rank below root, root being 0)
            and (code is a string not exceeding L_RANK_CODE in length)
            and (isOptional is 0 for required taxa and 1 for
            optional ones)
            and (keyLen is a nonnegative integer giving the number
            of digits in a taxonomic key for this rank)
            and (name is the name of the rank) ->
              return a new Rank object with those values ]
        .hier:        [ as passed to constructor ]
        .depth:       [ as passed to constructor ]
        .code:        [ self's code, right-trimmed ]
        .isOptional:  [ if self is an optional rank -> 1 else -> 0 ]
        .keyLen:      [ the length of self's part of a txKey ]
        .name:        [ self's name, e.g., "Subfamily" ]
        .__str__():   [ returns a string representing self ]

    Exported constants (synchronize with `nombuild' program):
      L_RANK_CODE:  Maximum length of a rank code
"""

#================================================================
# Imports
#----------------------------------------------------------------

import string, copy, os, sys
sys.path.insert(0, "/u/john/tcc/python/lib")
from log import *
from scan import *
from cset import *

#================================================================
# Verification functions
#----------------------------------------------------------------
# effective-ranks-file-name ( arg )  ==
#   if arg is not None ->
#     arg
#   else if ENV_NOMO names a currently defined environmental
#   variable ->
#     (value of that environmental variable) + DEFAULT_RANKS_NAME
#   else ->
#     DEFAULT_RANKS_NAME
#----------------------------------------------------------------

#================================================================
# Manifest constants
#================================================================
# Environmental variables
#----------------------------------------------------------------

ENV_NOMO = "TXNY_HOME"          # Names directory containing input files

#----------------------------------------------------------------
# Field lengths in the flat files
#----------------------------------------------------------------

L_RANK_CODE = 2                 # Rank code length
DEFAULT_RANKS_NAME = "ranks"    # Default ranks file name


# - - - - -   c l a s s   H i e r E r r o r   - - - - -

class HierError(Exception):
    """Our generic exception: raised when a ranks file cannot be
    opened or is not valid.
    """

    def __str__(self):
        """Return the exception text prefixed with "*Hier error* "."""
        return "*Hier error* " + Exception.__str__(self)


# - - - - -   c l a s s   H i e r   - - - - -

class Hier:
    """Represents the taxonomic ranks of interest, largest to smallest.

      State/Invariants:
        .nRanks == number of Rank objects in self.__rankList
        .txKeyLen == sum of r.keyLen for all ranks r in self
        .__rankList == list of Rank objects ordered from largest
        .__fieldList ==
          list of (x,y) tuples corresponding to the ranks in
          self.__rankList such that for a txKey T, T[x:y] is
          the field for the corresponding rank
        .__rankMap ==
          dictionary that maps each rank code |-> the corresponding
          Rank object
    """

    GENUS_CODE = "g"            # Hardwired rank codes
    SUBGENUS_CODE = "-g"
    SPECIES_CODE = "s"
    FORM_CODE = "x"

    # - - -   H i e r . n t h R a n k   - - -

    def nthRank(self, n):
        """Return the (n)th rank, counting from zero.

        Plain list indexing: raises IndexError when n is out of range.
        """
        return self.__rankList[n]

    # - - -   H i e r . r a n k s   - - -

    def ranks(self):
        """Return a new list of the Rank objects, largest to smallest.

        The list is a shallow copy, so callers may modify it without
        disturbing self.
        """
        return list(self.__rankList)

    # - - -   H i e r . l o o k u p R a n k C o d e   - - -

    def lookupRankCode(self, c):
        """Return the Rank object whose code is c.

        Raises KeyError if c is not a rank code in self.
        """
        return self.__rankMap[c]

    # - - -   H i e r . g e n u s R a n k   - - -

    def genusRank(self):
        """Return the Rank object for genus rank, or None if absent."""
        return self.__rankMap.get(self.GENUS_CODE)

    # - - -   H i e r . s u b g e n u s R a n k   - - -

    def subgenusRank(self):
        """Return the Rank object for subgenus rank, or None if absent."""
        return self.__rankMap.get(self.SUBGENUS_CODE)

    # - - -   H i e r . s p e c i e s R a n k   - - -

    def speciesRank(self):
        """Return the Rank object for species rank, or None if absent."""
        return self.__rankMap.get(self.SPECIES_CODE)

    # - - -   H i e r . f o r m R a n k   - - -

    def formRank(self):
        """Return the Rank object for form rank, or None if absent."""
        return self.__rankMap.get(self.FORM_CODE)

    # - - -   H i e r . c a n P a r e n t H a v e C h i l d   - - -

    def canParentHaveChild(self, p, c):
        """Can rank p have a child of rank c?

          [ p and c are Rank objects ->
              if p.depth >= c.depth ->
                return 0
              else if there is a rank m such that
              p.depth < m.depth < c.depth and m is not optional ->
                return 0
              else -> return 1 ]
        """
        #-- 1 --
        # A child must lie strictly below its parent.
        if p.depth >= c.depth:
            return 0

        #-- 2 --
        # [ if there is at least one rank m such that
        #   p.depth < m.depth < c.depth and m is not optional ->
        #     return 0
        #   else -> I ]
        for i in range(p.depth + 1, c.depth):
            if not self.__rankList[i].isOptional:
                # A required rank may not be skipped.
                return 0

        #-- 3 --
        return 1

    # - - -   H i e r . s c a n T x K e y   - - -

    def scanTxKey(self, scan):
        """See if scan starts with a valid taxonomic key string.

        Delegates to scan.flatCset() with self.txKeyLen and the
        `digits' character set.  Note: returns the string, or None
        if not valid.
        """
        return scan.flatCset(self.txKeyLen, digits)

    # - - -   H i e r . t x K e y R a n k   - - -

    def txKeyRank(self, txKey):
        """Find the rank implied by the value of taxonomic key txKey.

          [ txKey is a string ->
              if txKey is all zeroes ->
                return the root Rank object from self
              else ->
                return the Rank object corresponding to the
                rightmost nonzero subfield of txKey ]
        """
        #-- 1 --
        # [ if any subfield of txKey is nonzero ->
        #     return the rank corresponding to the rightmost
        #     nonzero subfield of txKey
        #   else -> I ]
        #--Note: Normally when iterating in the negative direction, the
        #--last argument to range() is -1.  But the root rank does not
        #--have a subfield in a TxKey, so we stop before we check
        #--subfield 0.
        for i in range(self.nRanks - 1, 0, -1):
            if self.getField(txKey, i) > 0:
                return self.nthRank(i)

        #-- 2 --
        # Every subfield is zero: the key denotes the root.
        return self.nthRank(0)

    # - - -   H i e r . t x K e y S u b f i e l d   - - -

    def txKeySubfield(self, n):
        """Return (x,y) such that txKey[x:y] is the field for the
        (n)th rank, counting from 0.
        """
        return self.__fieldList[n]

    # - - -   H i e r . g e t F i e l d   - - -

    def getField(self, txKey, n):
        """Return the (n)th field of txKey, counting from 0, as an
        integer.

        Raises ValueError if that slice of txKey is not a valid
        integer literal (an empty slice included).
        """
        start, end = self.txKeySubfield(n)
        return int(txKey[start:end])

    # - - -   H i e r . c l e a r F i e l d   - - -

    def clearField(self, txKey, n):
        """Return txKey with the (n)th field, counting from 0, set
        to zeroes.
        """
        start, end = self.txKeySubfield(n)
        return (txKey[:start] +             # Part before field n
                "0" * (end - start) +       # Field n, zeroed
                txKey[end:])                # Part after field n

    # - - -   H i e r . _ _ i n i t _ _   - - -

    def __init__(self, ranksFileName=None):
        """Constructor for the Hier object.

          [ ranksFileName is a string or None ->
              if (effective-ranks-file-name(ranksFileName) names a
              readable, valid ranks file) ->
                return a new Hier object representing that file
              else ->
                Log() +:= error message(s)
                raise HierError ]

        A file counts as invalid when Log().count() is larger after
        loading than it was before, i.e. when reading the lines or
        building the rank-code map logged at least one error.
        """
        #-- 1 --
        # [ fileName  :=  effective-ranks-file-name
        #   errCount  :=  current error count from Log() ]
        errCount = Log().count()
        if ranksFileName is not None:
            fileName = ranksFileName
        elif ENV_NOMO in os.environ:
            fileName = os.path.join(os.environ[ENV_NOMO],
                                    DEFAULT_RANKS_NAME)
        else:
            fileName = DEFAULT_RANKS_NAME

        #-- 2 --
        # [ if fileName names a readable file ->
        #     self.__rankList  :=  a list of Rank objects made from
        #         the valid lines of that file, in file order
        #     Log()  +:=  error message(s) for invalid lines, if any
        #   else ->
        #     Log()  +:=  error message
        #     raise HierError ]
        self.__readRanksFile(fileName)

        #-- 3 --
        self.__rankMap = {}
        self.txKeyLen = 0
        self.nRanks = len(self.__rankList)
        self.__fieldList = []

        #-- 4 --
        # [ self.txKeyLen  +:=  sum of R.keyLen for all R in
        #                       self.__rankList
        #   self.__fieldList  +:=  (x,y) tuples, one per rank, such
        #       that for a txKey T, T[x:y] is that rank's field
        #   self.__rankMap  +:=  entries mapping R.code |-> R for
        #                        all R in self.__rankList
        #   Log()  +:=  errors for duplicate rank codes, if any ]
        for rank in self.__rankList:
            fieldEnd = self.txKeyLen + rank.keyLen
            self.__fieldList.append((self.txKeyLen, fieldEnd))
            self.txKeyLen = fieldEnd

            if rank.code in self.__rankMap:
                # The first rank to claim a code keeps it.
                Log().error("Duplicate rank code %s" % rank.code)
                Log().message("Other rank is %s" %
                              str(self.__rankMap[rank.code]))
            else:
                self.__rankMap[rank.code] = rank

        #-- 5 --
        # [ if Log()'s error count has grown since step 1 ->
        #     raise HierError
        #   else -> I ]
        if Log().count() > errCount:
            raise HierError("The ranks file `%s' is not valid." %
                            fileName)

    # - - -   H i e r . _ _ r e a d R a n k s F i l e   - - -

    def __readRanksFile(self, fileName):
        """Read the file of taxonomic ranks.

          [ if fileName names a readable file ->
              self.__rankList  :=  a list of Rank objects made from
                  the valid lines of that file, in file order
              Log()  +:=  error message(s) for invalid lines, if any
            else ->
              self.__rankList  :=  an empty list
              Log()  +:=  error message
              raise HierError ]
        """
        #-- 1 --
        self.__rankList = []

        #-- 2 --
        # [ if fileName names a readable file ->
        #     scan  :=  a new Scan object pointing to the start of
        #               that file
        #   else ->
        #     Log()  +:=  error message
        #     raise HierError ]
        try:
            scan = Scan(fileName)
        except IOError:
            Log().error("Can't open `%s' for reading." % fileName)
            raise HierError("Can't open the ranks file, `%s'." %
                            fileName)

        #-- 3 --
        # [ self.__rankList  +:=  new Rank objects representing valid
        #       lines from scan in the same order
        #   scan  +:=  error message(s) from bad lines, if any
        #   scan  :=  scan, closed ]
        try:
            while not scan.atEndFile:
                self.__readRanksLine(scan)
                scan.nextLine()
        finally:
            # Close the scanner even if parsing a line raises.
            scan.close()

    # - - -   H i e r . _ _ r e a d R a n k s L i n e   - - -

    def __readRanksLine(self, scan):
        """Process one line of the ranks file.

          [ if line in scan is a valid ranks line ->
              scan  :=  scan advanced to end of line
              self.__rankList  +:=  a new Rank object with depth
                                    len(self.__rankList)
            else ->
              scan  :=  scan advanced past the valid part
              scan  +:=  error message(s) ]

        A line is, in order: a rank code field of L_RANK_CODE
        characters, one optional-flag character (" " or "?"), a
        key-length field matched by scan.tabAny(digits), and the
        rank name running to the end of the line.
        """
        #-- 1 --
        # [ if line in scan starts with a field of size L_RANK_CODE ->
        #     scan  :=  scan advanced past that field
        #     code  :=  that field, with right blanks stripped
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        try:
            code = scan.move(L_RANK_CODE).rstrip()
        except IndexError:
            scan.error("Expect rank code of length %d" % L_RANK_CODE)
            return None

        #-- 2 --
        # [ if line in scan starts with a space ->
        #     scan  :=  scan advanced one
        #     isOptional  :=  0
        #   else if line in scan starts with "?" ->
        #     scan  :=  scan advanced one
        #     isOptional  :=  1
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        try:
            opt = scan.move(1)
        except IndexError:
            scan.error("Expecting ' ' or '?' for optional")
            return None

        if opt == " ":
            isOptional = 0
        elif opt == "?":
            isOptional = 1
        else:
            scan.error("Expecting ' ' or '?'")
            return None

        #-- 3 --
        # [ if line in scan starts with a digit ->
        #     scan  :=  scan advanced past it
        #     keyLen  :=  that digit, converted to integer
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        rawLen = scan.tabAny(digits)
        if not rawLen:
            scan.error("Expecting a 1-digit field length")
            return None
        keyLen = int(rawLen)

        #-- 4 --
        # [ if line in scan is nonempty ->
        #     scan  :=  scan advanced to end of line
        #     name  :=  remainder of line
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        name = scan.tab(-1)
        if len(name) < 1:
            scan.error("Expecting rank name")
            return None

        #-- 5 --
        # [ self.__rankList  +:=  a new Rank with depth
        #       (len(self.__rankList)), code, isOptional,
        #       keyLen, and name ]
        rank = Rank(self, len(self.__rankList),     # depth
                    code, isOptional, keyLen, name)
        self.__rankList.append(rank)


# - - - - -   c l a s s   R a n k   - - - - -

class Rank:
    """Represents one taxonomic rank.  Examples: species; form; subgenus.

      Exports:
        .hier:        [ the containing Hier, as passed to constructor ]
        .depth:       [ as passed to constructor ]
        .code:        [ as passed to constructor ]
        .isOptional:  [ as passed to constructor ]
        .keyLen:      [ as passed to constructor ]
        .name:        [ as passed to constructor ]
    """

    # - - -   R a n k . _ _ i n i t _ _   - - -

    def __init__(self, hier, depth, code, isOptional, keyLen, name):
        """Constructor for Rank object; stores each argument under
        the attribute of the same name.
        """
        self.hier = hier
        self.depth = depth
        self.code = code
        self.isOptional = isOptional
        self.keyLen = keyLen
        self.name = name

    # - - -   R a n k . _ _ s t r _ _   - - -

    def __str__(self):
        """str() function for Rank object: returns the rank's name."""
        return self.name

    # - - -   R a n k . _ _ r e p r _ _   - - -

    def __repr__(self):
        """repr() function for Rank object: same text as str()."""
        return str(self)