""" hier.py: Taxonomic hierarchy objects
$Revision: 1.9 $ $Date: 2000/07/16 23:02:42 $
This object represents the ``ranks file'' used
by nombuild.icn; refer to the document ``A system for
representing taxonomic nomenclature'' for details.
Classes exported:
class HierError(Exception): Our exception
class Hier: Represents the selected set of taxonomic ranks
class Rank: Represents one taxonomic rank, e.g., family rank
Exported constants (synchronize with `nombuild' program):
L_RANK_CODE: Maximum length of a rank code
"""
#================================================================
# Imports
#----------------------------------------------------------------
import string, copy, os, sys
sys.path.insert(0, "/u/john/tcc/python/lib")
from log import *
from scan import *
from cset import *
#================================================================
# Verification functions
#----------------------------------------------------------------
# effective-ranks-file-name ( arg ) ==
# if arg is not None ->
# arg
# else if ENV_NOMO names a currently defined environmental
# variable ->
# (value of that environmental variable) + DEFAULT_RANKS_NAME
# else ->
# DEFAULT_RANKS_NAME
#----------------------------------------------------------------
#================================================================
# Manifest constants
#================================================================
# Environmental variables
#----------------------------------------------------------------
# Name of the environment variable which, when defined, gives the
# directory that Hier() joins with DEFAULT_RANKS_NAME to locate the
# ranks file when no explicit file name is passed.
ENV_NOMO = "TXNY_HOME"
#----------------------------------------------------------------
# Field lengths in the flat files
#----------------------------------------------------------------
# Width, in characters, of the rank code field that starts each line
# of the ranks file; also the maximum length of a rank code.  Keep in
# sync with the `nombuild' program (see the module docstring).
L_RANK_CODE = 2
# Not a field length: the ranks file name used when the caller does
# not supply one.
DEFAULT_RANKS_NAME = "ranks"
# - - - - - c l a s s H i e r E r r o r - - - - -
class HierError(Exception):
    """Generic exception raised by this module.

    The string form is the standard Exception text prefixed with a
    fixed "*Hier error* " marker so the source of the failure is
    recognizable in logs and tracebacks.
    """

    def __str__(self):
        """Return the prefixed message text for this exception."""
        detail = Exception.__str__(self)
        return "*Hier error* " + detail
# - - - - - c l a s s H i e r - - - - -
class Hier:
    """Represents the taxonomic ranks of interest, largest to smallest.

      Exports:
        Hier ( ranksFileName=None )
          [ if (ranksFileName is a string or None) ->
              if (effective-ranks-file-name(ranksFileName) names
              a readable, valid ranks file) ->
                return a new Hier object representing that file
              else ->
                Log() +:= error message(s)
                raise HierError ]
        .nRanks     [ number of ranks in self ]
        .txKeyLen   [ the length of the taxonomic key string ]
        .nthRank ( n )
          [ if n is an integer ->
              if there are at least n+1 ranks in self ->
                return self's (n)th rank, counting from zero
              else -> raise IndexError ]
        .ranks()
          [ returns a new list of the Rank obs in self, from largest
            to smallest ]
        .lookupRankCode ( c )
          [ if c is a string ->
              if c is a rank code in self ->
                return the Rank object for that code
              else -> raise KeyError ]
        .genusRank()
          [ if self contains a genus rank -> return that Rank
            else -> return None ]
        .subgenusRank()
          [ if self contains a subgenus rank -> return that Rank
            else -> return None ]
        .speciesRank()
          [ if self contains a species rank -> return that Rank
            else -> return None ]
        .formRank()
          [ if self contains a form rank -> return that Rank
            else -> return None ]
        .canParentHaveChild ( p, c )
          [ if p and c are Rank objects ->
              if p.depth >= c.depth ->
                return 0
              else if there is a rank m such that
              p.depth < m.depth < c.depth and m is not optional ->
                return 0
              else -> return 1 ]
        .scanTxKey ( scan )
          [ if scan is a Scan object ->
              if scan starts with a valid taxonomic key number for
              self ->
                scan  :=  scan advanced past that number
                return that number
              else ->
                scan  +:=  error message
                return None ]
        .txKeyRank ( txKey )
          [ if (txKey is a string) ->
              if txKey is all zeroes ->
                return the root Rank object from self
              else ->
                return the Rank object corresponding to the rightmost
                nonzero subfield of txKey ]
        .txKeySubfield ( n )
          [ returns (x,y) where characters [x:y] of a taxonomic key
            correspond to the rank with depth n ]
        .getField ( txKey, n )
          [ if (txKey is a TxKey string)
            and (0 <= n < self.nRanks ) ->
              return the (n)th field of txKey, counting from 0,
              as an integer ]
        .clearField ( txKey, n )
          [ if (txKey is a TxKey string)
            and (0 <= n < self.nRanks ) ->
              return txKey with the (n)th field, counting from 0,
              set to zeroes ]

      State/Invariants:
        .txKeyLen == sum of r.keyLen for all ranks r in self
        .__rankList == list of Rank objects ordered from largest
        .__fieldList == list of (x,y) tuples corresponding to the
            ranks in self.__rankList such that for a txKey T,
            T[x:y] is the field for the corresponding rank
        .__rankMap == dictionary that maps each rank code |->
            the corresponding Rank object
    """

    GENUS_CODE = "g"            # Hardwired rank codes
    SUBGENUS_CODE = "-g"
    SPECIES_CODE = "s"
    FORM_CODE = "x"

# - - -   H i e r . n t h R a n k   - - -

    def nthRank(self, n):
        """Return the (n)th rank, counting from zero.

        Raises IndexError if self has fewer than n+1 ranks.
        """
        return self.__rankList[n]

# - - -   H i e r . r a n k s   - - -

    def ranks(self):
        """Return a new list of the Rank objects, largest to smallest.

        The list is a shallow copy, so callers may reorder or trim it
        without disturbing self.
        """
        return list(self.__rankList)

# - - -   H i e r . l o o k u p R a n k C o d e   - - -

    def lookupRankCode(self, c):
        """Look up rank code c and return its Rank object.

        Raises KeyError if c is not a rank code in self.
        """
        return self.__rankMap[c]

# - - -   H i e r . _ _ r a n k O r N o n e   - - -

    def __rankOrNone(self, code):
        """Return the Rank whose code is `code', or None if absent."""
        return self.__rankMap.get(code)

# - - -   H i e r . g e n u s R a n k   - - -

    def genusRank(self):
        """Return the Rank object for genus rank, or None."""
        return self.__rankOrNone(self.GENUS_CODE)

# - - -   H i e r . s u b g e n u s R a n k   - - -

    def subgenusRank(self):
        """Return the Rank object for subgenus rank, or None."""
        return self.__rankOrNone(self.SUBGENUS_CODE)

# - - -   H i e r . s p e c i e s R a n k   - - -

    def speciesRank(self):
        """Return the Rank object for species rank, or None."""
        return self.__rankOrNone(self.SPECIES_CODE)

# - - -   H i e r . f o r m R a n k   - - -

    def formRank(self):
        """Return the Rank object for form rank, or None."""
        return self.__rankOrNone(self.FORM_CODE)

# - - -   H i e r . c a n P a r e n t H a v e C h i l d   - - -

    def canParentHaveChild(self, p, c):
        """Can a taxon of rank p have a child of rank c?

        Returns 1 when c is strictly deeper than p and every rank
        strictly between them is optional; returns 0 otherwise.
        """
        #-- 1 --
        # A child must be strictly deeper than its parent.
        if p.depth >= c.depth:
            return 0

        #-- 2 --
        # [ if there is at least one rank m such that
        #   p.depth < m.depth < c.depth and m is not optional ->
        #     return 0
        #   else -> I ]
        for i in range(p.depth + 1, c.depth):
            if not self.__rankList[i].isOptional:
                return 0

        #-- 3 --
        return 1

# - - -   H i e r . s c a n T x K e y   - - -

    def scanTxKey(self, scan):
        """See if scan starts with a valid taxonomic key string.

        Delegates to scan.flatCset() with the key length and the
        `digits' character set; per the class contract the result is
        the key string, or None if the scan does not start with one.
        """
        return scan.flatCset(self.txKeyLen, digits)

# - - -   H i e r . t x K e y R a n k   - - -

    def txKeyRank(self, txKey):
        """Find the rank implied by the value of taxonomic key txKey.

        Returns the Rank for the rightmost nonzero subfield of txKey,
        or the root rank (depth 0) if every subfield is zero.
        """
        #-- 1 --
        # [ if any subfield of txKey is nonzero ->
        #     return the Rank corresponding to the rightmost nonzero
        #     subfield of txKey
        #   else -> I ]
        #--Note: Normally when iterating in the negative direction, the
        #--last argument to range() is -1.  But the root rank does not
        #--have a subfield in a TxKey, so we stop before we check
        #--subfield 0.
        for i in range(self.nRanks - 1, 0, -1):
            if self.getField(txKey, i) > 0:
                return self.nthRank(i)

        #-- 2 --
        return self.nthRank(0)

# - - -   H i e r . t x K e y S u b f i e l d   - - -

    def txKeySubfield(self, n):
        """Return the slice bounds (x,y) of a txKey for the (n)th rank."""
        return self.__fieldList[n]

# - - -   H i e r . g e t F i e l d   - - -

    def getField(self, txKey, n):
        """Return the (n)th field of txKey as an integer.

        Raises ValueError if that slice of txKey is not a decimal
        number (this includes an empty slice).
        """
        start, end = self.txKeySubfield(n)
        return int(txKey[start:end])

# - - -   H i e r . c l e a r F i e l d   - - -

    def clearField(self, txKey, n):
        """Return a copy of txKey with the (n)th field set to zeroes."""
        start, end = self.txKeySubfield(n)
        return (txKey[:start] +             # Part before field n
                "0" * (end - start) +       # Field n, zeroed
                txKey[end:])                # Part after field n

# - - -   H i e r . _ _ i n i t _ _   - - -

    def __init__(self, ranksFileName=None):
        """Constructor for the Hier object.

        ranksFileName -- path of the ranks file; when None, the file
            DEFAULT_RANKS_NAME is used, looked up in the directory
            named by the ENV_NOMO environment variable if that
            variable is defined, else relative to the current
            directory.

        Raises HierError if the file cannot be opened, or if any
        error was logged through Log() while reading it or while
        indexing its rank codes.
        """
        #-- 1 --
        # [ fileName  :=  effective-ranks-file-name
        #   errCount  :=  current error count from Log() ]
        errCount = Log().count()
        if ranksFileName is not None:
            fileName = ranksFileName
        elif ENV_NOMO in os.environ:
            fileName = os.path.join(os.environ[ENV_NOMO],
                                    DEFAULT_RANKS_NAME)
        else:
            fileName = DEFAULT_RANKS_NAME

        #-- 2 --
        # [ if fileName names a readable file ->
        #     self.__rankList  :=  a list of Rank objects for the
        #         valid lines of that file, in file order
        #     Log()  +:=  error message(s) for invalid lines, if any
        #   else ->
        #     Log()  +:=  error message
        #     raise HierError ]
        self.__readRanksFile(fileName)

        #-- 3 --
        self.__rankMap = {}
        self.txKeyLen = 0
        self.nRanks = len(self.__rankList)
        self.__fieldList = []

        #-- 4 --
        # [ self.txKeyLen  +:=  sum of R.keyLen for all R in
        #       self.__rankList
        #   self.__fieldList  +:=  (x,y) tuples such that for a txKey
        #       T, T[x:y] is the field for the corresponding rank
        #   self.__rankMap  +:=  entries mapping R.code |-> R
        #   Log()  +:=  errors for duplicate rank codes, if any ]
        for rank in self.__rankList:
            fieldEnd = self.txKeyLen + rank.keyLen
            self.__fieldList.append((self.txKeyLen, fieldEnd))
            self.txKeyLen = fieldEnd

            if rank.code in self.__rankMap:
                Log().error("Duplicate rank code %s" % rank.code)
                Log().message("Other rank is %s" %
                              str(self.__rankMap[rank.code]))
            else:
                self.__rankMap[rank.code] = rank

        #-- 5 --
        # [ if Log()'s error count has grown since step 1 ->
        #     raise HierError
        #   else -> I ]
        # NOTE(review): this catches bad ranks lines only if
        # scan.error() reports through Log() -- confirm against the
        # scan module.  Duplicate rank codes are always caught.
        if Log().count() > errCount:
            raise HierError("Errors found in the ranks file, `%s'." %
                            fileName)

# - - -   H i e r . _ _ r e a d R a n k s F i l e   - - -

    def __readRanksFile(self, fileName):
        """Read the file of taxonomic ranks.

          [ if (fileName names a readable file) ->
              self.__rankList  :=  a list of Rank objects for the
                  valid lines of that file, in file order
              scan messages  +:=  errors for invalid lines, if any
            else ->
              Log()  +:=  error message
              raise HierError ]
        """
        #-- 1 --
        self.__rankList = []

        #-- 2 --
        # [ if fileName names a readable file ->
        #     scan  :=  a new Scan object pointing to the start of
        #               that file
        #   else ->
        #     Log()  +:=  error message
        #     raise HierError ]
        try:
            scan = Scan(fileName)
        except IOError:
            Log().error("Can't open `%s' for reading." % fileName)
            raise HierError("Can't open the ranks file, `%s'." %
                            fileName)

        #-- 3 --
        # [ self.__rankList  +:=  new Rank objects representing valid
        #       lines from scan in the same order
        #   scan  +:=  error message(s) from bad lines, if any
        #   scan  :=  scan, closed ]
        # try/finally so the scan is closed even if a line handler
        # raises something unexpected.
        try:
            while not scan.atEndFile:
                self.__readRanksLine(scan)
                scan.nextLine()
        finally:
            scan.close()

# - - -   H i e r . _ _ r e a d R a n k s L i n e   - - -

    def __readRanksLine(self, scan):
        """Process one line of the ranks file.

          [ if line in scan is a valid ranks line ->
              scan  :=  scan advanced to end of line
              self.__rankList  +:=  a new Rank object with depth
                                    len(self.__rankList)
            else ->
              scan  :=  scan advanced past the valid part
              scan  +:=  error message(s) ]

        Line layout: rank code (L_RANK_CODE characters), one
        character that is " " (required) or "?" (optional), the
        key length in digits, then the rank name to end of line.
        """
        #-- 1 --
        # [ if line in scan starts with a field of size L_RANK_CODE ->
        #     scan  :=  scan advanced past that field
        #     code  :=  that field, with right blanks stripped
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        try:
            code = scan.move(L_RANK_CODE).rstrip()
        except IndexError:
            scan.error("Expect rank code of length %d" % L_RANK_CODE)
            return None

        #-- 2 --
        # [ if line in scan starts with a space ->
        #     scan  :=  scan advanced one
        #     isOptional  :=  0
        #   else if line in scan starts with "?" ->
        #     scan  :=  scan advanced one
        #     isOptional  :=  1
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        try:
            opt = scan.move(1)
        except IndexError:
            scan.error("Expecting ' ' or '?' for optional")
            return None
        if opt == " ":
            isOptional = 0
        elif opt == "?":
            isOptional = 1
        else:
            scan.error("Expecting ' ' or '?'")
            return None

        #-- 3 --
        # [ if line in scan starts with a digit ->
        #     scan  :=  scan advanced past it
        #     keyLen  :=  that digit, converted to integer
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        rawLen = scan.tabAny(digits)
        if not rawLen:
            scan.error("Expecting a 1-digit field length")
            return None
        keyLen = int(rawLen)

        #-- 4 --
        # [ if line in scan is nonempty ->
        #     scan  :=  scan advanced to end of line
        #     name  :=  remainder of line
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        # Truthiness test so that both an empty string and a None
        # result from scan.tab() are treated as a missing name.
        name = scan.tab(-1)
        if not name:
            scan.error("Expecting rank name")
            return None

        #-- 5 --
        # [ self.__rankList  +:=  a new Rank with depth
        #       (len(self.__rankList)), code, isOptional,
        #       keyLen, and name ]
        rank = Rank(self,
                    len(self.__rankList),       # depth
                    code, isOptional, keyLen, name)
        self.__rankList.append(rank)
# - - - - - c l a s s R a n k - - - - -
class Rank:
    """One taxonomic rank, for example species, form or subgenus.

      Exports:
        Rank ( hier, depth, code, isOptional, keyLen, name )
          [ if (hier is the containing Hier object)
            and (depth is a nonnegative integer giving the distance
              of this rank below root, root being 0)
            and (code is a string not exceeding L_RANK_CODE in length)
            and (isOptional is 0 for required taxa, 1 for optional)
            and (keyLen is a nonnegative integer giving the number of
              digits in a taxonomic key for this rank)
            and (name is the name of the rank) ->
              return a new Rank object with those values ]
        .hier         [ as passed to constructor ]
        .depth        [ as passed to constructor ]
        .code         [ as passed to constructor ]
        .isOptional   [ 1 if self is an optional rank, else 0 ]
        .keyLen       [ the length of self's part of a txKey ]
        .name         [ self's name, e.g., "Subfamily" ]
        str()         [ returns self's name ]
    """

# - - -   R a n k . _ _ i n i t _ _   - - -

    def __init__(self, hier, depth, code, isOptional, keyLen, name):
        """Store the constructor arguments as same-named attributes."""
        self.hier, self.depth, self.code = hier, depth, code
        self.isOptional, self.keyLen, self.name = isOptional, keyLen, name

# - - -   R a n k . _ _ s t r _ _   - - -

    def __str__(self):
        """A rank displays as its name."""
        return self.name

# - - -   R a n k . _ _ r e p r _ _   - - -

    def __repr__(self):
        """The repr is deliberately the same text as str()."""
        return str(self)