Next / Previous / TCC home / NMT home

Source listing for hier.py

logo
""" hier.py:  Taxonomic hierarchy objects

    $Revision: 1.9 $ $Date: 2000/07/16 23:02:42 $

    This object represents the ``ranks file'' used
    by nombuild.icn; refer to the document ``A system for
    representing taxonomic nomenclature'' for details.

    Classes exported:
        class HierError(Exception):  Our exception
        class Hier: Represents the selected set of taxonomic ranks
        class Rank: Represents one taxonomic rank, e.g., family rank

    Exported constants (synchronize with `nombuild' program):
        L_RANK_CODE: Maximum length of a rank code
"""

#================================================================
# Imports
#----------------------------------------------------------------

import string, copy, os, sys

sys.path.insert(0, "/u/john/tcc/python/lib")

from log import *
from scan import *
from cset import *

#================================================================
# Verification functions
#----------------------------------------------------------------
# effective-ranks-file-name ( arg ) ==
#   if arg is not None ->
#     arg
#   else if ENV_NOMO names a currently defined environmental
#   variable ->
#     (value of that environmental variable) + DEFAULT_RANKS_NAME
#   else ->
#     DEFAULT_RANKS_NAME
#----------------------------------------------------------------


#================================================================
# Manifest constants
#================================================================
# Environmental variables
#----------------------------------------------------------------

# Name of the environment variable that, when defined, gives the
# directory in which Hier looks for the default ranks file.
ENV_NOMO  =  "TXNY_HOME"    # Names directory containing input files

#----------------------------------------------------------------
# Field lengths in the flat files, and default file names
#----------------------------------------------------------------

# Width of the fixed-length rank code field that starts each line
# of the ranks file; codes are right-trimmed after reading.
L_RANK_CODE  =  2               # Rank code length
# File name used when the Hier constructor is not given one.
DEFAULT_RANKS_NAME  =  "ranks"  # Default ranks file name


# - - - - -   c l a s s   H i e r E r r o r   - - - - -

class HierError(Exception):
    """Generic exception raised by this module.

      str()   [ returns the standard Exception text for self,
                prefixed with "*Hier error* " ]
    """
    def __str__ ( self ):
        """Return the exception text, tagged as a Hier error.
        """
        detail  =  Exception.__str__ ( self )
        return "*Hier error* %s" % detail


# - - - - -   c l a s s   H i e r   - - - - -

class Hier:
    """Represents the taxonomic ranks of interest, largest to smallest.

        Exports:
            Hier ( ranksFileName=None )
                [ if (ranksFileName is a string or None) ->
                    if (effective-ranks-file-name(ranksFileName) names
                    a readable, valid ranks file) ->
                      return a new Hier object representing that file
                    else ->
                      Log()  +:=  error message(s)
                      raise HierError ]
            .nRanks     [ number of ranks in self ]
            .txKeyLen   [ the length of the taxonomic key string ]
            .nthRank ( n )
              [ if n is an integer ->
                  if there are at least n+1 ranks in self ->
                    return self's (n)th rank, counting from zero
                  else -> raise IndexError ]
            .ranks()
              [ returns a new list of the Rank obs in self, from largest
                to smallest ]
            .lookupRankCode ( c )
              [ if c is a string ->
                  if c is a rank code in self ->
                    return the Rank object for that code
                  else -> raise KeyError ]
            .genusRank()
              [ if self contains a genus rank -> return that Rank
                else -> return None ]
            .subgenusRank()
              [ if self contains a subgenus rank -> return that Rank
                else -> return None ]
            .speciesRank()
              [ if self contains a species rank -> return that Rank
                else -> return None ]
            .formRank()
              [ if self contains a form rank -> return that Rank
                else -> return None ]
            .canParentHaveChild ( p, c )
              [ if p and c are Rank objects ->
                  if p.depth >= c.depth ->
                    return 0
                  else if there is a rank m such that
                  p.depth < m.depth < c.depth and m is not optional ->
                    return 0
                  else -> return 1 ]
            .scanTxKey ( scan )
              [ if scan is a Scan object ->
                  if scan starts with a valid taxonomic key number for
                  self ->
                    scan  :=  scan advanced past that number
                    return that number
                  else ->
                    scan  +:=  error message
                    return None ]
            .txKeyRank ( txKey )
              [ if (txKey is a string) ->
                  if txKey is all zeroes ->
                    return the root Rank object from self
                  else ->
                    return the Rank object corresponding to the rightmost
                    nonzero subfield of txKey ]
            .txKeySubfield ( n )
              [ returns (x,y) where characters [x:y] of a taxonomic key
                correspond to the rank with depth n ]
            .getField ( txKey, n )
              [ if (txKey is a TxKey string)
                and (0 <= n < self.nRanks ) ->
                  return the (n)th field of txKey, counting from 0,
                  as an integer ]
            .clearField ( txKey, n )
              [ if (txKey is a TxKey string)
                and (0 <= n < self.nRanks ) ->
                  return txKey with the (n)th field, counting from 0,
                  set to zeroes ]

        State/Invariants:
            .txKeyLen == sum of r.keyLen for all ranks r in self
            .__rankList == list of Rank objects ordered from largest
              to smallest
            .__fieldList == list of (x,y) tuples corresponding to the
              ranks in self.__rankList such that for a txKey T,
              T[x:y] is the field for the corresponding rank
            .__rankMap == dictionary that maps each rank code |->
              the corresponding Rank object
    """
    GENUS_CODE          =  "g"      # Hardwired rank codes
    SUBGENUS_CODE       =  "-g"
    SPECIES_CODE        =  "s"
    FORM_CODE           =  "x"


# - - -   H i e r  . n t h R a n k   - - -

    def nthRank ( self, n ):
        """Return the (n)th rank, counting from zero.

          [ if self has at least n+1 ranks -> return the (n)th Rank
            else -> raise IndexError ]
        """
        return self.__rankList[n]


# - - -   H i e r  . r a n k s   - - -

    def ranks ( self ):
        """Return a list of Rank objects, largest to smallest.

          The result is a shallow copy, so callers may modify the
          list without disturbing self.
        """
        return copy.copy ( self.__rankList )


# - - -   H i e r  . l o o k u p R a n k C o d e   - - -

    def lookupRankCode ( self, c ):
        """Look up a rank code.  Returns a Rank object.

          [ if c is a rank code in self -> return the matching Rank
            else -> raise KeyError ]
        """
        return self.__rankMap[c]    # May raise KeyError


# - - -   H i e r  . g e n u s R a n k   - - -

    def genusRank ( self ):
        """Returns the Rank object for genus rank, or None
        """
        #--
        # dict.get() yields None only for a missing code; unlike a
        # bare `except:', it does not hide unrelated errors.
        #--
        return self.__rankMap.get ( self.GENUS_CODE )


# - - -   H i e r  . s u b G e n u s R a n k   - - -

    def subgenusRank ( self ):
        """Returns the Rank object for subgenus rank, or None
        """
        return self.__rankMap.get ( self.SUBGENUS_CODE )


# - - -   H i e r  . s p e c i e s R a n k   - - -

    def speciesRank ( self ):
        """Returns the Rank object for species rank, or None
        """
        return self.__rankMap.get ( self.SPECIES_CODE )


# - - -   H i e r  . f o r m R a n k   - - -

    def formRank ( self ):
        """Returns the Rank object for form rank, or None
        """
        return self.__rankMap.get ( self.FORM_CODE )


# - - -   H i e r  . c a n P a r e n t H a v e C h i l d   - - -

    def canParentHaveChild ( self, p, c ):
        """Can rank p have a child of rank c?

          [ if p and c are Rank objects ->
              if c is not strictly below p, or some non-optional rank
              lies strictly between them -> return 0
              else -> return 1 ]
        """
        #-- 1 --
        if  p.depth >= c.depth:
            return 0

        #-- 2 --
        # [ if there is at least one rank m such that
        #   p.depth < m.depth < c.depth and m is not optional ->
        #     return 0
        #   else -> I ]
        for i in range ( p.depth + 1, c.depth ):

            #-- 2.1 --
            # [ if self.__rankList[i] is not optional ->
            #     return 0
            #   else -> I ]
            m  =  self.__rankList[i]

            if  not m.isOptional:
                return 0

        #-- 3 --
        return 1


# - - -   H i e r  . s c a n T x K e y   - - -

    def scanTxKey ( self, scan ):
        """See if scan starts with a valid taxonomic key string
            Note: returns the string, or None if not valid

          The work is delegated to scan.flatCset(), asking for
          self.txKeyLen characters drawn from the digits set.
        """
        return scan.flatCset ( self.txKeyLen, digits )


# - - -   H i e r . t x K e y R a n k   - - -

    def txKeyRank ( self, txKey ):
        """Find the rank implied by the value of taxonomic key txKey

          [ if txKey is all zeroes -> return the root Rank
            else -> return the Rank of the rightmost nonzero
                    subfield of txKey ]
        """
        #-- 1 --
        # [ if any subfield of txKey is nonzero ->
        #     return the Rank corresponding to the rightmost nonzero
        #     subfield of txKey
        #   else -> I ]
        #--Note: Normally when iterating in the negative direction, the
        #--last argument to range() is -1.  But the root rank does not
        #--have a subfield in a TxKey, so we stop before we check subfield 0.

        #-- 1 top --
        # [ for i in [ self.nRanks-1, self.nRanks-2, ..., 1 ]:
        for i in range ( self.nRanks-1, 0, -1 ):

            #-- 1 body --
            # [ if the (i)th subfield of txKey is zero -> I
            #   else -> return self.nthRank(i) ]
            if  self.getField ( txKey, i ) > 0:
                return self.nthRank ( i )

        #-- 2 --
        return self.nthRank ( 0 )


# - - -   H i e r . t x K e y S u b f i e l d   - - -

    def txKeySubfield ( self, n ):
        """Find the slice of a txKey for the (n)th rank, counting from 0

          Returns a tuple (x,y) such that txKey[x:y] is that field.
        """
        return self.__fieldList[n]


# - - -   H i e r  . g e t F i e l d   - - -

    def getField ( self, txKey, n ):
        """Get the (n)th field of txKey as an integer

          Raises ValueError if the field is not a decimal number.
        """
        start, end  =  self.txKeySubfield ( n )
        #-- int() replaces the deprecated string.atoi()
        return int ( txKey[start:end] )


# - - -   H i e r  . c l e a r F i e l d   - - -

    def clearField ( self, txKey, n ):
        """Return txKey with the (n)th field set to zeroes
        """
        start, end  =  self.txKeySubfield ( n )
        newKey  =  ( txKey[:start] +            # Part before field n
                     ("0" * (end-start) ) +     # Field n, zeroed
                     txKey[end:] )              # Part after field n
        return newKey


# - - -   H i e r  . _ _ i n i t _ _   - - -

    def __init__ ( self, ranksFileName=None ):
        """Constructor for the Hier object.

          [ if (ranksFileName is a string or None) ->
              if (effective-ranks-file-name(ranksFileName) names
              a readable, valid ranks file) ->
                return a new Hier object representing that file
              else ->
                Log()  +:=  error message(s)
                raise HierError ]
        """

        #-- 1 --
        # [ fileName  :=  effective-ranks-file-name
        #   errCount  :=  current error count from Log() ]
        errCount  =  Log().count()
        homeDir   =  os.environ.get ( ENV_NOMO )

        if  ranksFileName is not None:
            fileName  =  ranksFileName
        elif  homeDir is not None:
            fileName  =  os.path.join ( homeDir, DEFAULT_RANKS_NAME )
        else:
            fileName  =  DEFAULT_RANKS_NAME

        #-- 2 --
        # [ if (fileName names a readable file) ->
        #     self.__rankList  :=  a list of Rank objects made from
        #         the valid lines of that file, in the same order
        #     Log()            +:=  error message(s) for invalid
        #                           lines, if any
        #   else ->
        #     Log()  +:=  error message(s)
        #     raise HierError ]
        self.__readRanksFile ( fileName )

        #-- 3 --
        self.__rankMap    =  {}
        self.txKeyLen     =  0
        self.nRanks       =  len ( self.__rankList )
        self.__fieldList  =  []

        #-- 4 --
        # [ self.txKeyLen     +:=  sum of R.keyLen for all R in self.__rankList
        #   self.__fieldList  +:=  list of (x,y) tuples corresponding to the
        #                          ranks in self.__rankList such that for a
        #                          txKey T, T[x:y] is the field for the
        #                          corresponding rank
        #   self.__rankMap    +:=  entries mapping R.code |-> R for
        #                          all R in self.__rankList
        #   Log()             +:=  errors for duplicate rank codes,
        #                          if any ]
        for rank in self.__rankList:
            #-- 4.1 --
            # [ self.txKeyLen     +:=  rank.keyLen
            #   self.__fieldList  +:=  tuple (K,K+L) where K is self.txKeyLen
            #                          and L is rank.keyLen
            #   self.__rankMap    +:=  entry mapping rank.code |-> rank
            #   Log()             +:=  error for a duplicate rank code,
            #                          if any ]
            fieldEnd  =  self.txKeyLen + rank.keyLen
            self.__fieldList.append ( ( self.txKeyLen, fieldEnd ) )
            self.txKeyLen  =  fieldEnd

            other  =  self.__rankMap.get ( rank.code )
            if  other is not None:
                Log().error ( "Duplicate rank code %s" % rank.code )
                Log().message ( "Other rank is %s" % str(other) )
            else:
                self.__rankMap[rank.code]  =  rank

        #-- 5 --
        # [ if Log()'s error count has grown since step 1 ->
        #     raise HierError
        #   else -> I ]
        #--Note: This enforces the documented contract that an invalid
        #--ranks file raises HierError.  A Hier built from a file with
        #--bad lines or duplicate codes would have misaligned txKey fields.
        if  Log().count() > errCount:
            raise HierError ( "Errors found in the ranks file, `%s'." %
                              fileName )


# - - -   H i e r  . _ _ r e a d R a n k s F i l e   - - -

    def __readRanksFile ( self, fileName ):
        """Read the file of taxonomic ranks
             [ if (fileName names a readable file) ->
                 self.__rankList  :=  a list of Rank objects made from
                     the valid lines of that file, in the same order
                 error message(s) are reported for invalid lines
               else ->
                 Log()  +:=  error message(s)
                 raise HierError ]
        """

        #-- 1 --
        self.__rankList  =  []

        #-- 2 --
        # [ if fileName names a readable file ->
        #     scan  :=  a new Scan object pointing to the start of that file
        #   else ->
        #     Log()  +:=  error message
        #     raise HierError ]
        try:
            scan  =  Scan ( fileName )
        except IOError:
            Log().error ( "Can't open `%s' for reading." % fileName )
            raise HierError ( "Can't open the ranks file, `%s'." %
                              fileName )

        #-- 3 --
        # [ self.__rankList  +:=  new Rank objects representing valid lines
        #                         from scan in the same order
        #   scan             +:=  error message(s) from bad lines, if any
        #   scan             :=   scan, closed ]
        #--Note: the finally clause closes scan even when a line
        #--handler raises unexpectedly.
        try:
            while not scan.atEndFile:
                #-- 3.1 --
                # [ if line in scan is a valid ranks line ->
                #     scan             :=   scan advanced to end of line
                #     self.__rankList  +:=  a new Rank object with depth
                #                           len(self.__rankList)
                #   else ->
                #     scan  :=   scan advanced past the valid part
                #     scan  +:=  error message(s) ]
                self.__readRanksLine ( scan )

                #-- 3.2 --
                scan.nextLine()
        finally:
            scan.close()


# - - -   H i e r  . _ _ r e a d R a n k s L i n e   - - -

    def __readRanksLine ( self, scan ):
        """Process one line of the ranks file
            [ if line in scan is a valid ranks line ->
                scan             :=   scan advanced to end of line
                self.__rankList  +:=  a new Rank object with depth
                                      len(self.__rankList)
              else ->
                scan  :=   scan advanced past the valid part
                scan  +:=  error message(s) ]

            Line layout, in order:  a rank code of L_RANK_CODE
            characters; one flag character (" " for a required rank,
            "?" for an optional one); the key length in digits; the
            rank name, which is the rest of the line.
        """

        #-- 1 --
        # [ if line in scan starts with a field of size L_RANK_CODE ->
        #     scan  :=  scan advanced past that field
        #     code  :=  that field, with right blanks stripped
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        try:
            rawCode  =  scan.move ( L_RANK_CODE )
        except IndexError:
            scan.error ( "Expect rank code of length %d" % L_RANK_CODE )
            return None
        code  =  rawCode.rstrip()

        #-- 2 --
        # [ if line in scan starts with a space ->
        #     scan        :=  scan advanced one
        #     isOptional  :=  0
        #   else if line in scan starts with "?" ->
        #     scan        :=  scan advanced one
        #     isOptional  :=  1
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        try:
            opt  =  scan.move ( 1 )
        except IndexError:
            scan.error ( "Expecting ' ' or '?' for optional" )
            return None

        if  opt == " ":
            isOptional  =  0
        elif  opt == "?":
            isOptional  =  1
        else:
            scan.error ( "Expecting ' ' or '?'" )
            return None

        #-- 3 --
        # [ if line in scan starts with a digit ->
        #     scan    :=  scan advanced 1
        #     keyLen  :=  that digit, converted to integer
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        rawLen  =  scan.tabAny ( digits )
        if  not rawLen:
            scan.error ( "Expecting a 1-digit field length" )
            return None
        keyLen  =  int ( rawLen )

        #-- 4 --
        # [ if line in scan is nonempty ->
        #     scan  :=  scan advanced to end of line
        #     name  :=  remainder of line
        #   else ->
        #     scan  +:=  error message
        #     return None ]
        name  =  scan.tab ( -1 )
        if  len ( name ) < 1:
            scan.error ( "Expecting rank name" )
            return None

        #-- 5 --
        # [ self.__rankList  +:=  a new Rank with depth
        #                         (len(self.__rankList)), code, isOptional,
        #                         keyLen, and name ]
        rank   =  Rank ( self,
                         len ( self.__rankList ),       # depth
                         code, isOptional, keyLen, name )
        self.__rankList.append ( rank )


# - - - - -   c l a s s  R a n k   - - - - -

class Rank:
    """One taxonomic rank, such as species, form, or subgenus.

      Exports:
        Rank ( hier, depth, code, isOptional, keyLen, name )
          [ if (hier is the containing Hier object)
            and (depth is a nonnegative integer giving this rank's
            distance below the root, the root being 0)
            and (code is a string of at most L_RANK_CODE characters)
            and (isOptional is 1 for optional taxa, 0 for required)
            and (keyLen is a nonnegative integer, the number of
            digits this rank occupies in a taxonomic key)
            and (name is the rank's name) ->
              return a new Rank object holding those values ]
        .hier        [ as passed to constructor ]
        .depth       [ as passed to constructor ]
        .code        [ as passed to constructor ]
        .isOptional  [ as passed to constructor ]
        .keyLen      [ as passed to constructor ]
        .name        [ as passed to constructor, e.g., "Subfamily" ]
        str()        [ returns self.name ]
        repr()       [ same as str() ]
    """


# - - -   R a n k  . _ _ i n i t _ _   - - -

    def __init__ ( self, hier, depth, code, isOptional, keyLen, name ):
        """Store each constructor argument under the same name.
        """
        #-- Where this rank lives in the hierarchy
        self.hier, self.depth  =  hier, depth

        #-- How this rank is described in the ranks file
        self.code, self.name  =  code, name
        self.isOptional, self.keyLen  =  isOptional, keyLen


# - - -   R a n k  . _ _ s t r _ _   - - -

    def __str__ ( self ):
        """The string form of a Rank is its name.
        """
        label  =  self.name
        return label


# - - -   R a n k . _ _ r e p r _ _   - - -

    def __repr__ ( self ):
        """repr() gives the same text as str().
        """
        text  =  str(self)
        return text

TCC home: TCC home
NMT home: NMT home

Last updated: 2014-09-19 18:03 MDT