# Next / Previous / TCC home / NMT home
#
# Source listing for txny.py
#
# logo
""" txny.py:  Bird taxonomy objects in Python (MySQL version)

    $Revision: 1.24 $ $Date: 2000/10/07 07:19:54 $

    These objects represent the taxonomy files generated
    by nombuild.icn; refer to the document ``A system for
    representing taxonomic nomenclature'' for details.

    Classes exported:
        Txny: Represents the entire taxonomy
        Taxon: Represents one taxon, e.g., _Hirundo rustica_,
            the barn swallow
        AbbrDef: Represents one abbreviation, the English name
            from which it was derived, and the scientific name
            to which it is referred
    Pseudo-classes: These are not Python classes, they are
    just strings with constraints on their content.
        TxKey: Represents a key number that can be used to sort
            data phylogenetically.  Must be a string of digits
            whose length is (Hier.txKeyLen).
        Abbr: A standard bird abbreviation; a string of no more
            than L_ABBR characters, right-blank-padded to size,
            and uppercased.  Example: "KAUOO " == Kauai Oo.

    Exported constants (synchronize with `nombuild' program):
        L_SCI: Maximum length of a scientific name
        L_ENG: Maximum length of an English name
        L_TEX: Maximum length of a TeX-encoded English name
        L_ABBR: Length of a bird code
"""

#================================================================
# IMPORTS
#----------------------------------------------------------------
# Standard Python modules
#----------------------------------------------------------------

import sys, string, copy         # Standard Python modules

#----------------------------------------------------------------
# Shipman's standard library
#----------------------------------------------------------------

sys.path.insert(0, "/u/john/tcc/python/lib")

from log import *           # Error logging module
from scan import *          # Stream scanning module
import cset                 # Character set type
from my_db import *         # MySQL schema layer

#----------------------------------------------------------------
# Subsidiary modules
#----------------------------------------------------------------

from hier import *          # Taxonomic rank hierarchy module


#================================================================
# MANIFEST CONSTANTS
#================================================================
# Field lengths in the flat files
#----------------------------------------------------------------

L_SCI        =  36          # Scientific name field length
L_ENG        =  56          # English name field length
L_TEX        =  80          # TeX English name field length
L_ABBR       =  6           # Standard bird code length

# Character sets used while scanning the flat files.  Cset is always
# reached through the `cset' module, which this file imports by name,
# rather than through a bare `Cset' that would have to leak in from
# one of the star imports.
C_ABBR       =  cset.letters.union ( cset.Cset ( "/ " ) )   # Cset for bird codes
C_BLANK      =  cset.Cset ( " " )       # Cset for spaces
C_STATUS     =  cset.Cset ( " ?+" )     # Cset for tree file's status code


#----------------------------------------------------------------
# Constants related to databases
#-----------------------------------------------------------------

import os                   # Environment overrides for DB_OPTIONS

# Connection settings passed to TheDatabase().  Each value can be
# overridden with the TXNY_DB_USER, TXNY_DB_PASSWD, TXNY_DB_HOST and
# TXNY_DB_NAME environment variables; when a variable is unset the
# historical value is used, so existing callers see no change.
# NOTE(review): the literal password below is kept only as a
# backward-compatible default.  Credentials should not live in
# source control; set TXNY_DB_PASSWD and retire this default.
DB_OPTIONS = { "user":   os.environ.get ( "TXNY_DB_USER",   "john" ),
               "passwd": os.environ.get ( "TXNY_DB_PASSWD", "abracadabra" ),
               "host":   os.environ.get ( "TXNY_DB_HOST",   "crayola" ),
               "db":     os.environ.get ( "TXNY_DB_NAME",   "john" ) }

#================================================================
# SCHEMA
#----------------------------------------------------------------
# Prefix conventions:
#   T_      Names of tables
#   F_      Names of fields (columns)
#   L_      Lengths of fields
#   X_      File extension for this table in external form
#----------------------------------------------------------------
# Tree table: represents the taxonomic tree.  The length of the
# F_TX_KEY column is derived from Hier.txKeyLen.
#----------------------------------------------------------------

# Table name and the extension of its flat-file form
T_TREE      =  "tree"
X_TREE      =  ".tre"

# Column names
F_TX_KEY    =  "tx_key"         # Taxonomic key
F_STD_ABBR  =  "std_abbr"       # Standard bird code
F_STATUS    =  "status"         # Status, '?' if not in AOU
F_SCI       =  "sci"            # Scientific name
F_ENG       =  "eng"            # English name
F_TEX       =  "tex"            # TeX English name

# Column widths for the fixed-size name and code columns
L_ABBR      =  6                # Width of F_STD_ABBR
L_SCI       =  36               # Width of F_SCI
L_ENG       =  56               # Width of F_ENG
L_TEX       =  80               # Width of F_TEX

#----------------------------------------------------------------
# Abbreviations table: For looking up any bird code
#----------------------------------------------------------------

# Table name and the extension of its flat-file form
T_AB6      = "abbr"
X_AB6      = ".ab6"

# Column names.  The scientific-name column reuses F_SCI and L_SCI
# from the tree table and is a relation to that table's column.
F_ABBR     = "ab6"          # Bird code being defined; length L_ABBR
F_ALT_ENG  = "alt_eng"      # English name from which the code derives

#----------------------------------------------------------------
# Collisions table: for documenting collisions in the code system
#----------------------------------------------------------------

# Table name and the extension of its flat-file form
T_COLL       = "coll"
X_COLL       = ".col"

# Column names: each row pairs one invalid code with one valid choice
F_BAD_ABBR   = "bad_abbr"       # The invalid (colliding) code
F_GOOD_ABBR  = "good_abbr"      # One of the valid alternatives


# - - - - -   c l a s s   T x n y E r r o r   - - - - -

class TxnyError(Exception):
    """Generic exception raised for any failure in this module.

      Exports:
        str(e):
          [ return "*Txny error* " followed by the standard
            Exception string for e ]
    """
    def __str__ ( self ):
        baseText  =  Exception.__str__ ( self )
        return "*Txny error* " + baseText


#================================================================
# Verification functions
#================================================================
# abbr-file(baseName) ==
#   the file named (baseName + X_AB6)
#----------------------------------------------------------------
# coll-file(baseName) ==
#   the file named (baseName + X_COLL)
#----------------------------------------------------------------
# tree-file(baseName) ==
#   the file named (baseName + X_TREE)
#----------------------------------------------------------------




# - - - - -   c l a s s   T x n y   - - -

class Txny:
    """Represents an entire taxonomic tree, e.g., Class Aves.

      Exports:
        Txny ( hier ):
            [ if (hier is the Hier object defining the rank hierarchy
              used in TheDatabase(DB_OPTIONS)
              and (TheDatabase(DB_OPTIONS) can be opened) ->
                  return a new Txny object representing that hierarchy
                  and database
                else ->
                  raise TxnyError ]
        .hier       [ as passed to the constructor ]
        .root():
          [ return a Taxon object representing the root taxon ]
        .lookupTxKey ( txKey ):
          [ if (txKey is a string) ->
              if (txKey matches the taxonomic key of a taxon in self) ->
                return the corresponding Taxon
              else -> return None ]
        .lookupSci ( sci ):
          [ if (sci is a string) ->
              if (sci is the scientific name of a taxon in self) ->
                return the Taxon with that name
              else -> return None ]
        .lookupAbbr ( abbr ):
          [ if (abbr is a string) ->
              if (abbr is the abbr for a taxon in self) ->
                return the Taxon to which that abbr is referred
              else -> return None ]
        .lookupAbbrDef ( abbr ):
          [ if (abbr is a string) ->
              if (abbr is the abbr for a taxon in self) ->
                return the AbbrDef for that abbr
              else -> return None ]
        .lookupCollision ( abbr ):
          [ if (abbr is a string) ->
              if (abbr is a collision abbr in self) ->
                return a list of the preferred abbrs for that collision
              else -> return None ]
        .allTxKeys():
          [ returns a list of all txKey strings in self, in
            ascending (phylogenetic) order ]
        .allAbbrs():
          [ returns a list of all abbr strings in self, in
            ascending order ]
        .create():
          [ if any of self's tables do not exist ->
              create them ]
        .drop():
          [ if any of self's tables exist ->
              delete them ]
        .reload(fileName):
          [ if fileName is the base file name for a set of readable,
            valid flat files as built by nombuild.icn ->
              self  :=  self reloaded from those files
            else -> TxnyError ]

      State:
        .db
          [ TheDatabase(DB_OPTIONS) ]
        .treeTable
          [ a Table object representing the taxonomic tree table ]
        .abbrTable
          [ a Table object representing the abbreviations table ]
        .collTable
          [ a Table object representing the collisions table ]

    """


# - - -   T x n y . _ _ i n i t _ _   - - -

    def __init__ ( self, hier ):
        """Constructor for a Txny object.

          [ if TheDatabase(DB_OPTIONS) can be opened ->
              self  :=  a new Txny bound to hier and that database
            else ->
              raise TxnyError ]
        """
        #-- 1 --
        self.hier  =  hier

        #-- 2 --
        # [ if TheDatabase(DB_OPTIONS) can be opened ->
        #     self.db  :=  TheDatabase(DB_OPTIONS)
        #   else ->
        #     raise TxnyError ]
        # The `except ... as' and call-style `raise' forms are used
        # because they parse on both Python 2.6+ and Python 3.
        try:
            self.db  =  TheDatabase ( DB_OPTIONS )
        except DBHelpersError as detail:
            raise TxnyError ( "Could not open database.\n%s" %
                              str(detail) )

        #-- 3 --
        # [ self.treeTable  :=  a Table representing the tree table
        #                       in self.db ]
        self.treeTable  =  TreeTable ( self )

        #-- 4 --
        # [ self.abbrTable  :=  a Table representing the abbreviations
        #                       table in self.db ]
        self.abbrTable  =  AbbrTable ( self )

        #-- 5 --
        # [ self.collTable  :=  a Table representing the collisions
        #                       table in self.db ]
        self.collTable  =  CollTable ( self )


# - - -   T x n y . r o o t   - - -

    def root ( self ):
        """Return the root taxon.

          The root's key is a string of self.hier.txKeyLen zeroes.
        """
        rootKey  =  "0" * self.hier.txKeyLen
        return self.treeTable.lookupTxKey ( rootKey )


# - - -   T x n y . l o o k u p T x K e y   - - -

    def lookupTxKey ( self, txKey ):
        """Find the taxon for a given taxonomic key.

          Delegates to self.treeTable.lookupTxKey().
        """
        return self.treeTable.lookupTxKey ( txKey )


# - - -   T x n y . l o o k u p S c i   - - -

    def lookupSci ( self, sci ):
        """Find the Taxon for a given scientific name.

          Delegates to self.treeTable.lookupSci().
        """
        return self.treeTable.lookupSci ( sci )


# - - -   T x n y . l o o k u p A b b r   - - -

    def lookupAbbr ( self, abbr ):
        """Find the Taxon object corresponding to a given bird code.

          [ if abbr has an AbbrDef in self.abbrTable ->
              return the result of looking up that AbbrDef's .sci
              in the tree table (which may itself be None)
            else -> return None ]
        """
        #-- 1 --
        # [ if abbr is in self.abbrTable ->
        #     abbrDef  :=  a new AbbrDef object for that code
        #   else ->
        #     abbrDef  :=  None ]
        abbrDef  =  self.abbrTable.lookupAbbrDef ( abbr )

        #-- 2 --
        if  abbrDef is None:
            return None

        #-- 3 --
        # [ if abbrDef.sci is defined in self.treeTable ->
        #     return the Taxon object for that scientific name
        #   else ->
        #     return None ]
        return self.lookupSci ( abbrDef.sci )


# - - -   T x n y . l o o k u p A b b r D e f   - - -

    def lookupAbbrDef ( self, abbr ):
        """Find the AbbrDef for a given Abbr.

          Delegates to self.abbrTable.lookupAbbrDef().
        """
        return self.abbrTable.lookupAbbrDef ( abbr )


# - - -   T x n y . l o o k u p C o l l i s i o n   - - -

    def lookupCollision ( self, abbr ):
        """Check to see if `abbr' is a prohibited collision code.

          [ if self.collTable reports alternatives for abbr ->
              return that list
            else -> return None ]
        """

        #-- 1 --
        # [ if abbr is a collision in self.collTable ->
        #     result  :=  list of valid alternate codes
        #   else ->
        #     result  :=  an empty list ]
        result  =  self.collTable.lookupColl ( abbr )

        #-- 2 --
        # An empty result means `no collision'; callers get None
        # rather than an empty list.
        if  not result:
            return None
        return result

# - - -   T x n y . a l l T x K e y s   - - -

    def allTxKeys ( self ):
        """Return all taxonomic keys in ascending order.

          Delegates to self.treeTable.allTxKeys().
        """
        return  self.treeTable.allTxKeys()


# - - -   T x n y . a l l A b b r s   - - -

    def allAbbrs ( self ):
        """Return all abbrs in ascending order.

          Delegates to self.abbrTable.allAbbrs().
        """
        return  self.abbrTable.allAbbrs()


# - - -   T x n y . c r e a t e   - - -

    def create ( self ):
        """Create whichever of self's tables do not exist yet.

          Tables are visited in the order tree, abbr, coll.
        """
        for  table in ( self.treeTable, self.abbrTable, self.collTable ):
            if  not table.exists():
                table.create()


# - - -   T x n y . d r o p   - - -

    def drop ( self ):
        """Drop whichever of self's tables currently exist.

          Tables are visited in the order tree, abbr, coll.
        """
        for  table in ( self.treeTable, self.abbrTable, self.collTable ):
            if  table.exists():
                table.drop()


# - - -   T x n y . r e l o a d   - - -

    def reload ( self, fileName ):
        """Reload self from flat files.

          fileName is the base name; the extensions X_TREE, X_AB6
          and X_COLL are appended to find the three flat files.
          The tables are reloaded in that order, so an exception
          from an earlier table leaves the later ones untouched.
        """

        #-- 1 --
        # [ if tree-file(fileName) is a readable, valid tree file ->
        #     self.treeTable  :=  self.treeTable reloaded from file
        #   else ->
        #     raise TxnyError ]
        self.treeTable.reload ( fileName + X_TREE )

        #-- 2 --
        # [ if abbr-file(fileName) is a readable, valid abbr file ->
        #     self.abbrTable  :=  self.abbrTable reloaded from file
        #   else ->
        #     raise TxnyError ]
        self.abbrTable.reload ( fileName + X_AB6 )

        #-- 3 --
        # [ if coll-file(fileName) is a readable, valid collisions
        #   file ->
        #     self.collTable  :=  self.collTable reloaded from that file
        #   else ->
        #     raise TxnyError ]
        self.collTable.reload ( fileName + X_COLL )


# - - - - -   c l a s s   T r e e T a b l e   - - - - -

class TreeTable(Table):
    """Represents the database table for the tree file from nombuild.icn

      Exports (in addition to parent class):
        TreeTable ( txny ):
          [ if (txny is a Txny object) ->
              self  :=  a Table object representing table T_TREE in txny.db
                        with hierarchy (txny.hier) ]
        .txny       [ as passed to constructor ]
        .lookupTxKey ( txKey ):
          [ if (txKey is a string) ->
              if (self's table has a row whose tx_key field matches txKey) ->
                return a Taxon object representing that row
              else -> return None ]            
        .lookupSci ( sci ):
          [ if sci is a string ->
              if sci matches a scientific name in self ->
                return the Taxon object for that scientific name
              else -> return None ]
        .allTxKeys ( ):
          [ return all the tx_key fields in self, in ascending order ]
        .reload ( fileName ):
          [ if fileName is a string ->
              if fileName names a readable, valid .tre file ->
                self  :=  self - (all existing contents) + (contents of
                          fileName)
              else ->
                raise TxnyError ]
    """


# - - -   T r e e T a b l e . _ _ i n i t _ _   - - -

    def __init__ ( self, txny ):
        """Build the tree table's schema and attach it to txny's database.

          [ if (txny is a Txny object with .db and .hier set) ->
              self  :=  a Table named T_TREE in txny.db with the six
                        tree-table columns, remembering txny ]
        """
        #-- 1 --
        # [ keyCol   :=  primary-key column sized from the hierarchy
        #   abbrCol  :=  bird-code column, the only one allowing nulls ]
        keyCol   =  Column ( F_TX_KEY, "char(%d)" % txny.hier.txKeyLen,
                             keyType=PRIMARY )
        abbrCol  =  Column ( F_STD_ABBR, "char(%d)" % L_ABBR,
                             allowNulls=1 )

        #-- 2 --
        # [ the remaining columns: one-character status, then the
        #   three variable-length name columns ]
        statusCol  =  Column ( F_STATUS, "char(1)" )
        sciCol     =  Column ( F_SCI, "varchar(%d)" % L_SCI )
        engCol     =  Column ( F_ENG, "varchar(%d)" % L_ENG )
        texCol     =  Column ( F_TEX, "varchar(%d)" % L_TEX )

        #-- 3 --
        schema  =  [ keyCol, abbrCol, statusCol, sciCol, engCol, texCol ]
        Table.__init__ ( self, txny.db, T_TREE, schema )
        self.txny  =  txny


# - - -   T r e e T a b l e . l o o k u p T x K e y   - - -

    def lookupTxKey ( self, txKey ):
        """Return the Taxon whose taxonomic key is txKey, or None.

          [ if (txKey is a string) ->
              if self.queryExact() finds a row with F_TX_KEY=txKey ->
                return a Taxon built from that row
              else -> return None ]
        """

        #-- 1 --
        # [ found  :=  the row with F_TX_KEY=txKey, or None if
        #              queryExact() reports no such row ]
        criteria  =  [ ( F_TX_KEY, txKey ) ]
        found     =  self.queryExact ( criteria )

        #-- 2 --
        if  found is not None:
            return self.__makeTaxon ( found )

        #-- 3 --
        return None


# - - -   T r e e T a b l e . _ _ m a k e T a x o n   - - -

    def __makeTaxon ( self, row ):
        """Build a Taxon from one row of the tree table.

          [ if (row is a Row from self) ->
              return a Taxon made from row's six columns, with its
              rank derived from the key via self.txny.hier ]
        """

        #-- 1 --
        # [ pull the six column values out of row, in schema order ]
        # NOTE(review): the bird code is presumably None when the
        # column is null -- confirm against the Row class.
        txKey, abbr, status, sci, eng, tex  =  (
            row [ F_TX_KEY ],
            row [ F_STD_ABBR ],
            row [ F_STATUS ],
            row [ F_SCI ],
            row [ F_ENG ],
            row [ F_TEX ] )

        #-- 2 --
        # [ taxonRank  :=  whatever rank the hierarchy assigns to txKey ]
        hier       =  self.txny.hier
        taxonRank  =  hier.txKeyRank ( txKey )

        #-- 3 --
        taxon  =  Taxon ( self.txny, taxonRank, txKey, abbr, status,
                          sci, eng, tex )
        return taxon


# - - -   T r e e T a b l e . l o o k u p S c i   - - -

    def lookupSci ( self, sci ):
        """Return the Taxon whose scientific name is sci, or None.

          [ if (sci is a string) ->
              if self.queryExact() finds a row with F_SCI=sci ->
                return a Taxon built from that row
              else -> return None ]
        """
        #-- 1 --
        # [ found  :=  the row with F_SCI=sci, or None if
        #              queryExact() reports no such row ]
        criteria  =  [ ( F_SCI, sci ) ]
        found     =  self.queryExact ( criteria )

        #-- 2 --
        if  found is not None:
            return self.__makeTaxon ( found )

        #-- 3 --
        return None


# - - -   T r e e T a b l e . a l l T x K e y s   - - -

    def allTxKeys ( self ):
        """Return a list of every F_TX_KEY value, sorted by that column.
        """

        #-- 1 --
        # [ cursor  :=  the result of querying all rows of self with
        #               a sort on F_TX_KEY ]
        ordering  =  [ SortSpec ( F_TX_KEY ) ]
        cursor    =  self.queryAll ( sortList=ordering )

        #-- 2 --
        # [ keys  :=  the F_TX_KEY value of each row from cursor, in
        #             the order delivered; cursor.next() returning
        #             None marks the end ]
        keys  =  []
        while  True:
            row  =  cursor.next()
            if  row is None:
                break
            keys.append ( row [ F_TX_KEY ] )

        #-- 3 --
        return keys


# - - -   T r e e T a b l e . r e l o a d   - - -

    def reload ( self, fileName ):
        """Replace this table's contents with those of a .tre file.

          [ if fileName is a string ->
              if fileName names a readable, valid .tre file ->
                self  :=  self - (all existing contents) + (contents of
                          fileName)
              else ->
                raise TxnyError ]

          The existing contents are deleted before the file is
          opened, so a failure leaves the table empty or partly
          loaded rather than restored.
        """
        #-- 1 --
        # [ self      :=  self - (all existing contents)
        #   errCount  :=  number of errors logged so far ]
        self.deleteContents ()
        errCount  =  Log().count()

        #-- 2 --
        # [ if fileName can be opened for reading ->
        #     scan  :=  a new Scan object at the start of that file
        #   else ->
        #     raise TxnyError ]
        # The `except ... as' and call-style `raise' forms parse on
        # both Python 2.6+ and Python 3.
        try:
            scan  =  Scan ( fileName )
        except IOError as detail:
            raise TxnyError ( "Can't open tree file `%s' for reading.\n%s" %
                              ( fileName, str(detail) ) )

        #-- 3 --
        # [ scan   :=   scan advanced to end of file, then closed
        #   self   :=   self with new rows added corresponding to valid
        #               .tre lines from scan
        #   Log()  +:=  errors from bad lines in scan, if any ]
        # The finally clause closes the scan even when reading or
        # inserting a line raises.
        # NOTE(review): atEndFile is read as an attribute here, while
        # atEndLine() is called as a method elsewhere in this class --
        # confirm against the scan module.
        try:
            while  not scan.atEndFile:
                #-- 3 body --
                # [ if line in scan is a valid .tre line ->
                #     scan  :=  scan advanced to the next line
                #     self  :=  self + (a new row made from that line)
                #   else ->
                #     scan   :=   scan advanced to the next line
                #     Log()  +:=  error message(s) ]
                self.__readTreeLine ( scan )
                scan.nextLine()
        finally:
            scan.close()

        #-- 4 --
        # [ if errCount < Log().count() ->
        #     raise TxnyError
        #   else ->
        #     return ]
        if  errCount < Log().count():
            raise TxnyError ( "The tree file was not valid." )


# - - -   T r e e T a b l e . _ _ r e a d T r e e L i n e   - - -

    def __readTreeLine ( self, scan ):
        """Read one line from a .tre file

          [ if scan is a Scan object ->
              if line in scan is a valid .tre line ->
                scan  :=  scan advanced to end of line
                self  :=  self + (a new row made from that line)
              else ->
                scan   :=   scan advanced past the valid part, if any
                Log()  +:=  error message(s) ]

          Fields are scanned left to right; the first field that
          fails stops processing of the line and no row is inserted.
        """

        #-- 1 --
        # [ if scan starts with a valid txKey field ->
        #     scan   :=  scan advanced past that field
        #     txKey  :=  that field
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        txKey  =  self.__scanTxKey ( scan )
        if  txKey is None:
            return

        #-- 2 --
        # [ if scan starts with L_ABBR blanks ->
        #     scan  :=  scan advanced by L_ABBR
        #     abbr  :=  None
        #   else if scan starts with L_ABBR characters in C_ABBR  ->
        #     scan  :=  scan advanced by L_ABBR
        #     abbr  :=  those characters
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        # __scanAbbr() returns None for a legitimately blank field,
        # so failure is signalled by the empty string instead.  That
        # must be tested by value: an identity test against a string
        # literal only works when the interpreter happens to share
        # one empty-string object.
        abbr  =  self.__scanAbbr ( scan )
        if  abbr == "":
            return

        #-- 3 --
        # [ if scan starts with a character in C_STATUS ->
        #     scan    :=  scan advanced by one
        #     status  :=  that character
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        status  =  self.__scanStatus ( scan )
        if  status is None:
            return

        #-- 4 --
        # [ if scan starts with a scientific name field ->
        #     scan  :=  scan advanced past that field
        #     sci   :=  that field, with right blanks trimmed
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        sci  =  self.__scanSci ( scan )
        if  sci is None:
            return

        #-- 5 --
        # [ if scan starts with an English name field ->
        #     scan  :=  scan advanced past that field
        #     eng   :=  that field, with right blanks trimmed
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        eng  =  self.__scanEng ( scan )
        if  eng is None:
            return

        #-- 6 --
        # [ if characters remain in the line in scan ->
        #     scan  :=  scan advanced to end of line
        #     tex   :=  characters up to end of line in scan
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        tex  =  self.__scanTeX ( scan )
        if  tex is None:
            return

        #-- 7 --
        # [ self  :=  self with a new row made from txKey, abbr,
        #             status, sci, eng, and tex ]
        # NB: Since the F_STD_ABBR field is optional, we add it
        # only if the corresponding value is not None.
        rowMap  =  { F_TX_KEY: txKey,
                     F_STATUS: status,
                     F_SCI: sci,
                     F_ENG: eng,
                     F_TEX: tex }
        if  abbr is not None:
            rowMap [ F_STD_ABBR ]  =  abbr

        self.insert ( rowMap )


# - - -   T r e e T a b l e . _ _ s c a n T x K e y   - - -

    def __scanTxKey ( self, scan ):
        """Scan the fixed-width taxonomic key at the scan position.

          [ if  scan is a Scan object ->
              if scan.flatCset() accepts txKeyLen characters from
              cset.digits ->
                return what flatCset() returned
              else ->
                Log()  +:=  error message
                return None ]
        """

        #-- 1 --
        # [ width  :=  key length dictated by the rank hierarchy ]
        width  =  self.txny.hier.txKeyLen

        #-- 2 --
        # [ key  :=  result of asking scan for `width' digits, which
        #            is None when the field does not match ]
        key  =  scan.flatCset ( width, cset.digits )

        #-- 3 --
        if  key is not None:
            return key

        #-- 4 --
        scan.error ( "Expecting %d-digit taxonomic key" % width )
        return None


# - - -   T r e e T a b l e . _ _ s c a n A b b r   - - -

    def __scanAbbr ( self, scan ):
        """Scan the optional standard bird code field.

          [ if scan is a Scan object ->
              if scan.flatCset() accepts L_ABBR characters from
              C_BLANK ->
                return None
              else if it accepts L_ABBR characters from C_ABBR ->
                return what flatCset() returned
              else ->
                Log()  +:=  error message
                return "" ]

          None means `no code on this line'; the empty string is
          the failure signal.
        """

        #-- 1 --
        # [ an all-blank field is valid and stands for `no code' ]
        if  scan.flatCset ( L_ABBR, C_BLANK ) is not None:
            return None

        #-- 2 --
        # [ code  :=  the field if it is made of C_ABBR characters,
        #             else None ]
        code  =  scan.flatCset ( L_ABBR, C_ABBR )

        #-- 3 --
        if  code is None:
            scan.error ( "Expecting a bird abbreviation of "
                         "length %d (or spaces)" % L_ABBR )
            return ""

        #-- 4 --
        return code


# - - -   T r e e T a b l e . _ _ s c a n S t a t u s   - - -

    def __scanStatus ( self, scan ):
        """Scan the one-character status code field.

          [ if scan is a Scan object ->
              if scan.tabAny() accepts a character from C_STATUS ->
                return what tabAny() returned
              else ->
                Log()  +:=  error message
                return None ]
        """

        #-- 1 --
        # [ code  :=  result of asking scan for one C_STATUS
        #             character, which is None on a mismatch ]
        code  =  scan.tabAny ( C_STATUS )

        #-- 2 --
        if  code is not None:
            return code

        #-- 3 --
        scan.error ( "Expecting a status code" )
        return None


# - - -   T r e e T a b l e . _ _ s c a n S c i   - - -

    def __scanSci ( self, scan ):
        """Scan the scientific name field

          [ if scan is a Scan object ->
              if scan starts with a scientific name field ->
                scan  :=  scan advanced past that field
                return the contents of that field without right blanks
              else ->
                Log()  +:=  error message
                return None ]
        """

        #-- 1 --
        # [ if scan.move() can deliver L_SCI characters ->
        #     rawSci  :=  what move() returned
        #   else ->
        #     Log()  +:=  error message
        #     return None ]
        rawSci  =  scan.move ( L_SCI )
        if  rawSci is None:
            scan.error ( "Expecting the scientific name of length %d" %
                         L_SCI )
            return None

        #-- 2 --
        # The str method replaces string.rstrip(), a deprecated
        # module function that no longer exists in Python 3.
        return rawSci.rstrip()


# - - -   T r e e T a b l e . _ _ s c a n E n g   - - -

    def __scanEng ( self, scan ):
        """Scan the English name field

          [ if scan is a Scan object ->
              if scan starts with an English name field ->
                scan  :=  scan advanced past that field
                return the contents of that field without right blanks
              else ->
                Log()  +:=  error message
                return None ]
        """

        #-- 1 --
        # [ if scan.move() can deliver L_ENG characters ->
        #     rawEng  :=  what move() returned
        #   else ->
        #     Log()  +:=  error message
        #     return None ]
        rawEng  =  scan.move ( L_ENG )
        if  rawEng is None:
            scan.error ( "Expecting the English name of length %d" %
                         L_ENG )
            return None

        #-- 2 --
        # The str method replaces string.rstrip(), a deprecated
        # module function that no longer exists in Python 3.
        return rawEng.rstrip()


# - - -   T r e e T a b l e . _ _ s c a n T e X   - - -

    def __scanTeX ( self, scan ):
        """Scan the TeX-encoded English name, the last field on the line.

          [ if scan is a Scan object ->
              if scan.atEndLine() is false ->
                return the result of scan.tab(-1)
              else ->
                Log()  +:=  error message
                return None ]
        """

        #-- 1 --
        # [ if anything is left on the line ->
        #     return whatever scan.tab(-1) yields
        #   else -> I ]
        # NOTE(review): tab(-1) presumably moves to end of line and
        # returns the text skipped -- confirm against the scan module.
        if  not scan.atEndLine():
            return scan.tab(-1)

        #-- 2 --
        # [ Log()  +:=  error message
        #   return None ]
        scan.error ( "Expecting the TeX-encoded English name" )
        return None



# - - - - -   c l a s s   A b b r T a b l e   - - - - -

class AbbrTable(Table):
    """Represents the table for the abbr file from nombuild.icn

      Exports:
        AbbrTable ( txny ):
          [ if (txny is a Txny object) ->
              self  :=  a Table object representing ABBR_TABLE in self.db ]
        .txny       [ as passed to constructor ]
        .lookupAbbrDef ( abbr ):
          [ if (abbr is a string) ->
              if self contains an entry for abbr ->
                return an AbbrDef representing that entry
              else -> return None ]
        .allAbbrs ( ):
          [ return all F_ABBR fields from self in ascending order ]
        .reload ( fileName ):
          [ if fileName is a string ->
              if fileName names a readable, valid .ab6 file ->
                self  :=  self - (all existing contents) + (contents of
                          fileName)
              else ->
                raise TxnyError ]
    """


# - - -   A b b r T a b l e . _ _ i n i t _ _   - - -

    def __init__ ( self, txny ):
        """Constructor for AbbrTable
        """
        columns  =  [
          Column ( F_ABBR,    "char(%d)" % L_ABBR, keyType=PRIMARY ),
          Column ( F_SCI,     "varchar(%d)" % L_SCI ),
          Column ( F_ALT_ENG, "varchar(%d)" % L_ENG ) ]
        Table.__init__ ( self, txny.db, T_AB6, columns )
        self.txny  =  txny


# - - -   A b b r T a b l e . l o o k u p A b b r D e f   - - -

    def lookupAbbrDef ( self, abbr ):
        """Find the scientific name and English name for a given bird code

          [ if (abbr is a string) ->
              if (abbr is the abbr for a taxon in self, case-insensitive) ->
                return the AbbrDef for that abbr
              else -> return None ]
        """
        #-- 1 --
        # [ if self has a record whose F_ABBR field matches abbr ->
        #     row  :=  that record as a Row object
        #   else ->
        #     row  :=  None ]
        row  =  self.queryExact ( [ ( F_ABBR, abbr ) ] )


        #-- 2 --
        # [ if row is None -> return None
        #   else ->
        #     return an AbbrDef object made from row ]
        if  row is None:
            return None
        else:
            return self.__makeAbbrDef ( row )


# - - -   A b b r T a b l e . _ _ m a k e A b b r D e f   - - -

    def __makeAbbrDef ( self, row ):
        """Fabricate an AbbrDef object from a query row

          [ if row is a Row object from self's table ->
              return an AbbrDef object representing that row ]
        """

        #-- 1 --
        # [ abbr  :=  F_ABBR field from row
        #   sci   :=  F_SCI field from row
        #   eng   :=  F_ALT_ENG field from row ]
        abbr  =  row [ F_ABBR ]
        sci   =  row [ F_SCI ]
        eng   =  row [ F_ALT_ENG ]

        #-- 2 --
        return AbbrDef ( abbr, sci, eng )


# - - -   A b b r T a b l e . a l l A b b r s   - - -

    def allAbbrs ( self ):
        """Return all codes in ascending order
        """

        #-- 1 --
        # [ q  :=  a QueryResult object representing all the rows in self,
        #          sorted by the F_ABBR field ]
        q  =  self.queryAll ( sortList=[ SortSpec ( F_ABBR ) ] )

        #-- 2 --
        # [ result  :=  a list of all F_ABBR fields from q in the same order ]
        result  =  []
        row     =  q.next()

        while  row is not None:
            result.append ( row [ F_ABBR ] )
            row     =  q.next()

        #-- 3 --
        return result


# - - -   A b b r T a b l e . r e l o a d   - - -

    def reload ( self, fileName ):
        """Reload self from the .ab6 file
        """

        #-- 1 --
        # [ self      :=  self - (all existing contents)
        #   errCount  :=  error count from Log() ]
        self.deleteContents()
        errCount  =  Log().count()

        #-- 2 --
        # [ if fileName can be opened for reading ->
        #     scan  :=  a new Scan object at the start of that file
        #   else ->
        #     raise TxnyError ]
        try:
            scan  =  Scan ( fileName )
        except IOError, detail:
            raise TxnyError, ( "Can't open abbreviations file `%s' for "
                               "reading.\n%s" % ( fileName, `detail` ) )

        #-- 3 --
        # [ scan   :=   scan advanced to end of file
        #   self   :=   self with new rows added corresponding to valid
        #               .ab6 records from scan
        #   Log()  +:=  error messages from bad lines in scan, if any ]
        while  not scan.atEndFile:
            #-- 3 body --
            # [ if line in scan is a valid .ab6 line ->
            #     scan  :=  scan advanced to the next line
            #     self  :=  self + (a new row added from that line)
            #   else ->
            #     scan   :=   scan advanced to the next line
            #     Log()  +:=  error message(s) ]
            self.__readAbbrLine ( scan )
            scan.nextLine()


# - - -   A b b r T a b l e . _ _ r e a d A b b r L i n e   - - -

    def __readAbbrLine ( self, scan ):
        """Read and process a line from the .ab6 file

          [ if scan is a Scan object ->
              if line in scan is a valid .ab6 line ->
                scan  :=  scan advanced to the next line
                self  :=  self + (a new row added from that line)
              else ->
                scan   :=   scan advanced to the next line
                Log()  +:=  error message(s) ]
        """

        #-- 1 --
        # [ if scan starts with an abbr field ->
        #     scan  :=  scan advanced past that field
        #     abbr  :=  that field
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        abbr  =  scan.flatCset ( L_ABBR, C_ABBR )
        if  abbr is None:
            scan.error ( "Expecting the %d-letter code." % L_ABBR )
            return

        #-- 2 --
        # [ if scan starts with at least L_SCI characters ->
        #     scan  :=  scan advanced by L_SCI
        #     sci   :=  the next L_SCI characters from scan with
        #               right spaces trimmed
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        try:
            sci  =  string.rstrip ( scan.move ( L_SCI ) )
        except IndexError:
            scan.error ( "Expecting a %d-character scientific "
                         "name field." % L_SCI )
            return

        #-- 3 --
        # [ if scan is not at end of line ->
        #     scan  :=  scan advanced to end of line
        #     eng   :=  remaining characters from line in scan with
        #               right spaces trimmed
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        if  not scan.atEndLine():
            eng  =  string.rstrip ( scan.tab ( -1 ) )
        else:
            scan.error ( "Expecting the English name field." )
            return

        #-- 4 --
        # [ if abbr is already in self ->
        #     Log()  +:=  error
        #     return
        #   else -> I ]
        abbrDef  =  self.lookupAbbrDef ( abbr )
        if  abbrDef is not None:
            scan.error ( "This code is already defined as "
                         "%s [%s]" % ( abbrDef.eng, abbrDef.sci ) )

        #-- 5 --
        # [ if sci is defined in self.txny.treeTable -> I
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        taxon  =  self.txny.treeTable.lookupSci ( sci )
        if  taxon is None:
            scan.error ( "Scientific name `%s' is not defined in "
                         "the tree." % sci )

        #-- 6 --
        # [ self  :=  self + (a new row made from F_ABBR=abbr, 
        #             F_SCI=sci, and F_ALT_ENG=eng) ]
        self.insert ( { F_ABBR:    abbr,
                        F_SCI:     sci,
                        F_ALT_ENG: eng } )


# - - - - -   c l a s s   C o l l T a b l e   - - - - -

class CollTable(Table):
    """Represents the table for the collisions file from nombuild.icn

      Exports:
        CollTable ( txny ):
          [ if (txny is a Txny object) ->
              self  :=  a Table object representing COLL_TABLE in self.db ]
        .txny       [ as passed to constructor ]
        .lookupColl ( abbr ):
          [ if abbr is a string ->
              if abbr occurs in the bad_abbr column of one or more
              records in self ->
                return a list of all good_abbr columns from those records ]
        .reload ( fileName ):
          [ if fileName is a string ->
              if fileName names a readable, valid .col file ->
                self  :=  self - (all existing contents) + (contents of
                          fileName)
              else ->
                raise TxnyError ]
    """


# - - -   C o l l T a b l e . _ _ i n i t _ _   - - -

    def __init__ ( self, txny ):
        """Constructor for the CollTable object
        """
        columns  =  [
          Column ( F_BAD_ABBR,  "char(%d)" % L_ABBR ),
          Column ( F_GOOD_ABBR, "char(%d)" % L_ABBR ) ]
        Table.__init__ ( self, txny.db, T_COLL, columns,
          keyList= [ TableKey ( PRIMARY, [ F_BAD_ABBR, F_GOOD_ABBR ] ) ] )
        self.txny  =  txny


# - - -   C o l l T a b l e . l o o k u p C o l l   - - -

    def lookupColl ( self, abbr ):
        """Lookup for the preferred alternates for a collision form
        """
        #-- 1 --
        # [ q  :=  a QueryResult object representing the set of records
        #          in self whose F_BAD_ABBR columns equal abbr ]
        q  =  self.querySome ( colPairs=[ ( F_BAD_ABBR, abbr ) ],
                               sortList=[ SortSpec ( F_GOOD_ABBR ) ] )

        #-- 2 --
        # [ result  :=  values of the F_GOOD_ABBR columns in q in
        #               the order returned by q ]
        result  =  []
        row     =  q.next()

        while  row is not None:
            result.append ( row [ F_GOOD_ABBR ] )
            row     =  q.next()

        #-- 3 --
        return result


# - - -   C o l l T a b l e . r e l o a d   - - -

    def reload ( self, fileName ):
        """Reload this table from a .col file
        """

        #-- 1 --
        # [ self      :=  self - (all existing contents)
        #   errCount  :=  error count from Log() ]
        self.deleteContents()
        errCount  =  Log().count()

        #-- 2 --
        # [ if fileName can be opened for reading ->
        #     scan  :=  a new Scan object at the start of that file
        #   else ->
        #     raise TxnyError ]
        try:
            scan  =  Scan ( fileName )
        except IOError, detail:
            raise TxnyError, ( "Can't open collisions file `%s' for "
                               "reading.\n%s" % ( fileName, `detail` ) )

        #-- 3 --
        # [ scan   :=   scan advanced to end of file
        #   self   :=   self with new rows added corresponding to
        #               valid .col lines from scan
        #   Log()  +:=  errors from bad lines in scan, if any ]
        while  not scan.atEndFile:
            #-- 3 body --
            # [ if line in scan is a valid .col line ->
            #     scan  :=  scan advanced to the next line
            #     self  :=  self + (a new row made from that line)
            #   else ->
            #     scan   :=   scan advanced to the next line
            #     Log()  +:=  error message ]
            self.__readCollLine ( scan )
            scan.nextLine()

        #-- 4 --
        # [ if errCount < Log().count()  ->
        #     raise TxnyError
        #   else ->
        #     return ]
        scan.close()
        if  errCount < Log().count():
            raise TxnyError, "The collisions file was not valid."



# - - -   C o l l T a b l e . _ _ r e a d C o l l L i n e   - - -

    def __readCollLine ( self, scan ):
        """Read and process one collision line.

          [ if scan is a Scan object ->
              if line in scan is a valid .col line ->
                scan  :=  scan advanced to the end of the line
                self  :=  self + (a new row made from that line)
              else ->
                scan   :=   scan advanced past valid parts, if any
                Log()  +:=  error message ]
        """

        #-- 1 --
        # [ if scan starts with a valid abbr field ->
        #     scan     :=  scan advanced past that field
        #     badAbbr  :=  that field's contents
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        badAbbr  =  self.__scanAbbr ( scan )
        if  badAbbr is None:
            scan.error ( "Expecting the collision (bad) code" )
            return

        #-- 2 --
        # [ if scan starts with a valid abbr field ->
        #     scan      :=  scan advanced past that field
        #     goodAbbr  :=  that field's contents
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        goodAbbr  =  self.__scanAbbr ( scan )
        if  goodAbbr is None:
            scan.error ( "Expecting the preferred (good) code" )
            return

        #-- 3 --
        # [ if badAbbr is a key in self.txny.abbrTable ->
        #     Log()  +:=  error message
        #     return
        #   else -> I ]
        abbrDef  =  self.txny.abbrTable.lookupAbbrDef ( badAbbr )
        if  abbrDef is not None:
            scan.error ( "Code `%s' can't be a collision code.  It occurs "
                         "in the abbr file as `%s'." %
                         ( badAbbr, str(abbrDef) ) )
            return

        #-- 4 --
        # [ if goodAbbr is a key in self.txny.abbrTable ->
        #     return
        #   else ->
        #     Log()  +:=  error message
        #     return ]
        abbrDef  =  self.txny.abbrTable.lookupAbbrDef ( goodAbbr )
        if  abbrDef is None:
            scan.error ( "Code `%s' is not in the abbreviations file." %
                         goodAbbr )
            return

        #-- 5 --
        # [ self  :=  self + (a new row made from F_BAD_ABBR=badAbbr
        #             and F_GOOD_ABBR=goodAbbr) ]
        self.insert ( { F_BAD_ABBR: badAbbr,  F_GOOD_ABBR: goodAbbr } )


# - - -   C o l l T a b l e . _ _ s c a n A b b r   - - -

    def __scanAbbr ( self, scan ):
        """Process an abbreviation field from scan

          [ if scan is a Scan object ->
              if scan starts with a valid abbr field ->
                scan  :=  scan advanced past that field
                return the contents of that field
              else -> return None ]
        """

        #-- 1 --
        # [ if scan starts with L_ABBR characters in C_ABBR ->
        #     scan  :=  scan advanced by L_ABBR
        #     abbr  :=  next L_ABBR characters from scan
        #   else ->
        #     return None ]
        abbr  =  scan.flatCset ( L_ABBR, C_ABBR )
        if  abbr is None:
            return None         

        #-- 2 --
        return abbr


# - - - - -   c l a s s   T a x o n   - - - - -

class Taxon:
    """One biological taxon at some rank, e.g., family Falconidae.

      Exports:
        Taxon ( txny, rank, txKey, abbr, status, sci, eng, tex )
          [ if (txny is a Txny object)
            and (rank is a Rank object)
            and (txKey is a TxKey string)
            and (abbr is an Abbr string or None)
            and (status is a one-character string)
            and (sci is a nonempty scientific name string)
            and (eng is a nonempty English name string)
            and (tex is a nonempty TeX English name string) ->
              return a new Taxon object holding those values ]
        .txny       [ as passed to constructor ]
        .rank       [ as passed to constructor ]
        .txKey      [ as passed to constructor ]
        .abbr       [ as passed to constructor ]
        .status     [ as passed to constructor ]
        .sci        [ as passed to constructor ]
        .eng        [ as passed to constructor ]
        .tex        [ as passed to constructor ]
        .parent()
          [ if self.rank.depth is zero -> return None
            else -> return what self.txny.lookupTxKey gives for
                    self's parent key ]
        str()       [ returns "eng [sci]" built from self's fields ]
    """


# - - -   T a x o n . _ _ i n i t _ _   - - -

    def __init__ ( self, txny, rank, txKey, abbr, status, sci, eng, tex ):
        """Store every constructor argument under the same name
        """
        #-- 1 --
        # [ self  :=  self with the owning taxonomy and the position
        #             within it recorded ]
        self.txny, self.rank, self.txKey  =  txny, rank, txKey

        #-- 2 --
        # [ self  :=  self with the code and status recorded ]
        self.abbr, self.status  =  abbr, status

        #-- 3 --
        # [ self  :=  self with the three name forms recorded ]
        self.sci, self.eng, self.tex  =  sci, eng, tex


# - - -   T a x o n . p a r e n t   - - -

    def parent ( self ):
        """Return the taxon one level up, or None at depth zero
        """
        #-- 1 --
        # [ if self.rank.depth is nonzero ->
        #     return the result of looking up self.txKey with the
        #     subfield for self's depth replaced by zeroes
        #   else -> I ]
        if  self.rank.depth != 0:
            ownKey        =  self.txKey
            zeroes        =  "0" * self.rank.keyLen
            # txKeySubfield gives the slice bounds of this depth's
            # digits within the key.
            bounds        =  self.txny.hier.txKeySubfield ( self.rank.depth )
            start, end    =  bounds
            return self.txny.lookupTxKey (
                ownKey[:start] + zeroes + ownKey[end:] )

        #-- 2 --
        # [ self is at depth zero, so it has no parent ]
        return None


# - - -   T a x o n . _ _ s t r _ _   - - -

    def __str__ ( self ):
        """Display self as English name followed by bracketed sci name
        """
        names  =  ( self.eng, self.sci )
        return "%s [%s]" % names


# - - - - -   c l a s s   A b b r D e f   - - - - -

class AbbrDef:
    """One entry of the abbreviations file: a code and what it stands for.

      Exports:
        AbbrDef ( abbr, sci, eng )
          [ return a new AbbrDef object holding those three values ]
        .abbr   [ the Abbr being defined ]
        .sci    [ the scientific name linking this Abbr to a Taxon ]
        .eng    [ the English name from which this Abbr was derived ]
        str()   [ returns "abbr -> eng [sci]" built from self's fields ]
    """


# - - -   A b b r D e f . _ _ i n i t _ _   - - -

    def __init__ ( self, abbr, sci, eng ):
        """Store the three constructor arguments under the same names
        """
        self.abbr, self.sci, self.eng  =  abbr, sci, eng


# - - -   A b b r D e f . _ _ s t r _ _   - - -

    def __str__ ( self ):
        """Display self as code, English name, and bracketed sci name
        """
        parts  =  ( self.abbr, self.eng, self.sci )
        return "%s -> %s [%s]" % parts

# TCC home: TCC home
# NMT home: NMT home
#
# Last updated: 2014-09-17 17:46 MDT