""" txny.py: Bird taxonomy objects in Python (MySQL version)
$Revision: 1.24 $ $Date: 2000/10/07 07:19:54 $
These objects represent the taxonomy files generated
by nombuild.icn; refer to the document ``A system for
representing taxonomic nomenclature'' for details.
Classes exported:
Txny: Represents the entire taxonomy
Taxon: Represents one taxon, e.g., _Hirundo rustica_,
the barn swallow
AbbrDef: Represents one abbreviation, the English name
from which it was derived, and the scientific name
to which it is referred
Pseudo-classes: These are not Python classes, they are
just strings with constraints on their content.
TxKey: Represents a key number that can be used to sort
data phylogenetically. Must be a string of digits
whose length is (Hier.txKeyLen).
Abbr: A standard bird abbreviation; a string of no more
than L_ABBR characters, right-blank-padded to size,
and uppercased. Example: "KAUOO " == Kauai Oo.
Exported constants (synchronize with `nombuild' program):
L_SCI: Maximum length of a scientific name
L_ENG: Maximum length of an English name
L_TEX: Maximum length of a TeX-encoded English name
L_ABBR: Length of a bird code
"""
#================================================================
# IMPORTS
#----------------------------------------------------------------
# Standard Python modules
#----------------------------------------------------------------
import sys, string, copy # Standard Python modules
import os # Environment lookup for database option overrides
#----------------------------------------------------------------
# Shipman's standard library
#----------------------------------------------------------------
sys.path.insert(0, "/u/john/tcc/python/lib")
from log import * # Error logging module
from scan import * # Stream scanning module
import cset # Character set type
from my_db import * # MySQL schema layer
#----------------------------------------------------------------
# Subsidiary modules
#----------------------------------------------------------------
from hier import * # Taxonomic rank hierarchy module
#================================================================
# MANIFEST CONSTANTS
#================================================================
# Field lengths in the flat files
#----------------------------------------------------------------
# Widths of the fixed-width fields in the flat files.
L_SCI = 36    # Scientific name field length
L_ENG = 56    # English name field length
L_TEX = 80    # TeX English name field length
L_ABBR = 6    # Standard bird code length
# Character sets used while scanning the flat files.  Cset is always
# reached through the cset module, because `import cset' binds only
# the module name; a bare `Cset' would depend on some other
# `from ... import *' happening to export it.
C_ABBR = cset.letters.union(cset.Cset("/ "))    # Cset for bird codes
C_BLANK = cset.Cset(" ")                        # Cset for spaces
C_STATUS = cset.Cset(" ?+")    # Cset for tree file's status code
#----------------------------------------------------------------
# Constants related to databases
#-----------------------------------------------------------------
# Connection options handed to TheDatabase().  Each value may be
# overridden through the environment so that real credentials need
# not live in the source; the literals below are only fallbacks and
# keep the historical behavior when no override is set.
# NOTE(review): a password is still embedded here as the fallback --
# move it out of version control once every caller sets TXNY_DB_PASSWD.
DB_OPTIONS = {
    "user": os.environ.get("TXNY_DB_USER", "john"),
    "passwd": os.environ.get("TXNY_DB_PASSWD", "abracadabra"),
    "host": os.environ.get("TXNY_DB_HOST", "crayola"),
    "db": os.environ.get("TXNY_DB_NAME", "john"),
}
#================================================================
# SCHEMA
#----------------------------------------------------------------
# Prefix conventions:
# T_ Names of tables
# F_ Names of fields (columns)
# L_ Lengths of fields
# X_ File extension for this table in external form
#----------------------------------------------------------------
# Tree table: represents the taxonomic tree. The length of the
# F_TX_KEY column is derived from Hier.txKeyLen.
#----------------------------------------------------------------
T_TREE = "tree"                # Table name
X_TREE = ".tre"                # Flat-file extension
F_TX_KEY = "tx_key"            # Taxonomic key
F_STD_ABBR = "std_abbr"        # Standard bird code...
L_ABBR = 6                     # ...and its length
F_STATUS = "status"            # Status, '?' if not in AOU
F_SCI = "sci"                  # Scientific name...
L_SCI = 36                     # ...and its length
F_ENG = "eng"                  # English name...
L_ENG = 56                     # ...and its length
F_TEX = "tex"                  # TeX English name...
L_TEX = 80                     # ...and its length
#----------------------------------------------------------------
# Abbreviations table: For looking up any bird code
#----------------------------------------------------------------
T_AB6 = "abbr"                 # Table name
X_AB6 = ".ab6"                 # Flat-file extension
F_ABBR = "ab6"                 # Abbreviation (bird code); length L_ABBR
# The F_SCI column (length L_SCI) is shared with the tree table and
# relates each abbreviation to a row there.
F_ALT_ENG = "alt_eng"          # English name for this abbreviation
#----------------------------------------------------------------
# Collisions table: for documenting collisions in the code system
#----------------------------------------------------------------
T_COLL = "coll"                # Table name
X_COLL = ".col"                # Flat-file extension
F_BAD_ABBR = "bad_abbr"        # The invalid code
F_GOOD_ABBR = "good_abbr"      # One of the valid alternatives
# - - - - - c l a s s T x n y E r r o r - - - - -
class TxnyError(Exception):
    """Generic exception raised by this module.

    str() of an instance is the text of the underlying Exception
    prefixed with "*Txny error* ".
    """
    def __str__ ( self ):
        baseText = Exception.__str__ ( self )
        return "*Txny error* %s" % baseText
#================================================================
# Verification functions
#================================================================
# abbr-file(baseName) ==
# the file named (baseName + X_AB6)
#----------------------------------------------------------------
# coll-file(baseName) ==
# the file named (baseName + X_COLL)
#----------------------------------------------------------------
# tree-file(baseName) ==
# the file named (baseName + X_TREE)
#----------------------------------------------------------------
# - - - - - c l a s s T x n y - - -
class Txny:
    """An entire taxonomic tree (e.g., Class Aves) held in database tables.

    Exports:
      Txny ( hier ):
        [ hier is the Hier object defining the rank hierarchy ->
            if TheDatabase(DB_OPTIONS) can be opened ->
              return a new Txny bound to that hierarchy and database
            else -> raise TxnyError ]
      .hier    [ as passed to the constructor ]
      .root():
        [ return the Taxon whose key is all zeroes, or None if the
          tree table has no such row ]
      .lookupTxKey ( txKey ):
        [ return the Taxon with taxonomic key txKey, or None ]
      .lookupSci ( sci ):
        [ return the Taxon with scientific name sci, or None ]
      .lookupAbbr ( abbr ):
        [ return the Taxon that bird code abbr is referred to, or None ]
      .lookupAbbrDef ( abbr ):
        [ return the AbbrDef for bird code abbr, or None ]
      .lookupCollision ( abbr ):
        [ if abbr is a collision code ->
            return the list of preferred codes for it
          else -> return None ]
      .allTxKeys():
        [ return every taxonomic key in self, ascending ]
      .allAbbrs():
        [ return every bird code in self, ascending ]
      .create():
        [ create each of self's tables that does not exist yet ]
      .drop():
        [ drop each of self's tables that exists ]
      .reload ( fileName ):
        [ fileName is the base name of a set of flat files built by
          nombuild.icn ->
            reload all three tables from (fileName + X_TREE),
            (fileName + X_AB6) and (fileName + X_COLL);
            TxnyError from a table's reload propagates ]
    State:
      .db         [ TheDatabase(DB_OPTIONS) ]
      .treeTable  [ TreeTable for the taxonomic tree ]
      .abbrTable  [ AbbrTable for the abbreviations ]
      .collTable  [ CollTable for the collisions ]
    """
    # - - - T x n y . _ _ i n i t _ _ - - -
    def __init__ ( self, hier ):
        """Open the database and build the three table objects.
        """
        self.hier = hier
        #-- Open the database; a DBHelpersError becomes a TxnyError
        try:
            self.db = TheDatabase ( DB_OPTIONS )
        except DBHelpersError:
            # sys.exc_info() fetches the active exception in a form
            # that parses on both old and new interpreters.
            detail = sys.exc_info()[1]
            raise TxnyError ( "Could not open database.\n%s" %
                              str(detail) )
        #-- The tables are built in this order: tree, abbr, coll
        self.treeTable = TreeTable ( self )
        self.abbrTable = AbbrTable ( self )
        self.collTable = CollTable ( self )
    # - - - T x n y . r o o t - - -
    def root ( self ):
        """Return the root taxon (the one whose key is all zeroes).
        """
        return self.treeTable.lookupTxKey ( "0" * self.hier.txKeyLen )
    # - - - T x n y . l o o k u p T x K e y - - -
    def lookupTxKey ( self, txKey ):
        """Return the Taxon for taxonomic key txKey, or None.
        """
        return self.treeTable.lookupTxKey ( txKey )
    # - - - T x n y . l o o k u p S c i - - -
    def lookupSci ( self, sci ):
        """Return the Taxon for scientific name sci, or None.
        """
        return self.treeTable.lookupSci ( sci )
    # - - - T x n y . l o o k u p A b b r - - -
    def lookupAbbr ( self, abbr ):
        """Return the Taxon a bird code is referred to, or None.

        The code is first resolved to its AbbrDef; that definition's
        scientific name is then looked up in the tree table.
        """
        definition = self.abbrTable.lookupAbbrDef ( abbr )
        if definition is not None:
            return self.lookupSci ( definition.sci )
        return None
    # - - - T x n y . l o o k u p A b b r D e f - - -
    def lookupAbbrDef ( self, abbr ):
        """Return the AbbrDef for bird code abbr, or None.
        """
        return self.abbrTable.lookupAbbrDef ( abbr )
    # - - - T x n y . l o o k u p C o l l i s i o n - - -
    def lookupCollision ( self, abbr ):
        """Return the preferred codes for collision code abbr, or None.
        """
        #-- The table reports "not a collision" as an empty list;
        #   callers of this method get None for that case instead.
        alternates = self.collTable.lookupColl ( abbr )
        if len ( alternates ) > 0:
            return alternates
        return None
    # - - - T x n y . a l l T x K e y s - - -
    def allTxKeys ( self ):
        """Return all taxonomic keys in ascending order.
        """
        return self.treeTable.allTxKeys()
    # - - - T x n y . a l l A b b r s - - -
    def allAbbrs ( self ):
        """Return all bird codes in ascending order.
        """
        return self.abbrTable.allAbbrs()
    # - - - T x n y . c r e a t e - - -
    def create ( self ):
        """Create whichever of self's tables do not exist yet.
        """
        for table in ( self.treeTable, self.abbrTable, self.collTable ):
            if not table.exists():
                table.create()
    # - - - T x n y . d r o p - - -
    def drop ( self ):
        """Drop whichever of self's tables currently exist.
        """
        for table in ( self.treeTable, self.abbrTable, self.collTable ):
            if table.exists():
                table.drop()
    # - - - T x n y . r e l o a d - - -
    def reload ( self, fileName ):
        """Reload all three tables from flat files.

        fileName is the base name; each table's extension is appended
        to it.  The tree is loaded first, then the abbreviations, then
        the collisions.
        """
        loadPlan = ( ( self.treeTable, X_TREE ),
                     ( self.abbrTable, X_AB6 ),
                     ( self.collTable, X_COLL ) )
        for table, extension in loadPlan:
            table.reload ( fileName + extension )
# - - - - - c l a s s T r e e T a b l e - - - - -
class TreeTable(Table):
    """Database table holding the tree (.tre) file written by nombuild.icn.

    Exports (in addition to those of the parent class):
      TreeTable ( txny ):
        [ txny is a Txny object ->
            return a Table named T_TREE in txny.db whose key column
            is txny.hier.txKeyLen characters wide ]
      .txny    [ as passed to the constructor ]
      .lookupTxKey ( txKey ):
        [ txKey is a string ->
            if a row of self has a tx_key field matching txKey ->
              return a Taxon representing that row
            else -> return None ]
      .lookupSci ( sci ):
        [ sci is a string ->
            if a row of self has a sci field matching sci ->
              return a Taxon representing that row
            else -> return None ]
      .allTxKeys():
        [ return a list of every tx_key field in self, ascending ]
      .reload ( fileName ):
        [ fileName is a string ->
            if fileName names a readable, valid .tre file ->
              self := self - (all existing contents)
                      + (rows made from the lines of that file)
            else -> raise TxnyError ]

    A .tre line is laid out as fixed-width fields, in this order:
    taxonomic key (txny.hier.txKeyLen digits), standard bird code
    (L_ABBR characters, possibly all blank), status (one character
    from C_STATUS), scientific name (L_SCI characters), English name
    (L_ENG characters), and the TeX English name (rest of the line).
    """
    # - - - T r e e T a b l e . _ _ i n i t _ _ - - -
    def __init__ ( self, txny ):
        """Define the columns of the tree table and bind it to txny.db.
        """
        columns = [
            Column ( F_TX_KEY, "char(%d)" % txny.hier.txKeyLen,
                     keyType=PRIMARY ),
            Column ( F_STD_ABBR, "char(%d)" % L_ABBR,
                     allowNulls=1 ),
            Column ( F_STATUS, "char(1)" ),
            Column ( F_SCI, "varchar(%d)" % L_SCI ),
            Column ( F_ENG, "varchar(%d)" % L_ENG ),
            Column ( F_TEX, "varchar(%d)" % L_TEX ) ]
        Table.__init__ ( self, txny.db, T_TREE, columns )
        self.txny = txny
    # - - - T r e e T a b l e . l o o k u p T x K e y - - -
    def lookupTxKey ( self, txKey ):
        """Return the Taxon whose taxonomic key is txKey, or None.
        """
        return self.__lookupOne ( F_TX_KEY, txKey )
    # - - - T r e e T a b l e . l o o k u p S c i - - -
    def lookupSci ( self, sci ):
        """Return the Taxon whose scientific name is sci, or None.
        """
        return self.__lookupOne ( F_SCI, sci )
    # - - - T r e e T a b l e . _ _ l o o k u p O n e - - -
    def __lookupOne ( self, column, value ):
        """Shared single-row lookup behind lookupTxKey and lookupSci.
          [ if self has a row for which (column)=value ->
              return a Taxon made from that row
            else -> return None ]
        """
        row = self.queryExact ( [ ( column, value ) ] )
        if row is None:
            return None
        return self.__makeTaxon ( row )
    # - - - T r e e T a b l e . _ _ m a k e T a x o n - - -
    def __makeTaxon ( self, row ):
        """Convert a row of self's table into a Taxon object.
          [ row is a Row from self ->
              return a Taxon representing that row ]
        """
        txKey = row [ F_TX_KEY ]
        #-- The rank is not stored; it is derived from the key
        rank = self.txny.hier.txKeyRank ( txKey )
        return Taxon ( self.txny, rank, txKey,
                       row [ F_STD_ABBR ], row [ F_STATUS ],
                       row [ F_SCI ], row [ F_ENG ], row [ F_TEX ] )
    # - - - T r e e T a b l e . a l l T x K e y s - - -
    def allTxKeys ( self ):
        """Return all taxonomic key fields in ascending order.
        """
        #-- q.next() yields rows until it returns None
        q = self.queryAll ( sortList=[ SortSpec ( F_TX_KEY ) ] )
        result = []
        row = q.next()
        while row is not None:
            result.append ( row [ F_TX_KEY ] )
            row = q.next()
        return result
    # - - - T r e e T a b l e . r e l o a d - - -
    def reload ( self, fileName ):
        """Replace the contents of this table with those of a .tre file.

        Raises TxnyError if the file cannot be opened or if any line
        added an error to Log().  Note that the old contents are
        deleted before the file is opened, and rows for the valid
        lines are inserted even when TxnyError is eventually raised.
        """
        #-- 1 --
        # [ self := self - (all existing contents)
        #   errCount := error count from Log() ]
        self.deleteContents()
        errCount = Log().count()
        #-- 2 --
        # [ if fileName can be opened for reading ->
        #     scan := a new Scan object at the start of that file
        #   else -> raise TxnyError ]
        try:
            scan = Scan ( fileName )
        except IOError:
            detail = sys.exc_info()[1]
            raise TxnyError (
                "Can't open tree file `%s' for reading.\n%s" %
                ( fileName, str(detail) ) )
        #-- 3 --
        # [ scan := scan advanced to end of file, then closed
        #   self := self + (rows for the valid lines in scan)
        #   Log() +:= errors from bad lines in scan, if any ]
        # The finally clause closes the scan even if reading or
        # inserting a line raises.
        try:
            while not scan.atEndFile:
                self.__readTreeLine ( scan )
                scan.nextLine()
        finally:
            scan.close()
        #-- 4 --
        if errCount < Log().count():
            raise TxnyError ( "The tree file was not valid." )
    # - - - T r e e T a b l e . _ _ r e a d T r e e L i n e - - -
    def __readTreeLine ( self, scan ):
        """Read one line from a .tre file.
          [ scan is a Scan object ->
              if the line in scan is a valid .tre line ->
                scan := scan advanced to end of line
                self := self + (a new row made from that line)
              else ->
                scan := scan advanced past the valid part, if any
                Log() +:= error message(s) ]
        Each field scanner logs its own error; this method simply
        stops at the first field that fails.
        """
        #-- 1 --
        txKey = self.__scanTxKey ( scan )
        if txKey is None:
            return
        #-- 2 --
        # __scanAbbr returns None for a blank (absent) code and ""
        # for a malformed one, so the failure test has to be against
        # "" -- and by value, not by object identity.
        abbr = self.__scanAbbr ( scan )
        if abbr == "":
            return
        #-- 3 --
        status = self.__scanStatus ( scan )
        if status is None:
            return
        #-- 4 --
        sci = self.__scanSci ( scan )
        if sci is None:
            return
        #-- 5 --
        eng = self.__scanEng ( scan )
        if eng is None:
            return
        #-- 6 --
        tex = self.__scanTeX ( scan )
        if tex is None:
            return
        #-- 7 --
        # [ self := self with a new row made from the fields above ]
        # F_STD_ABBR is optional, so it is supplied only when the
        # line actually carried a code.
        rowMap = { F_TX_KEY: txKey,
                   F_STATUS: status,
                   F_SCI: sci,
                   F_ENG: eng,
                   F_TEX: tex }
        if abbr is not None:
            rowMap [ F_STD_ABBR ] = abbr
        self.insert ( rowMap )
    # - - - T r e e T a b l e . _ _ s c a n T x K e y - - -
    def __scanTxKey ( self, scan ):
        """Scan the taxonomic key field.
          [ scan is a Scan object ->
              if scan starts with txny.hier.txKeyLen digits ->
                scan := scan advanced past them
                return those digits
              else ->
                Log() +:= error message
                return None ]
        """
        keyLen = self.txny.hier.txKeyLen
        result = scan.flatCset ( keyLen, cset.digits )
        if result is None:
            scan.error ( "Expecting %d-digit taxonomic key" % keyLen )
        return result
    # - - - T r e e T a b l e . _ _ s c a n A b b r - - -
    def __scanAbbr ( self, scan ):
        """Scan the (optional) standard abbreviation field.
          [ scan is a Scan object ->
              if scan starts with L_ABBR blanks ->
                scan := scan advanced by L_ABBR
                return None
              else if scan starts with L_ABBR characters in C_ABBR ->
                scan := scan advanced by L_ABBR
                return those characters
              else ->
                Log() +:= error message
                return "" ]
        """
        #-- 1 --
        # The all-blank case must be tried first: C_ABBR also
        # contains the space, so a blank field would match step 2.
        if scan.flatCset ( L_ABBR, C_BLANK ) is not None:
            return None
        #-- 2 --
        abbr = scan.flatCset ( L_ABBR, C_ABBR )
        if abbr is not None:
            return abbr
        #-- 3 --
        scan.error ( "Expecting a bird abbreviation of "
                     "length %d (or spaces)" % L_ABBR )
        return ""
    # - - - T r e e T a b l e . _ _ s c a n S t a t u s - - -
    def __scanStatus ( self, scan ):
        """Scan the status code field.
          [ scan is a Scan object ->
              if scan starts with a character in C_STATUS ->
                scan := scan advanced by one
                return that character
              else ->
                Log() +:= error message
                return None ]
        """
        result = scan.tabAny ( C_STATUS )
        if result is None:
            scan.error ( "Expecting a status code" )
        return result
    # - - - T r e e T a b l e . _ _ s c a n S c i - - -
    def __scanSci ( self, scan ):
        """Scan the scientific name field.
          [ scan is a Scan object ->
              if scan.move(L_SCI) yields the field ->
                return that field without right blanks
              else ->
                Log() +:= error message
                return None ]
        """
        rawSci = scan.move ( L_SCI )
        if rawSci is None:
            scan.error ( "Expecting the scientific name of length %d" %
                         L_SCI )
            return None
        return string.rstrip ( rawSci )
    # - - - T r e e T a b l e . _ _ s c a n E n g - - -
    def __scanEng ( self, scan ):
        """Scan the English name field.
          [ scan is a Scan object ->
              if scan.move(L_ENG) yields the field ->
                return that field without right blanks
              else ->
                Log() +:= error message
                return None ]
        """
        rawEng = scan.move ( L_ENG )
        if rawEng is None:
            scan.error ( "Expecting the English name of length %d" %
                         L_ENG )
            return None
        return string.rstrip ( rawEng )
    # - - - T r e e T a b l e . _ _ s c a n T e X - - -
    def __scanTeX ( self, scan ):
        """Scan the TeX-encoded English name.
          [ scan is a Scan object ->
              if scan is at end of line ->
                Log() +:= error message
                return None
              else ->
                return the result of scan.tab(-1) ]
        """
        if scan.atEndLine():
            scan.error ( "Expecting the TeX-encoded English name" )
            return None
        # NOTE(review): tab(-1) is presumed to consume and return the
        # rest of the line -- confirm against the scan module.
        return scan.tab ( -1 )
# - - - - - c l a s s A b b r T a b l e - - - - -
class AbbrTable(Table):
    """Database table holding the abbreviations (.ab6) file from nombuild.icn.

    Exports (in addition to those of the parent class):
      AbbrTable ( txny ):
        [ txny is a Txny object ->
            return a Table named T_AB6 in txny.db ]
      .txny    [ as passed to the constructor ]
      .lookupAbbrDef ( abbr ):
        [ abbr is a string ->
            if self contains an entry for abbr ->
              return an AbbrDef representing that entry
            else -> return None ]
      .allAbbrs():
        [ return all F_ABBR fields from self in ascending order ]
      .reload ( fileName ):
        [ fileName is a string ->
            if fileName names a readable, valid .ab6 file ->
              self := self - (all existing contents)
                      + (rows made from the lines of that file)
            else -> raise TxnyError ]

    A .ab6 line is laid out as: bird code (L_ABBR characters from
    C_ABBR), scientific name (L_SCI characters), then the English
    name (rest of the line).
    """
    # - - - A b b r T a b l e . _ _ i n i t _ _ - - -
    def __init__ ( self, txny ):
        """Define the columns of the abbreviations table.
        """
        columns = [
            Column ( F_ABBR, "char(%d)" % L_ABBR, keyType=PRIMARY ),
            Column ( F_SCI, "varchar(%d)" % L_SCI ),
            Column ( F_ALT_ENG, "varchar(%d)" % L_ENG ) ]
        Table.__init__ ( self, txny.db, T_AB6, columns )
        self.txny = txny
    # - - - A b b r T a b l e . l o o k u p A b b r D e f - - -
    def lookupAbbrDef ( self, abbr ):
        """Return the AbbrDef for bird code abbr, or None.
        """
        row = self.queryExact ( [ ( F_ABBR, abbr ) ] )
        if row is None:
            return None
        return self.__makeAbbrDef ( row )
    # - - - A b b r T a b l e . _ _ m a k e A b b r D e f - - -
    def __makeAbbrDef ( self, row ):
        """Fabricate an AbbrDef object from a query row.
          [ row is a Row object from self's table ->
              return an AbbrDef object representing that row ]
        """
        return AbbrDef ( row [ F_ABBR ], row [ F_SCI ],
                         row [ F_ALT_ENG ] )
    # - - - A b b r T a b l e . a l l A b b r s - - -
    def allAbbrs ( self ):
        """Return all codes in ascending order.
        """
        #-- q.next() yields rows until it returns None
        q = self.queryAll ( sortList=[ SortSpec ( F_ABBR ) ] )
        result = []
        row = q.next()
        while row is not None:
            result.append ( row [ F_ABBR ] )
            row = q.next()
        return result
    # - - - A b b r T a b l e . r e l o a d - - -
    def reload ( self, fileName ):
        """Replace the contents of this table with those of a .ab6 file.

        Raises TxnyError if the file cannot be opened or if any line
        added an error to Log().  The old contents are deleted before
        the file is opened, and rows for the valid lines are inserted
        even when TxnyError is eventually raised.
        """
        #-- 1 --
        # [ self := self - (all existing contents)
        #   errCount := error count from Log() ]
        self.deleteContents()
        errCount = Log().count()
        #-- 2 --
        # [ if fileName can be opened for reading ->
        #     scan := a new Scan object at the start of that file
        #   else -> raise TxnyError ]
        try:
            scan = Scan ( fileName )
        except IOError:
            detail = sys.exc_info()[1]
            raise TxnyError ( "Can't open abbreviations file `%s' for "
                              "reading.\n%s" %
                              ( fileName, repr(detail) ) )
        #-- 3 --
        # [ scan := scan advanced to end of file, then closed
        #   self := self + (rows for the valid lines in scan)
        #   Log() +:= errors from bad lines in scan, if any ]
        # The finally clause closes the scan even if reading or
        # inserting a line raises.
        try:
            while not scan.atEndFile:
                self.__readAbbrLine ( scan )
                scan.nextLine()
        finally:
            scan.close()
        #-- 4 --
        # Same validity check as the tree and collisions tables:
        # any error logged while reading makes the whole file invalid.
        if errCount < Log().count():
            raise TxnyError ( "The abbreviations file was not valid." )
    # - - - A b b r T a b l e . _ _ r e a d A b b r L i n e - - -
    def __readAbbrLine ( self, scan ):
        """Read and process one line from the .ab6 file.
          [ scan is a Scan object ->
              if the line in scan is a valid .ab6 line whose code is
              not yet in self and whose scientific name is in the
              tree table ->
                self := self + (a new row made from that line)
              else ->
                Log() +:= error message ]
        """
        #-- 1 --
        # [ abbr := the leading L_ABBR-character code, or log+return ]
        abbr = scan.flatCset ( L_ABBR, C_ABBR )
        if abbr is None:
            scan.error ( "Expecting the %d-letter code." % L_ABBR )
            return
        #-- 2 --
        # [ sci := next L_SCI characters, right-trimmed, or log+return ]
        # NOTE(review): this file treats a short line from scan.move()
        # both as IndexError and as a None result (see the tree
        # table); both are handled here until the scan module's
        # contract is confirmed.
        try:
            rawSci = scan.move ( L_SCI )
        except IndexError:
            rawSci = None
        if rawSci is None:
            scan.error ( "Expecting a %d-character scientific "
                         "name field." % L_SCI )
            return
        sci = string.rstrip ( rawSci )
        #-- 3 --
        # [ eng := rest of the line, right-trimmed, or log+return ]
        if scan.atEndLine():
            scan.error ( "Expecting the English name field." )
            return
        eng = string.rstrip ( scan.tab ( -1 ) )
        #-- 4 --
        # [ if abbr is already in self -> log+return ]
        # Returning here keeps a duplicate code from being inserted
        # against the primary key.
        abbrDef = self.lookupAbbrDef ( abbr )
        if abbrDef is not None:
            scan.error ( "This code is already defined as "
                         "%s [%s]" % ( abbrDef.eng, abbrDef.sci ) )
            return
        #-- 5 --
        # [ if sci is not defined in the tree table -> log+return ]
        # The tree table must therefore be loaded before this one.
        taxon = self.txny.treeTable.lookupSci ( sci )
        if taxon is None:
            scan.error ( "Scientific name `%s' is not defined in "
                         "the tree." % sci )
            return
        #-- 6 --
        self.insert ( { F_ABBR: abbr,
                        F_SCI: sci,
                        F_ALT_ENG: eng } )
# - - - - - c l a s s C o l l T a b l e - - - - -
class CollTable(Table):
    """Database table holding the collisions (.col) file from nombuild.icn.

    Exports (in addition to those of the parent class):
      CollTable ( txny ):
        [ txny is a Txny object ->
            return a Table named T_COLL in txny.db ]
      .txny    [ as passed to the constructor ]
      .lookupColl ( abbr ):
        [ abbr is a string ->
            return a list of the good_abbr columns of all rows whose
            bad_abbr column matches abbr, sorted by good_abbr
            (empty if there are none) ]
      .reload ( fileName ):
        [ fileName is a string ->
            if fileName names a readable, valid .col file ->
              self := self - (all existing contents)
                      + (rows made from the lines of that file)
            else -> raise TxnyError ]

    A .col line is two consecutive L_ABBR-character codes: the
    collision (bad) code followed by one preferred (good) code.
    """
    # - - - C o l l T a b l e . _ _ i n i t _ _ - - -
    def __init__ ( self, txny ):
        """Define the columns and the compound key of the table.
        """
        columns = [
            Column ( F_BAD_ABBR, "char(%d)" % L_ABBR ),
            Column ( F_GOOD_ABBR, "char(%d)" % L_ABBR ) ]
        #-- The primary key spans both columns, because one bad code
        #   may map to several good codes.
        Table.__init__ ( self, txny.db, T_COLL, columns,
            keyList=[ TableKey ( PRIMARY,
                                 [ F_BAD_ABBR, F_GOOD_ABBR ] ) ] )
        self.txny = txny
    # - - - C o l l T a b l e . l o o k u p C o l l - - -
    def lookupColl ( self, abbr ):
        """Return the preferred alternates for a collision code.
        """
        #-- q.next() yields rows until it returns None
        q = self.querySome ( colPairs=[ ( F_BAD_ABBR, abbr ) ],
                             sortList=[ SortSpec ( F_GOOD_ABBR ) ] )
        result = []
        row = q.next()
        while row is not None:
            result.append ( row [ F_GOOD_ABBR ] )
            row = q.next()
        return result
    # - - - C o l l T a b l e . r e l o a d - - -
    def reload ( self, fileName ):
        """Replace the contents of this table with those of a .col file.

        Raises TxnyError if the file cannot be opened or if any line
        added an error to Log().  The old contents are deleted before
        the file is opened, and rows for the valid lines are inserted
        even when TxnyError is eventually raised.
        """
        #-- 1 --
        # [ self := self - (all existing contents)
        #   errCount := error count from Log() ]
        self.deleteContents()
        errCount = Log().count()
        #-- 2 --
        # [ if fileName can be opened for reading ->
        #     scan := a new Scan object at the start of that file
        #   else -> raise TxnyError ]
        try:
            scan = Scan ( fileName )
        except IOError:
            detail = sys.exc_info()[1]
            raise TxnyError ( "Can't open collisions file `%s' for "
                              "reading.\n%s" %
                              ( fileName, repr(detail) ) )
        #-- 3 --
        # [ scan := scan advanced to end of file, then closed
        #   self := self + (rows for the valid lines in scan)
        #   Log() +:= errors from bad lines in scan, if any ]
        # The finally clause closes the scan even if reading or
        # inserting a line raises.
        try:
            while not scan.atEndFile:
                self.__readCollLine ( scan )
                scan.nextLine()
        finally:
            scan.close()
        #-- 4 --
        if errCount < Log().count():
            raise TxnyError ( "The collisions file was not valid." )
    # - - - C o l l T a b l e . _ _ r e a d C o l l L i n e - - -
    def __readCollLine ( self, scan ):
        """Read and process one collision line.
          [ scan is a Scan object ->
              if the line in scan is a valid .col line ->
                scan := scan advanced past both codes
                self := self + (a new row made from that line)
              else ->
                scan := scan advanced past valid parts, if any
                Log() +:= error message ]
        """
        #-- 1 --
        # [ badAbbr := the first code on the line, or log+return ]
        badAbbr = self.__scanAbbr ( scan )
        if badAbbr is None:
            scan.error ( "Expecting the collision (bad) code" )
            return
        #-- 2 --
        # [ goodAbbr := the second code on the line, or log+return ]
        goodAbbr = self.__scanAbbr ( scan )
        if goodAbbr is None:
            scan.error ( "Expecting the preferred (good) code" )
            return
        #-- 3 --
        # [ if badAbbr is a key in self.txny.abbrTable -> log+return ]
        # A collision code must not itself be a defined abbreviation.
        abbrDef = self.txny.abbrTable.lookupAbbrDef ( badAbbr )
        if abbrDef is not None:
            scan.error ( "Code `%s' can't be a collision code. It occurs "
                         "in the abbr file as `%s'." %
                         ( badAbbr, str(abbrDef) ) )
            return
        #-- 4 --
        # [ if goodAbbr is not a key in self.txny.abbrTable ->
        #     log+return
        #   else -> continue to the insert ]
        abbrDef = self.txny.abbrTable.lookupAbbrDef ( goodAbbr )
        if abbrDef is None:
            scan.error ( "Code `%s' is not in the abbreviations file." %
                         goodAbbr )
            return
        #-- 5 --
        self.insert ( { F_BAD_ABBR: badAbbr, F_GOOD_ABBR: goodAbbr } )
    # - - - C o l l T a b l e . _ _ s c a n A b b r - - -
    def __scanAbbr ( self, scan ):
        """Scan one abbreviation field.
          [ scan is a Scan object ->
              if scan starts with L_ABBR characters in C_ABBR ->
                scan := scan advanced by L_ABBR
                return those characters
              else -> return None ]
        No error is logged here; the caller supplies the message.
        """
        return scan.flatCset ( L_ABBR, C_ABBR )
# - - - - - c l a s s T a x o n - - - - -
class Taxon:
    """One specific biological taxon, e.g., family Falconidae.

    Exports:
      Taxon ( txny, rank, txKey, abbr, status, sci, eng, tex ):
        [ (txny is a Txny object)
          and (rank is a Rank object)
          and (txKey is a TxKey string)
          and (abbr is an Abbr string or None)
          and (status is a one-character string)
          and (sci is a nonempty scientific name string)
          and (eng is a nonempty English name string)
          and (tex is a nonempty TeX English name string) ->
            return a new Taxon object representing those fields ]
      .txny, .rank, .txKey, .abbr, .status, .sci, .eng, .tex
        [ as passed to the constructor ]
      .parent():
        [ if self is the root -> return None
          else -> return the Taxon of self's parent ]
      str()    [ returns a string displaying self's .eng and .sci ]
    """
    # - - - T a x o n . _ _ i n i t _ _ - - -
    def __init__ ( self, txny, rank, txKey, abbr, status, sci, eng, tex ):
        """Store the constructor arguments as attributes; no validation.
        """
        self.txny, self.rank = txny, rank
        self.txKey, self.abbr, self.status = txKey, abbr, status
        self.sci, self.eng, self.tex = sci, eng, tex
    # - - - T a x o n . p a r e n t - - -
    def parent ( self ):
        """Return self's parent taxon, or None when self is the root.
        """
        #-- A rank of depth zero marks the root, which has no parent
        if self.rank.depth == 0:
            return None
        #-- The parent's key is self's key with the subfield for
        #   self's own rank blanked to zeroes; txKeySubfield()
        #   supplies that subfield's slice bounds.
        ownKey = self.txKey
        zeros = "0" * self.rank.keyLen
        start, end = self.txny.hier.txKeySubfield ( self.rank.depth )
        return self.txny.lookupTxKey ( ownKey[:start] + zeros +
                                       ownKey[end:] )
    # - - - T a x o n . _ _ s t r _ _ - - -
    def __str__ ( self ):
        """Display self as "English name [scientific name]".
        """
        names = ( self.eng, self.sci )
        return "%s [%s]" % names
# - - - - - c l a s s A b b r D e f - - - - -
class AbbrDef:
    """The definition of one Abbr from the abbreviations file.

    Exports:
      AbbrDef ( abbr, sci, eng )    [ constructor ]
      .abbr    [ the Abbr being defined ]
      .sci     [ the scientific name linking this Abbr to a Taxon ]
      .eng     [ the English name from which this Abbr was derived ]
      str()    [ displays self as "abbr -> eng [sci]" ]
    """
    # - - - A b b r D e f . _ _ i n i t _ _ - - -
    def __init__ ( self, abbr, sci, eng ):
        """Store the three fields as attributes; no validation.
        """
        self.abbr, self.sci, self.eng = abbr, sci, eng
    # - - - A b b r D e f . _ _ s t r _ _ - - -
    def __str__ ( self ):
        """Display self as a string.
        """
        fields = ( self.abbr, self.eng, self.sci )
        return "%s -> %s [%s]" % fields