""" cset.py: Module to emulate Icon cset (character set) objects in Python $Revision: 1.3 $ $Date: 2000/10/04 23:17:57 $ Exports: class Cset: Class for Cset objects .charSizeBits: Number of bits in one character .ucase: Cset of uppercase characters .lcase: Cset of lowercase characters .letters: Cset of all letters .digits: Cset of all digits .whitespace: Cset of all whitespace characters .universe: Cset of all characters (&cset in Icon) """ import string charSizeBits = 8 # Number of bits per character class Cset: """ Object to represent a subset of the character set This class is modeled after the `cset' built-in type in Icon. Each instance represents some, none, or all of the characters that can be represented in the native character set. Exports: Cset(s) [ if s is a string -> return a new Cset object representing all the distinct characters found in s ] .has(c): [ if c is a one-character string -> if c is in self -> return 1 else -> return 0 ] .__str__(self): [ return self as a string, with the elements in ascending order by ordinal ] .union(d) [ if d is a Cset -> return a new Cset consisting of all the characters found in both self and d ] .intersect(d) [ if d is a Cset -> return a new Cset consisting of all the characters found in either self or d ] .delta(d) [ if d is a Cset -> return a new Cset containing all the characters in self that are not found in d ] .complement() [ return a new Cset containing all the characters not found in self ] State: .map: A list of 2**charSizeBits elements, such that self.map[ord(c)] is 1 if c is in self, else 0. """ # - - - C s e t . _ _ i n i t _ _ - - - def __init__ ( self, s ): """ Constructor for a Cset """ #-- 1 -- #-[ self.map := a list of 2**charSizeBits zeroes #-] self.map = [0]*(2**charSizeBits) #-- 2 -- #-[ self.map := self.map with 1's stored in every position x # where x is the ordinal of a character found in s #-] for c in s: self.map[ord(c)] = 1 # - - - C s e t . h a s - - - def has ( self, c ): return self.map[ord(c)] # - - - C s e t . _ _ s t r _ _ - - - def __str__ ( self ): """ Function to convert a Cset to a string """ #-- 1 -- #-[ L := an empty list #-] L = [] #-- 2 -- #-[ L := L with strings S1, S2, ... appended, where each # Si is a character with value ord(i) for each true element # i of self.map, in the same order #-] for i in range(len(self.map)): if self.map[i]: L.append(chr(i)) #-- 3 -- #-[ return the elements of L joined to form a string #-] return string.join(L, "") # - - - C s e t . u n i o n - - - def union(self, d): #-- 1 -- #-[ result := an empty Cset #-] result = Cset("") #-- 2 -- #-[ result.map := the union of self.map and d.map #-] for i in range(len(self.map)): result.map[i] = self.map[i] or d.map[i] #-- 3 -- return result # - - - C s e t . i n t e r s e c t - - - def intersect(self, d): #-- 1 -- #-[ result := an empty Cset #-] result = Cset("") #-- 2 -- #-[ result.map := the intersection of self.map and d.map #-] for i in range(len(self.map)): result.map[i] = self.map[i] and d.map[i] #-- 3 -- return result # - - - C s e t . d e l t a - - - def delta(self, d): #-- 1 -- #-[ result := an empty Cset #-] result = Cset("") #-- 2 -- #-[ result.map := a shallow copy of self.map #-] result.map = self.map[:] #-- 3 -- #-[ result.map := result.map with all elements set to zero # where the corresponding element of # self.map are 1 #-] for i in range(len(self.map)): if d.map[i]: result.map[i] = 0 #-- 4 -- return result # - - - C s e t . c o m p l e m e n t - - - def complement(self): #-- 1 -- #-[ result := an empty Cset #-] result = Cset("") #-- 2 -- #-[ result.map := not self.map #-] for i in range(len(self.map)): result.map[i] = not self.map[i] #-- 3 -- return result # - - - Module exports - - - ucase = Cset ( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ) lcase = Cset ( "abcdefghijklmnopqrstuvwxyz" ) letters = ucase.union(lcase) digits = Cset ( "0123456789" ) emptyCset = Cset ( "" ) whiteSpace = Cset ( " \n\r\t\f" ) universe = emptyCset.complement()