"""set.py: Generic set object for Python $Revision: 1.11 $ $Date: 2003/09/20 02:22:32 $ This object acts like an algebraic set, but the members must be immutable---that is, numbers, strings, or tuples of immutable elements. The advantage of restricting members to tuples is that we can implement the set by using the members as keys in a dictionary. This means that random-access functions, such as testing for membership and deleting, have time complexity O(log n). Exports: Set(*L): [ L is a sequence of immutable objects -> return a new Set object containing the unique members of L ] Note: In Python 2.2 and beyond, if you want to pass a pre-made list X to this constructor, use `Set(*X)' .__contains__(self, x): # x in self [ x is immutable -> if x is in self -> return 1 else -> return 0 ] .__delitem__(self,x): # del self[x] [ x is immutable -> if self contains x -> self := self - {x} else -> raise KeyError ] .__iter__(self): # for x in self: ... [ return a new generator that yields all the members of self ] Note: Any changes to self after the generator is activated will not affect the generated sequence. .__len__(self): # len(self) [ return number of unique members in self ] .__repr__(self): # repr(self) [ return self displayed as a string ] .__str__(self): # str(self) [ return self displayed as a string ] .add(x): [ x is immutable -> self := self union {x} ] .contains(x): [ same as self.__contains__(x) ] .delete(x): [ x is immutable -> if x is a member of self -> self := self - {x} ] else -> I ] .diff(s): [ s is a Set object -> return a new Set containing elements of self not found in s ] .genSorted(comparator=None): [ comparator is a function with the same calling sequence as cmp(), whose domain includes all members of self, defaulting to cmp() -> return a new generator that yields all the members of self, sorted by the comparator function ] Note: Any changes to self after the generator is activated will not affect the generated sequence. .intersect(s): [ if s is a Set object -> return a new Set containing elements in both self and s ] .members(): [ return a new list of the members of self ] .union(s): [ if s is a Set object -> return a new Set containing all unique elements of self combined with s ] .copy(): [ return a new Set object with the same members as self ] """ from __future__ import generators # Allow use of generators import string # Standard string package # - - - - - c l a s s S e t - - - - - class Set: """Generic set object Invariant: self.__map: a dictionary whose keys are the elements contained in self and whose values are all None """ # - - - S e t . _ _ i n i t _ _ - - - def __init__ ( self, *L ): """Constructor for the Set object """ #-- 1 -- self.__map = {} #-- 2 -- # [ if L is None -> I # else -> # self := self + (elements of L) ] if L: for x in L: self.add(x) # - - - S e t . _ _ c o n t a i n s _ _ - - - def __contains__ ( self, x ): "Membership test, `x in self'" return self.__map.has_key(x) # - - - S e t . _ _ d e l i t e m _ _ - - - def __delitem__ ( self, x ): "Delete x from self. Raises KeyError if x not in self." del self.__map[x] # - - - S e t . _ _ i t e r _ _ - - - def __iter__ ( self ): """Generate the elements of self in no particular order. Note: If self.__map is changed after the generator starts running, and before it completes, the generated set is that at the time __iter__ was first called. """ #-- 1 -- for key in self.__map.keys(): # Test version yield key #-- 2 -- raise StopIteration # - - - S e t . _ _ l e n _ _ - - - def __len__ ( self ): "Return the number of elements in self." return len(self.__map) # - - - S e t . _ _ s t r _ _ - - - def __str__ ( self ): """Display self. Items are sorted for user convenience """ #-- 1 -- # [ L := a list of all keys in self.__map ] L = self.__map.keys() #-- 2 -- # [ L := L, sorted ] L.sort() #-- 3 -- # [ return a list of the elements of L, converted to strings, # separated by ", ", and enclosed in braces ] return "{%s}" % ", ".join(map(str, L)) # - - - S e t . _ _ r e p r _ _ - - - __repr__ = __str__ # - - - S e t . a d d - - - def add ( self, x ): """Add element x to self NB: We could check to see if x is already present, but that would cause x to be hashed twice when it's not. It's easier and faster just to store in the map every time. """ self.__map[x] = None # - - - S e t . c o n t a i n s - - - def contains ( self, x ): """Does self contain x? """ return self.__map.has_key(x) # - - - S e t . d e l e t e - - - def delete ( self, x ): """Delete x from self NB: In the original version, we tested for the existence of x in the map before deleting. But this means that in the common case where we're deleting an element that is in the map, we hash twice. Better to use a try/except block, which only comes into play for the case that x is not in the map. """ try: del self.__map[x] except KeyError: pass # - - - S e t . d i f f - - - def diff ( self, other ): """Set difference, self - other """ return Set ( * [ x for x in self.__map.keys() if x not in other ] ) # - - - S e t . g e n S o r t e d - - - def genSorted ( self, comparator=None ): "Generate elements of self in sorted order." #-- 1 -- if comparator is None: comparator = cmp #-- 2 -- # [ keyList := list of keys in self.__map ] keyList = self.__map.keys() #-- 3 -- # [ keyList := keyList, sorted by comparator ] keyList.sort(comparator) #-- 4 -- # [ generate elements of keyList ] for key in keyList: yield key #-- 5 -- raise StopIteration # - - - S e t . i n t e r s e c t - - - def intersect ( self, other ): "Return the intersection of self and other." return Set( * [ x for x in self.__map.keys() if other.__map.has_key(x) ] ) # - - - S e t . m e m b e r s - - - def members ( self ): """Return all members of self """ return self.__map.keys() # - - - S e t . u n i o n - - - def union ( self, other ): """Return union of self and another set """ return Set(*(self.__map.keys()+other.__map.keys())) # - - - S e t . c o p y - - - def copy ( self ): "Return a copy of self" #-- 1 -- # [ L := a new list of the members of self ] L = self.members() #-- 2 -- # [ return a new Set containing the members of L ] return Set(*L)