Home | History | Annotate | Download | only in Lib
      1 """Classes to represent arbitrary sets (including sets of sets).
      2 
      3 This module implements sets using dictionaries whose values are
      4 ignored.  The usual operations (union, intersection, deletion, etc.)
      5 are provided as both methods and operators.
      6 
      7 Important: sets are not sequences!  While they support 'x in s',
      8 'len(s)', and 'for x in s', none of those operations are unique for
      9 sequences; for example, mappings support all three as well.  The
     10 characteristic operation for sequences is subscripting with small
     11 integers: s[i], for i in range(len(s)).  Sets don't support
     12 subscripting at all.  Also, sequences allow multiple occurrences and
     13 their elements have a definite order; sets on the other hand don't
     14 record multiple occurrences and don't remember the order of element
     15 insertion (which is why they don't support s[i]).
     16 
     17 The following classes are provided:
     18 
     19 BaseSet -- All the operations common to both mutable and immutable
     20     sets. This is an abstract class, not meant to be directly
     21     instantiated.
     22 
     23 Set -- Mutable sets, subclass of BaseSet; not hashable.
     24 
     25 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
     26     An iterable argument is mandatory to create an ImmutableSet.
     27 
     28 _TemporarilyImmutableSet -- A wrapper around a Set, hashable,
     29     giving the same hash value as the immutable set equivalent
     30     would have.  Do not use this class directly.
     31 
     32 Only hashable objects can be added to a Set. In particular, you cannot
     33 really add a Set as an element to another Set; if you try, what is
     34 actually added is an ImmutableSet built from it (it compares equal to
     35 the one you tried adding).
     36 
     37 When you ask if `x in y' where x is a Set and y is a Set or
     38 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
     39 what's tested is actually `z in y'.
     40 
     41 """
     42 
     43 # Code history:
     44 #
     45 # - Greg V. Wilson wrote the first version, using a different approach
     46 #   to the mutable/immutable problem, and inheriting from dict.
     47 #
     48 # - Alex Martelli modified Greg's version to implement the current
     49 #   Set/ImmutableSet approach, and make the data an attribute.
     50 #
     51 # - Guido van Rossum rewrote much of the code, made some API changes,
     52 #   and cleaned up the docstrings.
     53 #
     54 # - Raymond Hettinger added a number of speedups and other
     55 #   improvements.
     56 
     57 from itertools import ifilter, ifilterfalse
     58 
# Names exported by "from sets import *".  _TemporarilyImmutableSet is
# left out; the module docstring says not to use it directly.
__all__ = ['BaseSet', 'Set', 'ImmutableSet']

import warnings
# Emitted when the module is imported.  stacklevel=2 makes the warning
# point at the importing code rather than at this file.
warnings.warn("the sets module is deprecated", DeprecationWarning,
                stacklevel=2)
     64 
     65 class BaseSet(object):
     66     """Common base class for mutable and immutable sets."""
     67 
     68     __slots__ = ['_data']
     69 
     70     # Constructor
     71 
     72     def __init__(self):
     73         """This is an abstract class."""
     74         # Don't call this from a concrete subclass!
     75         if self.__class__ is BaseSet:
     76             raise TypeError, ("BaseSet is an abstract class.  "
     77                               "Use Set or ImmutableSet.")
     78 
     79     # Standard protocols: __len__, __repr__, __str__, __iter__
     80 
     81     def __len__(self):
     82         """Return the number of elements of a set."""
     83         return len(self._data)
     84 
     85     def __repr__(self):
     86         """Return string representation of a set.
     87 
     88         This looks like 'Set([<list of elements>])'.
     89         """
     90         return self._repr()
     91 
     92     # __str__ is the same as __repr__
     93     __str__ = __repr__
     94 
     95     def _repr(self, sorted=False):
     96         elements = self._data.keys()
     97         if sorted:
     98             elements.sort()
     99         return '%s(%r)' % (self.__class__.__name__, elements)
    100 
    101     def __iter__(self):
    102         """Return an iterator over the elements or a set.
    103 
    104         This is the keys iterator for the underlying dict.
    105         """
    106         return self._data.iterkeys()
    107 
    108     # Three-way comparison is not supported.  However, because __eq__ is
    109     # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
    110     # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
    111     # case).
    112 
    113     def __cmp__(self, other):
    114         raise TypeError, "can't compare sets using cmp()"
    115 
    116     # Equality comparisons using the underlying dicts.  Mixed-type comparisons
    117     # are allowed here, where Set == z for non-Set z always returns False,
    118     # and Set != z always True.  This allows expressions like "x in y" to
    119     # give the expected result when y is a sequence of mixed types, not
    120     # raising a pointless TypeError just because y contains a Set, or x is
    121     # a Set and y contain's a non-set ("in" invokes only __eq__).
    122     # Subtle:  it would be nicer if __eq__ and __ne__ could return
    123     # NotImplemented instead of True or False.  Then the other comparand
    124     # would get a chance to determine the result, and if the other comparand
    125     # also returned NotImplemented then it would fall back to object address
    126     # comparison (which would always return False for __eq__ and always
    127     # True for __ne__).  However, that doesn't work, because this type
    128     # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
    129     # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
    130 
    131     def __eq__(self, other):
    132         if isinstance(other, BaseSet):
    133             return self._data == other._data
    134         else:
    135             return False
    136 
    137     def __ne__(self, other):
    138         if isinstance(other, BaseSet):
    139             return self._data != other._data
    140         else:
    141             return True
    142 
    143     # Copying operations
    144 
    145     def copy(self):
    146         """Return a shallow copy of a set."""
    147         result = self.__class__()
    148         result._data.update(self._data)
    149         return result
    150 
    151     __copy__ = copy # For the copy module
    152 
    153     def __deepcopy__(self, memo):
    154         """Return a deep copy of a set; used by copy module."""
    155         # This pre-creates the result and inserts it in the memo
    156         # early, in case the deep copy recurses into another reference
    157         # to this same set.  A set can't be an element of itself, but
    158         # it can certainly contain an object that has a reference to
    159         # itself.
    160         from copy import deepcopy
    161         result = self.__class__()
    162         memo[id(self)] = result
    163         data = result._data
    164         value = True
    165         for elt in self:
    166             data[deepcopy(elt, memo)] = value
    167         return result
    168 
    169     # Standard set operations: union, intersection, both differences.
    170     # Each has an operator version (e.g. __or__, invoked with |) and a
    171     # method version (e.g. union).
    172     # Subtle:  Each pair requires distinct code so that the outcome is
    173     # correct when the type of other isn't suitable.  For example, if
    174     # we did "union = __or__" instead, then Set().union(3) would return
    175     # NotImplemented instead of raising TypeError (albeit that *why* it
    176     # raises TypeError as-is is also a bit subtle).
    177 
    178     def __or__(self, other):
    179         """Return the union of two sets as a new set.
    180 
    181         (I.e. all elements that are in either set.)
    182         """
    183         if not isinstance(other, BaseSet):
    184             return NotImplemented
    185         return self.union(other)
    186 
    187     def union(self, other):
    188         """Return the union of two sets as a new set.
    189 
    190         (I.e. all elements that are in either set.)
    191         """
    192         result = self.__class__(self)
    193         result._update(other)
    194         return result
    195 
    196     def __and__(self, other):
    197         """Return the intersection of two sets as a new set.
    198 
    199         (I.e. all elements that are in both sets.)
    200         """
    201         if not isinstance(other, BaseSet):
    202             return NotImplemented
    203         return self.intersection(other)
    204 
    205     def intersection(self, other):
    206         """Return the intersection of two sets as a new set.
    207 
    208         (I.e. all elements that are in both sets.)
    209         """
    210         if not isinstance(other, BaseSet):
    211             other = Set(other)
    212         if len(self) <= len(other):
    213             little, big = self, other
    214         else:
    215             little, big = other, self
    216         common = ifilter(big._data.__contains__, little)
    217         return self.__class__(common)
    218 
    219     def __xor__(self, other):
    220         """Return the symmetric difference of two sets as a new set.
    221 
    222         (I.e. all elements that are in exactly one of the sets.)
    223         """
    224         if not isinstance(other, BaseSet):
    225             return NotImplemented
    226         return self.symmetric_difference(other)
    227 
    228     def symmetric_difference(self, other):
    229         """Return the symmetric difference of two sets as a new set.
    230 
    231         (I.e. all elements that are in exactly one of the sets.)
    232         """
    233         result = self.__class__()
    234         data = result._data
    235         value = True
    236         selfdata = self._data
    237         try:
    238             otherdata = other._data
    239         except AttributeError:
    240             otherdata = Set(other)._data
    241         for elt in ifilterfalse(otherdata.__contains__, selfdata):
    242             data[elt] = value
    243         for elt in ifilterfalse(selfdata.__contains__, otherdata):
    244             data[elt] = value
    245         return result
    246 
    247     def  __sub__(self, other):
    248         """Return the difference of two sets as a new Set.
    249 
    250         (I.e. all elements that are in this set and not in the other.)
    251         """
    252         if not isinstance(other, BaseSet):
    253             return NotImplemented
    254         return self.difference(other)
    255 
    256     def difference(self, other):
    257         """Return the difference of two sets as a new Set.
    258 
    259         (I.e. all elements that are in this set and not in the other.)
    260         """
    261         result = self.__class__()
    262         data = result._data
    263         try:
    264             otherdata = other._data
    265         except AttributeError:
    266             otherdata = Set(other)._data
    267         value = True
    268         for elt in ifilterfalse(otherdata.__contains__, self):
    269             data[elt] = value
    270         return result
    271 
    272     # Membership test
    273 
    274     def __contains__(self, element):
    275         """Report whether an element is a member of a set.
    276 
    277         (Called in response to the expression `element in self'.)
    278         """
    279         try:
    280             return element in self._data
    281         except TypeError:
    282             transform = getattr(element, "__as_temporarily_immutable__", None)
    283             if transform is None:
    284                 raise # re-raise the TypeError exception we caught
    285             return transform() in self._data
    286 
    287     # Subset and superset test
    288 
    289     def issubset(self, other):
    290         """Report whether another set contains this set."""
    291         self._binary_sanity_check(other)
    292         if len(self) > len(other):  # Fast check for obvious cases
    293             return False
    294         for elt in ifilterfalse(other._data.__contains__, self):
    295             return False
    296         return True
    297 
    298     def issuperset(self, other):
    299         """Report whether this set contains another set."""
    300         self._binary_sanity_check(other)
    301         if len(self) < len(other):  # Fast check for obvious cases
    302             return False
    303         for elt in ifilterfalse(self._data.__contains__, other):
    304             return False
    305         return True
    306 
    307     # Inequality comparisons using the is-subset relation.
    308     __le__ = issubset
    309     __ge__ = issuperset
    310 
    311     def __lt__(self, other):
    312         self._binary_sanity_check(other)
    313         return len(self) < len(other) and self.issubset(other)
    314 
    315     def __gt__(self, other):
    316         self._binary_sanity_check(other)
    317         return len(self) > len(other) and self.issuperset(other)
    318 
    319     # We inherit object.__hash__, so we must deny this explicitly
    320     __hash__ = None
    321 
    322     # Assorted helpers
    323 
    324     def _binary_sanity_check(self, other):
    325         # Check that the other argument to a binary operation is also
    326         # a set, raising a TypeError otherwise.
    327         if not isinstance(other, BaseSet):
    328             raise TypeError, "Binary operation only permitted between sets"
    329 
    330     def _compute_hash(self):
    331         # Calculate hash code for a set by xor'ing the hash codes of
    332         # the elements.  This ensures that the hash code does not depend
    333         # on the order in which elements are added to the set.  This is
    334         # not called __hash__ because a BaseSet should not be hashable;
    335         # only an ImmutableSet is hashable.
    336         result = 0
    337         for elt in self:
    338             result ^= hash(elt)
    339         return result
    340 
    341     def _update(self, iterable):
    342         # The main loop for update() and the subclass __init__() methods.
    343         data = self._data
    344 
    345         # Use the fast update() method when a dictionary is available.
    346         if isinstance(iterable, BaseSet):
    347             data.update(iterable._data)
    348             return
    349 
    350         value = True
    351 
    352         if type(iterable) in (list, tuple, xrange):
    353             # Optimized: we know that __iter__() and next() can't
    354             # raise TypeError, so we can move 'try:' out of the loop.
    355             it = iter(iterable)
    356             while True:
    357                 try:
    358                     for element in it:
    359                         data[element] = value
    360                     return
    361                 except TypeError:
    362                     transform = getattr(element, "__as_immutable__", None)
    363                     if transform is None:
    364                         raise # re-raise the TypeError exception we caught
    365                     data[transform()] = value
    366         else:
    367             # Safe: only catch TypeError where intended
    368             for element in iterable:
    369                 try:
    370                     data[element] = value
    371                 except TypeError:
    372                     transform = getattr(element, "__as_immutable__", None)
    373                     if transform is None:
    374                         raise # re-raise the TypeError exception we caught
    375                     data[transform()] = value
    376 
    377 
    378 class ImmutableSet(BaseSet):
    379     """Immutable set class."""
    380 
    381     __slots__ = ['_hashcode']
    382 
    383     # BaseSet + hashing
    384 
    385     def __init__(self, iterable=None):
    386         """Construct an immutable set from an optional iterable."""
    387         self._hashcode = None
    388         self._data = {}
    389         if iterable is not None:
    390             self._update(iterable)
    391 
    392     def __hash__(self):
    393         if self._hashcode is None:
    394             self._hashcode = self._compute_hash()
    395         return self._hashcode
    396 
    397     def __getstate__(self):
    398         return self._data, self._hashcode
    399 
    400     def __setstate__(self, state):
    401         self._data, self._hashcode = state
    402 
    403 class Set(BaseSet):
    404     """ Mutable set class."""
    405 
    406     __slots__ = []
    407 
    408     # BaseSet + operations requiring mutability; no hashing
    409 
    410     def __init__(self, iterable=None):
    411         """Construct a set from an optional iterable."""
    412         self._data = {}
    413         if iterable is not None:
    414             self._update(iterable)
    415 
    416     def __getstate__(self):
    417         # getstate's results are ignored if it is not
    418         return self._data,
    419 
    420     def __setstate__(self, data):
    421         self._data, = data
    422 
    423     # In-place union, intersection, differences.
    424     # Subtle:  The xyz_update() functions deliberately return None,
    425     # as do all mutating operations on built-in container types.
    426     # The __xyz__ spellings have to return self, though.
    427 
    428     def __ior__(self, other):
    429         """Update a set with the union of itself and another."""
    430         self._binary_sanity_check(other)
    431         self._data.update(other._data)
    432         return self
    433 
    434     def union_update(self, other):
    435         """Update a set with the union of itself and another."""
    436         self._update(other)
    437 
    438     def __iand__(self, other):
    439         """Update a set with the intersection of itself and another."""
    440         self._binary_sanity_check(other)
    441         self._data = (self & other)._data
    442         return self
    443 
    444     def intersection_update(self, other):
    445         """Update a set with the intersection of itself and another."""
    446         if isinstance(other, BaseSet):
    447             self &= other
    448         else:
    449             self._data = (self.intersection(other))._data
    450 
    451     def __ixor__(self, other):
    452         """Update a set with the symmetric difference of itself and another."""
    453         self._binary_sanity_check(other)
    454         self.symmetric_difference_update(other)
    455         return self
    456 
    457     def symmetric_difference_update(self, other):
    458         """Update a set with the symmetric difference of itself and another."""
    459         data = self._data
    460         value = True
    461         if not isinstance(other, BaseSet):
    462             other = Set(other)
    463         if self is other:
    464             self.clear()
    465         for elt in other:
    466             if elt in data:
    467                 del data[elt]
    468             else:
    469                 data[elt] = value
    470 
    471     def __isub__(self, other):
    472         """Remove all elements of another set from this set."""
    473         self._binary_sanity_check(other)
    474         self.difference_update(other)
    475         return self
    476 
    477     def difference_update(self, other):
    478         """Remove all elements of another set from this set."""
    479         data = self._data
    480         if not isinstance(other, BaseSet):
    481             other = Set(other)
    482         if self is other:
    483             self.clear()
    484         for elt in ifilter(data.__contains__, other):
    485             del data[elt]
    486 
    487     # Python dict-like mass mutations: update, clear
    488 
    489     def update(self, iterable):
    490         """Add all values from an iterable (such as a list or file)."""
    491         self._update(iterable)
    492 
    493     def clear(self):
    494         """Remove all elements from this set."""
    495         self._data.clear()
    496 
    497     # Single-element mutations: add, remove, discard
    498 
    499     def add(self, element):
    500         """Add an element to a set.
    501 
    502         This has no effect if the element is already present.
    503         """
    504         try:
    505             self._data[element] = True
    506         except TypeError:
    507             transform = getattr(element, "__as_immutable__", None)
    508             if transform is None:
    509                 raise # re-raise the TypeError exception we caught
    510             self._data[transform()] = True
    511 
    512     def remove(self, element):
    513         """Remove an element from a set; it must be a member.
    514 
    515         If the element is not a member, raise a KeyError.
    516         """
    517         try:
    518             del self._data[element]
    519         except TypeError:
    520             transform = getattr(element, "__as_temporarily_immutable__", None)
    521             if transform is None:
    522                 raise # re-raise the TypeError exception we caught
    523             del self._data[transform()]
    524 
    525     def discard(self, element):
    526         """Remove an element from a set if it is a member.
    527 
    528         If the element is not a member, do nothing.
    529         """
    530         try:
    531             self.remove(element)
    532         except KeyError:
    533             pass
    534 
    535     def pop(self):
    536         """Remove and return an arbitrary set element."""
    537         return self._data.popitem()[0]
    538 
    539     def __as_immutable__(self):
    540         # Return a copy of self as an immutable set
    541         return ImmutableSet(self)
    542 
    543     def __as_temporarily_immutable__(self):
    544         # Return self wrapped in a temporarily immutable set
    545         return _TemporarilyImmutableSet(self)
    546 
    547 
    548 class _TemporarilyImmutableSet(BaseSet):
    549     # Wrap a mutable set as if it was temporarily immutable.
    550     # This only supplies hashing and equality comparisons.
    551 
    552     def __init__(self, set):
    553         self._set = set
    554         self._data = set._data  # Needed by ImmutableSet.__eq__()
    555 
    556     def __hash__(self):
    557         return self._set._compute_hash()
    558