1 """Classes to represent arbitrary sets (including sets of sets). 2 3 This module implements sets using dictionaries whose values are 4 ignored. The usual operations (union, intersection, deletion, etc.) 5 are provided as both methods and operators. 6 7 Important: sets are not sequences! While they support 'x in s', 8 'len(s)', and 'for x in s', none of those operations are unique for 9 sequences; for example, mappings support all three as well. The 10 characteristic operation for sequences is subscripting with small 11 integers: s[i], for i in range(len(s)). Sets don't support 12 subscripting at all. Also, sequences allow multiple occurrences and 13 their elements have a definite order; sets on the other hand don't 14 record multiple occurrences and don't remember the order of element 15 insertion (which is why they don't support s[i]). 16 17 The following classes are provided: 18 19 BaseSet -- All the operations common to both mutable and immutable 20 sets. This is an abstract class, not meant to be directly 21 instantiated. 22 23 Set -- Mutable sets, subclass of BaseSet; not hashable. 24 25 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable. 26 An iterable argument is mandatory to create an ImmutableSet. 27 28 _TemporarilyImmutableSet -- A wrapper around a Set, hashable, 29 giving the same hash value as the immutable set equivalent 30 would have. Do not use this class directly. 31 32 Only hashable objects can be added to a Set. In particular, you cannot 33 really add a Set as an element to another Set; if you try, what is 34 actually added is an ImmutableSet built from it (it compares equal to 35 the one you tried adding). 36 37 When you ask if `x in y' where x is a Set and y is a Set or 38 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and 39 what's tested is actually `z in y'. 40 41 """ 42 43 # Code history: 44 # 45 # - Greg V. Wilson wrote the first version, using a different approach 46 # to the mutable/immutable problem, and inheriting from dict. 47 # 48 # - Alex Martelli modified Greg's version to implement the current 49 # Set/ImmutableSet approach, and make the data an attribute. 50 # 51 # - Guido van Rossum rewrote much of the code, made some API changes, 52 # and cleaned up the docstrings. 53 # 54 # - Raymond Hettinger added a number of speedups and other 55 # improvements. 56 57 from itertools import ifilter, ifilterfalse 58 59 __all__ = ['BaseSet', 'Set', 'ImmutableSet'] 60 61 import warnings 62 warnings.warn("the sets module is deprecated", DeprecationWarning, 63 stacklevel=2) 64 65 class BaseSet(object): 66 """Common base class for mutable and immutable sets.""" 67 68 __slots__ = ['_data'] 69 70 # Constructor 71 72 def __init__(self): 73 """This is an abstract class.""" 74 # Don't call this from a concrete subclass! 75 if self.__class__ is BaseSet: 76 raise TypeError, ("BaseSet is an abstract class. " 77 "Use Set or ImmutableSet.") 78 79 # Standard protocols: __len__, __repr__, __str__, __iter__ 80 81 def __len__(self): 82 """Return the number of elements of a set.""" 83 return len(self._data) 84 85 def __repr__(self): 86 """Return string representation of a set. 87 88 This looks like 'Set([<list of elements>])'. 89 """ 90 return self._repr() 91 92 # __str__ is the same as __repr__ 93 __str__ = __repr__ 94 95 def _repr(self, sorted=False): 96 elements = self._data.keys() 97 if sorted: 98 elements.sort() 99 return '%s(%r)' % (self.__class__.__name__, elements) 100 101 def __iter__(self): 102 """Return an iterator over the elements or a set. 103 104 This is the keys iterator for the underlying dict. 105 """ 106 return self._data.iterkeys() 107 108 # Three-way comparison is not supported. However, because __eq__ is 109 # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and 110 # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this 111 # case). 112 113 def __cmp__(self, other): 114 raise TypeError, "can't compare sets using cmp()" 115 116 # Equality comparisons using the underlying dicts. Mixed-type comparisons 117 # are allowed here, where Set == z for non-Set z always returns False, 118 # and Set != z always True. This allows expressions like "x in y" to 119 # give the expected result when y is a sequence of mixed types, not 120 # raising a pointless TypeError just because y contains a Set, or x is 121 # a Set and y contain's a non-set ("in" invokes only __eq__). 122 # Subtle: it would be nicer if __eq__ and __ne__ could return 123 # NotImplemented instead of True or False. Then the other comparand 124 # would get a chance to determine the result, and if the other comparand 125 # also returned NotImplemented then it would fall back to object address 126 # comparison (which would always return False for __eq__ and always 127 # True for __ne__). However, that doesn't work, because this type 128 # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented, 129 # Python tries __cmp__ next, and the __cmp__ here then raises TypeError. 130 131 def __eq__(self, other): 132 if isinstance(other, BaseSet): 133 return self._data == other._data 134 else: 135 return False 136 137 def __ne__(self, other): 138 if isinstance(other, BaseSet): 139 return self._data != other._data 140 else: 141 return True 142 143 # Copying operations 144 145 def copy(self): 146 """Return a shallow copy of a set.""" 147 result = self.__class__() 148 result._data.update(self._data) 149 return result 150 151 __copy__ = copy # For the copy module 152 153 def __deepcopy__(self, memo): 154 """Return a deep copy of a set; used by copy module.""" 155 # This pre-creates the result and inserts it in the memo 156 # early, in case the deep copy recurses into another reference 157 # to this same set. A set can't be an element of itself, but 158 # it can certainly contain an object that has a reference to 159 # itself. 160 from copy import deepcopy 161 result = self.__class__() 162 memo[id(self)] = result 163 data = result._data 164 value = True 165 for elt in self: 166 data[deepcopy(elt, memo)] = value 167 return result 168 169 # Standard set operations: union, intersection, both differences. 170 # Each has an operator version (e.g. __or__, invoked with |) and a 171 # method version (e.g. union). 172 # Subtle: Each pair requires distinct code so that the outcome is 173 # correct when the type of other isn't suitable. For example, if 174 # we did "union = __or__" instead, then Set().union(3) would return 175 # NotImplemented instead of raising TypeError (albeit that *why* it 176 # raises TypeError as-is is also a bit subtle). 177 178 def __or__(self, other): 179 """Return the union of two sets as a new set. 180 181 (I.e. all elements that are in either set.) 182 """ 183 if not isinstance(other, BaseSet): 184 return NotImplemented 185 return self.union(other) 186 187 def union(self, other): 188 """Return the union of two sets as a new set. 189 190 (I.e. all elements that are in either set.) 191 """ 192 result = self.__class__(self) 193 result._update(other) 194 return result 195 196 def __and__(self, other): 197 """Return the intersection of two sets as a new set. 198 199 (I.e. all elements that are in both sets.) 200 """ 201 if not isinstance(other, BaseSet): 202 return NotImplemented 203 return self.intersection(other) 204 205 def intersection(self, other): 206 """Return the intersection of two sets as a new set. 207 208 (I.e. all elements that are in both sets.) 209 """ 210 if not isinstance(other, BaseSet): 211 other = Set(other) 212 if len(self) <= len(other): 213 little, big = self, other 214 else: 215 little, big = other, self 216 common = ifilter(big._data.__contains__, little) 217 return self.__class__(common) 218 219 def __xor__(self, other): 220 """Return the symmetric difference of two sets as a new set. 221 222 (I.e. all elements that are in exactly one of the sets.) 223 """ 224 if not isinstance(other, BaseSet): 225 return NotImplemented 226 return self.symmetric_difference(other) 227 228 def symmetric_difference(self, other): 229 """Return the symmetric difference of two sets as a new set. 230 231 (I.e. all elements that are in exactly one of the sets.) 232 """ 233 result = self.__class__() 234 data = result._data 235 value = True 236 selfdata = self._data 237 try: 238 otherdata = other._data 239 except AttributeError: 240 otherdata = Set(other)._data 241 for elt in ifilterfalse(otherdata.__contains__, selfdata): 242 data[elt] = value 243 for elt in ifilterfalse(selfdata.__contains__, otherdata): 244 data[elt] = value 245 return result 246 247 def __sub__(self, other): 248 """Return the difference of two sets as a new Set. 249 250 (I.e. all elements that are in this set and not in the other.) 251 """ 252 if not isinstance(other, BaseSet): 253 return NotImplemented 254 return self.difference(other) 255 256 def difference(self, other): 257 """Return the difference of two sets as a new Set. 258 259 (I.e. all elements that are in this set and not in the other.) 260 """ 261 result = self.__class__() 262 data = result._data 263 try: 264 otherdata = other._data 265 except AttributeError: 266 otherdata = Set(other)._data 267 value = True 268 for elt in ifilterfalse(otherdata.__contains__, self): 269 data[elt] = value 270 return result 271 272 # Membership test 273 274 def __contains__(self, element): 275 """Report whether an element is a member of a set. 276 277 (Called in response to the expression `element in self'.) 278 """ 279 try: 280 return element in self._data 281 except TypeError: 282 transform = getattr(element, "__as_temporarily_immutable__", None) 283 if transform is None: 284 raise # re-raise the TypeError exception we caught 285 return transform() in self._data 286 287 # Subset and superset test 288 289 def issubset(self, other): 290 """Report whether another set contains this set.""" 291 self._binary_sanity_check(other) 292 if len(self) > len(other): # Fast check for obvious cases 293 return False 294 for elt in ifilterfalse(other._data.__contains__, self): 295 return False 296 return True 297 298 def issuperset(self, other): 299 """Report whether this set contains another set.""" 300 self._binary_sanity_check(other) 301 if len(self) < len(other): # Fast check for obvious cases 302 return False 303 for elt in ifilterfalse(self._data.__contains__, other): 304 return False 305 return True 306 307 # Inequality comparisons using the is-subset relation. 308 __le__ = issubset 309 __ge__ = issuperset 310 311 def __lt__(self, other): 312 self._binary_sanity_check(other) 313 return len(self) < len(other) and self.issubset(other) 314 315 def __gt__(self, other): 316 self._binary_sanity_check(other) 317 return len(self) > len(other) and self.issuperset(other) 318 319 # We inherit object.__hash__, so we must deny this explicitly 320 __hash__ = None 321 322 # Assorted helpers 323 324 def _binary_sanity_check(self, other): 325 # Check that the other argument to a binary operation is also 326 # a set, raising a TypeError otherwise. 327 if not isinstance(other, BaseSet): 328 raise TypeError, "Binary operation only permitted between sets" 329 330 def _compute_hash(self): 331 # Calculate hash code for a set by xor'ing the hash codes of 332 # the elements. This ensures that the hash code does not depend 333 # on the order in which elements are added to the set. This is 334 # not called __hash__ because a BaseSet should not be hashable; 335 # only an ImmutableSet is hashable. 336 result = 0 337 for elt in self: 338 result ^= hash(elt) 339 return result 340 341 def _update(self, iterable): 342 # The main loop for update() and the subclass __init__() methods. 343 data = self._data 344 345 # Use the fast update() method when a dictionary is available. 346 if isinstance(iterable, BaseSet): 347 data.update(iterable._data) 348 return 349 350 value = True 351 352 if type(iterable) in (list, tuple, xrange): 353 # Optimized: we know that __iter__() and next() can't 354 # raise TypeError, so we can move 'try:' out of the loop. 355 it = iter(iterable) 356 while True: 357 try: 358 for element in it: 359 data[element] = value 360 return 361 except TypeError: 362 transform = getattr(element, "__as_immutable__", None) 363 if transform is None: 364 raise # re-raise the TypeError exception we caught 365 data[transform()] = value 366 else: 367 # Safe: only catch TypeError where intended 368 for element in iterable: 369 try: 370 data[element] = value 371 except TypeError: 372 transform = getattr(element, "__as_immutable__", None) 373 if transform is None: 374 raise # re-raise the TypeError exception we caught 375 data[transform()] = value 376 377 378 class ImmutableSet(BaseSet): 379 """Immutable set class.""" 380 381 __slots__ = ['_hashcode'] 382 383 # BaseSet + hashing 384 385 def __init__(self, iterable=None): 386 """Construct an immutable set from an optional iterable.""" 387 self._hashcode = None 388 self._data = {} 389 if iterable is not None: 390 self._update(iterable) 391 392 def __hash__(self): 393 if self._hashcode is None: 394 self._hashcode = self._compute_hash() 395 return self._hashcode 396 397 def __getstate__(self): 398 return self._data, self._hashcode 399 400 def __setstate__(self, state): 401 self._data, self._hashcode = state 402 403 class Set(BaseSet): 404 """ Mutable set class.""" 405 406 __slots__ = [] 407 408 # BaseSet + operations requiring mutability; no hashing 409 410 def __init__(self, iterable=None): 411 """Construct a set from an optional iterable.""" 412 self._data = {} 413 if iterable is not None: 414 self._update(iterable) 415 416 def __getstate__(self): 417 # getstate's results are ignored if it is not 418 return self._data, 419 420 def __setstate__(self, data): 421 self._data, = data 422 423 # In-place union, intersection, differences. 424 # Subtle: The xyz_update() functions deliberately return None, 425 # as do all mutating operations on built-in container types. 426 # The __xyz__ spellings have to return self, though. 427 428 def __ior__(self, other): 429 """Update a set with the union of itself and another.""" 430 self._binary_sanity_check(other) 431 self._data.update(other._data) 432 return self 433 434 def union_update(self, other): 435 """Update a set with the union of itself and another.""" 436 self._update(other) 437 438 def __iand__(self, other): 439 """Update a set with the intersection of itself and another.""" 440 self._binary_sanity_check(other) 441 self._data = (self & other)._data 442 return self 443 444 def intersection_update(self, other): 445 """Update a set with the intersection of itself and another.""" 446 if isinstance(other, BaseSet): 447 self &= other 448 else: 449 self._data = (self.intersection(other))._data 450 451 def __ixor__(self, other): 452 """Update a set with the symmetric difference of itself and another.""" 453 self._binary_sanity_check(other) 454 self.symmetric_difference_update(other) 455 return self 456 457 def symmetric_difference_update(self, other): 458 """Update a set with the symmetric difference of itself and another.""" 459 data = self._data 460 value = True 461 if not isinstance(other, BaseSet): 462 other = Set(other) 463 if self is other: 464 self.clear() 465 for elt in other: 466 if elt in data: 467 del data[elt] 468 else: 469 data[elt] = value 470 471 def __isub__(self, other): 472 """Remove all elements of another set from this set.""" 473 self._binary_sanity_check(other) 474 self.difference_update(other) 475 return self 476 477 def difference_update(self, other): 478 """Remove all elements of another set from this set.""" 479 data = self._data 480 if not isinstance(other, BaseSet): 481 other = Set(other) 482 if self is other: 483 self.clear() 484 for elt in ifilter(data.__contains__, other): 485 del data[elt] 486 487 # Python dict-like mass mutations: update, clear 488 489 def update(self, iterable): 490 """Add all values from an iterable (such as a list or file).""" 491 self._update(iterable) 492 493 def clear(self): 494 """Remove all elements from this set.""" 495 self._data.clear() 496 497 # Single-element mutations: add, remove, discard 498 499 def add(self, element): 500 """Add an element to a set. 501 502 This has no effect if the element is already present. 503 """ 504 try: 505 self._data[element] = True 506 except TypeError: 507 transform = getattr(element, "__as_immutable__", None) 508 if transform is None: 509 raise # re-raise the TypeError exception we caught 510 self._data[transform()] = True 511 512 def remove(self, element): 513 """Remove an element from a set; it must be a member. 514 515 If the element is not a member, raise a KeyError. 516 """ 517 try: 518 del self._data[element] 519 except TypeError: 520 transform = getattr(element, "__as_temporarily_immutable__", None) 521 if transform is None: 522 raise # re-raise the TypeError exception we caught 523 del self._data[transform()] 524 525 def discard(self, element): 526 """Remove an element from a set if it is a member. 527 528 If the element is not a member, do nothing. 529 """ 530 try: 531 self.remove(element) 532 except KeyError: 533 pass 534 535 def pop(self): 536 """Remove and return an arbitrary set element.""" 537 return self._data.popitem()[0] 538 539 def __as_immutable__(self): 540 # Return a copy of self as an immutable set 541 return ImmutableSet(self) 542 543 def __as_temporarily_immutable__(self): 544 # Return self wrapped in a temporarily immutable set 545 return _TemporarilyImmutableSet(self) 546 547 548 class _TemporarilyImmutableSet(BaseSet): 549 # Wrap a mutable set as if it was temporarily immutable. 550 # This only supplies hashing and equality comparisons. 551 552 def __init__(self, set): 553 self._set = set 554 self._data = set._data # Needed by ImmutableSet.__eq__() 555 556 def __hash__(self): 557 return self._set._compute_hash() 558