1 __all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict'] 2 # For bootstrapping reasons, the collection ABCs are defined in _abcoll.py. 3 # They should however be considered an integral part of collections.py. 4 from _abcoll import * 5 import _abcoll 6 __all__ += _abcoll.__all__ 7 8 from _collections import deque, defaultdict 9 from operator import itemgetter as _itemgetter, eq as _eq 10 from keyword import iskeyword as _iskeyword 11 import sys as _sys 12 import heapq as _heapq 13 from itertools import repeat as _repeat, chain as _chain, starmap as _starmap 14 from itertools import imap as _imap 15 16 try: 17 from thread import get_ident as _get_ident 18 except ImportError: 19 from dummy_thread import get_ident as _get_ident 20 21 22 ################################################################################ 23 ### OrderedDict 24 ################################################################################ 25 26 class OrderedDict(dict): 27 'Dictionary that remembers insertion order' 28 # An inherited dict maps keys to values. 29 # The inherited dict provides __getitem__, __len__, __contains__, and get. 30 # The remaining methods are order-aware. 31 # Big-O running times for all methods are the same as regular dictionaries. 32 33 # The internal self.__map dict maps keys to links in a doubly linked list. 34 # The circular doubly linked list starts and ends with a sentinel element. 35 # The sentinel element never gets deleted (this simplifies the algorithm). 36 # Each link is stored as a list of length three: [PREV, NEXT, KEY]. 37 38 def __init__(self, *args, **kwds): 39 '''Initialize an ordered dictionary. The signature is the same as 40 regular dictionaries, but keyword arguments are not recommended because 41 their insertion order is arbitrary. 42 43 ''' 44 if len(args) > 1: 45 raise TypeError('expected at most 1 arguments, got %d' % len(args)) 46 try: 47 self.__root 48 except AttributeError: 49 self.__root = root = [] # sentinel node 50 root[:] = [root, root, None] 51 self.__map = {} 52 self.__update(*args, **kwds) 53 54 def __setitem__(self, key, value, dict_setitem=dict.__setitem__): 55 'od.__setitem__(i, y) <==> od[i]=y' 56 # Setting a new item creates a new link at the end of the linked list, 57 # and the inherited dictionary is updated with the new key/value pair. 58 if key not in self: 59 root = self.__root 60 last = root[0] 61 last[1] = root[0] = self.__map[key] = [last, root, key] 62 return dict_setitem(self, key, value) 63 64 def __delitem__(self, key, dict_delitem=dict.__delitem__): 65 'od.__delitem__(y) <==> del od[y]' 66 # Deleting an existing item uses self.__map to find the link which gets 67 # removed by updating the links in the predecessor and successor nodes. 68 dict_delitem(self, key) 69 link_prev, link_next, _ = self.__map.pop(key) 70 link_prev[1] = link_next # update link_prev[NEXT] 71 link_next[0] = link_prev # update link_next[PREV] 72 73 def __iter__(self): 74 'od.__iter__() <==> iter(od)' 75 # Traverse the linked list in order. 76 root = self.__root 77 curr = root[1] # start at the first node 78 while curr is not root: 79 yield curr[2] # yield the curr[KEY] 80 curr = curr[1] # move to next node 81 82 def __reversed__(self): 83 'od.__reversed__() <==> reversed(od)' 84 # Traverse the linked list in reverse order. 85 root = self.__root 86 curr = root[0] # start at the last node 87 while curr is not root: 88 yield curr[2] # yield the curr[KEY] 89 curr = curr[0] # move to previous node 90 91 def clear(self): 92 'od.clear() -> None. Remove all items from od.' 93 root = self.__root 94 root[:] = [root, root, None] 95 self.__map.clear() 96 dict.clear(self) 97 98 # -- the following methods do not depend on the internal structure -- 99 100 def keys(self): 101 'od.keys() -> list of keys in od' 102 return list(self) 103 104 def values(self): 105 'od.values() -> list of values in od' 106 return [self[key] for key in self] 107 108 def items(self): 109 'od.items() -> list of (key, value) pairs in od' 110 return [(key, self[key]) for key in self] 111 112 def iterkeys(self): 113 'od.iterkeys() -> an iterator over the keys in od' 114 return iter(self) 115 116 def itervalues(self): 117 'od.itervalues -> an iterator over the values in od' 118 for k in self: 119 yield self[k] 120 121 def iteritems(self): 122 'od.iteritems -> an iterator over the (key, value) pairs in od' 123 for k in self: 124 yield (k, self[k]) 125 126 update = MutableMapping.update 127 128 __update = update # let subclasses override update without breaking __init__ 129 130 __marker = object() 131 132 def pop(self, key, default=__marker): 133 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding 134 value. If key is not found, d is returned if given, otherwise KeyError 135 is raised. 136 137 ''' 138 if key in self: 139 result = self[key] 140 del self[key] 141 return result 142 if default is self.__marker: 143 raise KeyError(key) 144 return default 145 146 def setdefault(self, key, default=None): 147 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' 148 if key in self: 149 return self[key] 150 self[key] = default 151 return default 152 153 def popitem(self, last=True): 154 '''od.popitem() -> (k, v), return and remove a (key, value) pair. 155 Pairs are returned in LIFO order if last is true or FIFO order if false. 156 157 ''' 158 if not self: 159 raise KeyError('dictionary is empty') 160 key = next(reversed(self) if last else iter(self)) 161 value = self.pop(key) 162 return key, value 163 164 def __repr__(self, _repr_running={}): 165 'od.__repr__() <==> repr(od)' 166 call_key = id(self), _get_ident() 167 if call_key in _repr_running: 168 return '...' 169 _repr_running[call_key] = 1 170 try: 171 if not self: 172 return '%s()' % (self.__class__.__name__,) 173 return '%s(%r)' % (self.__class__.__name__, self.items()) 174 finally: 175 del _repr_running[call_key] 176 177 def __reduce__(self): 178 'Return state information for pickling' 179 items = [[k, self[k]] for k in self] 180 inst_dict = vars(self).copy() 181 for k in vars(OrderedDict()): 182 inst_dict.pop(k, None) 183 if inst_dict: 184 return (self.__class__, (items,), inst_dict) 185 return self.__class__, (items,) 186 187 def copy(self): 188 'od.copy() -> a shallow copy of od' 189 return self.__class__(self) 190 191 @classmethod 192 def fromkeys(cls, iterable, value=None): 193 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. 194 If not specified, the value defaults to None. 195 196 ''' 197 self = cls() 198 for key in iterable: 199 self[key] = value 200 return self 201 202 def __eq__(self, other): 203 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive 204 while comparison to a regular mapping is order-insensitive. 205 206 ''' 207 if isinstance(other, OrderedDict): 208 return dict.__eq__(self, other) and all(_imap(_eq, self, other)) 209 return dict.__eq__(self, other) 210 211 def __ne__(self, other): 212 'od.__ne__(y) <==> od!=y' 213 return not self == other 214 215 # -- the following methods support python 3.x style dictionary views -- 216 217 def viewkeys(self): 218 "od.viewkeys() -> a set-like object providing a view on od's keys" 219 return KeysView(self) 220 221 def viewvalues(self): 222 "od.viewvalues() -> an object providing a view on od's values" 223 return ValuesView(self) 224 225 def viewitems(self): 226 "od.viewitems() -> a set-like object providing a view on od's items" 227 return ItemsView(self) 228 229 230 ################################################################################ 231 ### namedtuple 232 ################################################################################ 233 234 _class_template = '''\ 235 class {typename}(tuple): 236 '{typename}({arg_list})' 237 238 __slots__ = () 239 240 _fields = {field_names!r} 241 242 def __new__(_cls, {arg_list}): 243 'Create new instance of {typename}({arg_list})' 244 return _tuple.__new__(_cls, ({arg_list})) 245 246 @classmethod 247 def _make(cls, iterable, new=tuple.__new__, len=len): 248 'Make a new {typename} object from a sequence or iterable' 249 result = new(cls, iterable) 250 if len(result) != {num_fields:d}: 251 raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result)) 252 return result 253 254 def __repr__(self): 255 'Return a nicely formatted representation string' 256 return '{typename}({repr_fmt})' % self 257 258 def _asdict(self): 259 'Return a new OrderedDict which maps field names to their values' 260 return OrderedDict(zip(self._fields, self)) 261 262 def _replace(_self, **kwds): 263 'Return a new {typename} object replacing specified fields with new values' 264 result = _self._make(map(kwds.pop, {field_names!r}, _self)) 265 if kwds: 266 raise ValueError('Got unexpected field names: %r' % kwds.keys()) 267 return result 268 269 def __getnewargs__(self): 270 'Return self as a plain tuple. Used by copy and pickle.' 271 return tuple(self) 272 273 {field_defs} 274 ''' 275 276 _repr_template = '{name}=%r' 277 278 _field_template = '''\ 279 {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}') 280 ''' 281 282 def namedtuple(typename, field_names, verbose=False, rename=False): 283 """Returns a new subclass of tuple with named fields. 284 285 >>> Point = namedtuple('Point', ['x', 'y']) 286 >>> Point.__doc__ # docstring for the new class 287 'Point(x, y)' 288 >>> p = Point(11, y=22) # instantiate with positional args or keywords 289 >>> p[0] + p[1] # indexable like a plain tuple 290 33 291 >>> x, y = p # unpack like a regular tuple 292 >>> x, y 293 (11, 22) 294 >>> p.x + p.y # fields also accessable by name 295 33 296 >>> d = p._asdict() # convert to a dictionary 297 >>> d['x'] 298 11 299 >>> Point(**d) # convert from a dictionary 300 Point(x=11, y=22) 301 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields 302 Point(x=100, y=22) 303 304 """ 305 306 # Validate the field names. At the user's option, either generate an error 307 # message or automatically replace the field name with a valid name. 308 if isinstance(field_names, basestring): 309 field_names = field_names.replace(',', ' ').split() 310 field_names = map(str, field_names) 311 if rename: 312 seen = set() 313 for index, name in enumerate(field_names): 314 if (not all(c.isalnum() or c=='_' for c in name) 315 or _iskeyword(name) 316 or not name 317 or name[0].isdigit() 318 or name.startswith('_') 319 or name in seen): 320 field_names[index] = '_%d' % index 321 seen.add(name) 322 for name in [typename] + field_names: 323 if not all(c.isalnum() or c=='_' for c in name): 324 raise ValueError('Type names and field names can only contain ' 325 'alphanumeric characters and underscores: %r' % name) 326 if _iskeyword(name): 327 raise ValueError('Type names and field names cannot be a ' 328 'keyword: %r' % name) 329 if name[0].isdigit(): 330 raise ValueError('Type names and field names cannot start with ' 331 'a number: %r' % name) 332 seen = set() 333 for name in field_names: 334 if name.startswith('_') and not rename: 335 raise ValueError('Field names cannot start with an underscore: ' 336 '%r' % name) 337 if name in seen: 338 raise ValueError('Encountered duplicate field name: %r' % name) 339 seen.add(name) 340 341 # Fill-in the class template 342 class_definition = _class_template.format( 343 typename = typename, 344 field_names = tuple(field_names), 345 num_fields = len(field_names), 346 arg_list = repr(tuple(field_names)).replace("'", "")[1:-1], 347 repr_fmt = ', '.join(_repr_template.format(name=name) 348 for name in field_names), 349 field_defs = '\n'.join(_field_template.format(index=index, name=name) 350 for index, name in enumerate(field_names)) 351 ) 352 if verbose: 353 print class_definition 354 355 # Execute the template string in a temporary namespace and support 356 # tracing utilities by setting a value for frame.f_globals['__name__'] 357 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename, 358 OrderedDict=OrderedDict, _property=property, _tuple=tuple) 359 try: 360 exec class_definition in namespace 361 except SyntaxError as e: 362 raise SyntaxError(e.message + ':\n' + class_definition) 363 result = namespace[typename] 364 365 # For pickling to work, the __module__ variable needs to be set to the frame 366 # where the named tuple is created. Bypass this step in enviroments where 367 # sys._getframe is not defined (Jython for example) or sys._getframe is not 368 # defined for arguments greater than 0 (IronPython). 369 try: 370 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__') 371 except (AttributeError, ValueError): 372 pass 373 374 return result 375 376 377 ######################################################################## 378 ### Counter 379 ######################################################################## 380 381 class Counter(dict): 382 '''Dict subclass for counting hashable items. Sometimes called a bag 383 or multiset. Elements are stored as dictionary keys and their counts 384 are stored as dictionary values. 385 386 >>> c = Counter('abcdeabcdabcaba') # count elements from a string 387 388 >>> c.most_common(3) # three most common elements 389 [('a', 5), ('b', 4), ('c', 3)] 390 >>> sorted(c) # list all unique elements 391 ['a', 'b', 'c', 'd', 'e'] 392 >>> ''.join(sorted(c.elements())) # list elements with repetitions 393 'aaaaabbbbcccdde' 394 >>> sum(c.values()) # total of all counts 395 15 396 397 >>> c['a'] # count of letter 'a' 398 5 399 >>> for elem in 'shazam': # update counts from an iterable 400 ... c[elem] += 1 # by adding 1 to each element's count 401 >>> c['a'] # now there are seven 'a' 402 7 403 >>> del c['b'] # remove all 'b' 404 >>> c['b'] # now there are zero 'b' 405 0 406 407 >>> d = Counter('simsalabim') # make another counter 408 >>> c.update(d) # add in the second counter 409 >>> c['a'] # now there are nine 'a' 410 9 411 412 >>> c.clear() # empty the counter 413 >>> c 414 Counter() 415 416 Note: If a count is set to zero or reduced to zero, it will remain 417 in the counter until the entry is deleted or the counter is cleared: 418 419 >>> c = Counter('aaabbc') 420 >>> c['b'] -= 2 # reduce the count of 'b' by two 421 >>> c.most_common() # 'b' is still in, but its count is zero 422 [('a', 3), ('c', 1), ('b', 0)] 423 424 ''' 425 # References: 426 # http://en.wikipedia.org/wiki/Multiset 427 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html 428 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm 429 # http://code.activestate.com/recipes/259174/ 430 # Knuth, TAOCP Vol. II section 4.6.3 431 432 def __init__(self, iterable=None, **kwds): 433 '''Create a new, empty Counter object. And if given, count elements 434 from an input iterable. Or, initialize the count from another mapping 435 of elements to their counts. 436 437 >>> c = Counter() # a new, empty counter 438 >>> c = Counter('gallahad') # a new counter from an iterable 439 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping 440 >>> c = Counter(a=4, b=2) # a new counter from keyword args 441 442 ''' 443 super(Counter, self).__init__() 444 self.update(iterable, **kwds) 445 446 def __missing__(self, key): 447 'The count of elements not in the Counter is zero.' 448 # Needed so that self[missing_item] does not raise KeyError 449 return 0 450 451 def most_common(self, n=None): 452 '''List the n most common elements and their counts from the most 453 common to the least. If n is None, then list all element counts. 454 455 >>> Counter('abcdeabcdabcaba').most_common(3) 456 [('a', 5), ('b', 4), ('c', 3)] 457 458 ''' 459 # Emulate Bag.sortedByCount from Smalltalk 460 if n is None: 461 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True) 462 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1)) 463 464 def elements(self): 465 '''Iterator over elements repeating each as many times as its count. 466 467 >>> c = Counter('ABCABC') 468 >>> sorted(c.elements()) 469 ['A', 'A', 'B', 'B', 'C', 'C'] 470 471 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 472 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) 473 >>> product = 1 474 >>> for factor in prime_factors.elements(): # loop over factors 475 ... product *= factor # and multiply them 476 >>> product 477 1836 478 479 Note, if an element's count has been set to zero or is a negative 480 number, elements() will ignore it. 481 482 ''' 483 # Emulate Bag.do from Smalltalk and Multiset.begin from C++. 484 return _chain.from_iterable(_starmap(_repeat, self.iteritems())) 485 486 # Override dict methods where necessary 487 488 @classmethod 489 def fromkeys(cls, iterable, v=None): 490 # There is no equivalent method for counters because setting v=1 491 # means that no element can have a count greater than one. 492 raise NotImplementedError( 493 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') 494 495 def update(self, iterable=None, **kwds): 496 '''Like dict.update() but add counts instead of replacing them. 497 498 Source can be an iterable, a dictionary, or another Counter instance. 499 500 >>> c = Counter('which') 501 >>> c.update('witch') # add elements from another iterable 502 >>> d = Counter('watch') 503 >>> c.update(d) # add elements from another counter 504 >>> c['h'] # four 'h' in which, witch, and watch 505 4 506 507 ''' 508 # The regular dict.update() operation makes no sense here because the 509 # replace behavior results in the some of original untouched counts 510 # being mixed-in with all of the other counts for a mismash that 511 # doesn't have a straight-forward interpretation in most counting 512 # contexts. Instead, we implement straight-addition. Both the inputs 513 # and outputs are allowed to contain zero and negative counts. 514 515 if iterable is not None: 516 if isinstance(iterable, Mapping): 517 if self: 518 self_get = self.get 519 for elem, count in iterable.iteritems(): 520 self[elem] = self_get(elem, 0) + count 521 else: 522 super(Counter, self).update(iterable) # fast path when counter is empty 523 else: 524 self_get = self.get 525 for elem in iterable: 526 self[elem] = self_get(elem, 0) + 1 527 if kwds: 528 self.update(kwds) 529 530 def subtract(self, iterable=None, **kwds): 531 '''Like dict.update() but subtracts counts instead of replacing them. 532 Counts can be reduced below zero. Both the inputs and outputs are 533 allowed to contain zero and negative counts. 534 535 Source can be an iterable, a dictionary, or another Counter instance. 536 537 >>> c = Counter('which') 538 >>> c.subtract('witch') # subtract elements from another iterable 539 >>> c.subtract(Counter('watch')) # subtract elements from another counter 540 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch 541 0 542 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch 543 -1 544 545 ''' 546 if iterable is not None: 547 self_get = self.get 548 if isinstance(iterable, Mapping): 549 for elem, count in iterable.items(): 550 self[elem] = self_get(elem, 0) - count 551 else: 552 for elem in iterable: 553 self[elem] = self_get(elem, 0) - 1 554 if kwds: 555 self.subtract(kwds) 556 557 def copy(self): 558 'Return a shallow copy.' 559 return self.__class__(self) 560 561 def __reduce__(self): 562 return self.__class__, (dict(self),) 563 564 def __delitem__(self, elem): 565 'Like dict.__delitem__() but does not raise KeyError for missing values.' 566 if elem in self: 567 super(Counter, self).__delitem__(elem) 568 569 def __repr__(self): 570 if not self: 571 return '%s()' % self.__class__.__name__ 572 items = ', '.join(map('%r: %r'.__mod__, self.most_common())) 573 return '%s({%s})' % (self.__class__.__name__, items) 574 575 # Multiset-style mathematical operations discussed in: 576 # Knuth TAOCP Volume II section 4.6.3 exercise 19 577 # and at http://en.wikipedia.org/wiki/Multiset 578 # 579 # Outputs guaranteed to only include positive counts. 580 # 581 # To strip negative and zero counts, add-in an empty counter: 582 # c += Counter() 583 584 def __add__(self, other): 585 '''Add counts from two counters. 586 587 >>> Counter('abbb') + Counter('bcc') 588 Counter({'b': 4, 'c': 2, 'a': 1}) 589 590 ''' 591 if not isinstance(other, Counter): 592 return NotImplemented 593 result = Counter() 594 for elem, count in self.items(): 595 newcount = count + other[elem] 596 if newcount > 0: 597 result[elem] = newcount 598 for elem, count in other.items(): 599 if elem not in self and count > 0: 600 result[elem] = count 601 return result 602 603 def __sub__(self, other): 604 ''' Subtract count, but keep only results with positive counts. 605 606 >>> Counter('abbbc') - Counter('bccd') 607 Counter({'b': 2, 'a': 1}) 608 609 ''' 610 if not isinstance(other, Counter): 611 return NotImplemented 612 result = Counter() 613 for elem, count in self.items(): 614 newcount = count - other[elem] 615 if newcount > 0: 616 result[elem] = newcount 617 for elem, count in other.items(): 618 if elem not in self and count < 0: 619 result[elem] = 0 - count 620 return result 621 622 def __or__(self, other): 623 '''Union is the maximum of value in either of the input counters. 624 625 >>> Counter('abbb') | Counter('bcc') 626 Counter({'b': 3, 'c': 2, 'a': 1}) 627 628 ''' 629 if not isinstance(other, Counter): 630 return NotImplemented 631 result = Counter() 632 for elem, count in self.items(): 633 other_count = other[elem] 634 newcount = other_count if count < other_count else count 635 if newcount > 0: 636 result[elem] = newcount 637 for elem, count in other.items(): 638 if elem not in self and count > 0: 639 result[elem] = count 640 return result 641 642 def __and__(self, other): 643 ''' Intersection is the minimum of corresponding counts. 644 645 >>> Counter('abbb') & Counter('bcc') 646 Counter({'b': 1}) 647 648 ''' 649 if not isinstance(other, Counter): 650 return NotImplemented 651 result = Counter() 652 for elem, count in self.items(): 653 other_count = other[elem] 654 newcount = count if count < other_count else other_count 655 if newcount > 0: 656 result[elem] = newcount 657 return result 658 659 660 if __name__ == '__main__': 661 # verify that instances can be pickled 662 from cPickle import loads, dumps 663 Point = namedtuple('Point', 'x, y', True) 664 p = Point(x=10, y=20) 665 assert p == loads(dumps(p)) 666 667 # test and demonstrate ability to override methods 668 class Point(namedtuple('Point', 'x y')): 669 __slots__ = () 670 @property 671 def hypot(self): 672 return (self.x ** 2 + self.y ** 2) ** 0.5 673 def __str__(self): 674 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot) 675 676 for p in Point(3, 4), Point(14, 5/7.): 677 print p 678 679 class Point(namedtuple('Point', 'x y')): 680 'Point class with optimized _make() and _replace() without error-checking' 681 __slots__ = () 682 _make = classmethod(tuple.__new__) 683 def _replace(self, _map=map, **kwds): 684 return self._make(_map(kwds.get, ('x', 'y'), self)) 685 686 print Point(11, 22)._replace(x=100) 687 688 Point3D = namedtuple('Point3D', Point._fields + ('z',)) 689 print Point3D.__doc__ 690 691 import doctest 692 TestResults = namedtuple('TestResults', 'failed attempted') 693 print TestResults(*doctest.testmod()) 694