1 # Copyright 2006 Google, Inc. All Rights Reserved. 2 # Licensed to PSF under a Contributor Agreement. 3 4 """ 5 Python parse tree definitions. 6 7 This is a very concrete parse tree; we need to keep every token and 8 even the comments and whitespace between tokens. 9 10 There's also a pattern matching implementation here. 11 """ 12 13 __author__ = "Guido van Rossum <guido (at] python.org>" 14 15 import sys 16 import warnings 17 from StringIO import StringIO 18 19 HUGE = 0x7FFFFFFF # maximum repeat count, default max 20 21 _type_reprs = {} 22 def type_repr(type_num): 23 global _type_reprs 24 if not _type_reprs: 25 from .pygram import python_symbols 26 # printing tokens is possible but not as useful 27 # from .pgen2 import token // token.__dict__.items(): 28 for name, val in python_symbols.__dict__.items(): 29 if type(val) == int: _type_reprs[val] = name 30 return _type_reprs.setdefault(type_num, type_num) 31 32 class Base(object): 33 34 """ 35 Abstract base class for Node and Leaf. 36 37 This provides some default functionality and boilerplate using the 38 template pattern. 39 40 A node may be a subnode of at most one parent. 41 """ 42 43 # Default values for instance variables 44 type = None # int: token number (< 256) or symbol number (>= 256) 45 parent = None # Parent node pointer, or None 46 children = () # Tuple of subnodes 47 was_changed = False 48 was_checked = False 49 50 def __new__(cls, *args, **kwds): 51 """Constructor that prevents Base from being instantiated.""" 52 assert cls is not Base, "Cannot instantiate Base" 53 return object.__new__(cls) 54 55 def __eq__(self, other): 56 """ 57 Compare two nodes for equality. 58 59 This calls the method _eq(). 60 """ 61 if self.__class__ is not other.__class__: 62 return NotImplemented 63 return self._eq(other) 64 65 __hash__ = None # For Py3 compatibility. 66 67 def __ne__(self, other): 68 """ 69 Compare two nodes for inequality. 70 71 This calls the method _eq(). 72 """ 73 if self.__class__ is not other.__class__: 74 return NotImplemented 75 return not self._eq(other) 76 77 def _eq(self, other): 78 """ 79 Compare two nodes for equality. 80 81 This is called by __eq__ and __ne__. It is only called if the two nodes 82 have the same type. This must be implemented by the concrete subclass. 83 Nodes should be considered equal if they have the same structure, 84 ignoring the prefix string and other context information. 85 """ 86 raise NotImplementedError 87 88 def clone(self): 89 """ 90 Return a cloned (deep) copy of self. 91 92 This must be implemented by the concrete subclass. 93 """ 94 raise NotImplementedError 95 96 def post_order(self): 97 """ 98 Return a post-order iterator for the tree. 99 100 This must be implemented by the concrete subclass. 101 """ 102 raise NotImplementedError 103 104 def pre_order(self): 105 """ 106 Return a pre-order iterator for the tree. 107 108 This must be implemented by the concrete subclass. 109 """ 110 raise NotImplementedError 111 112 def set_prefix(self, prefix): 113 """ 114 Set the prefix for the node (see Leaf class). 115 116 DEPRECATED; use the prefix property directly. 117 """ 118 warnings.warn("set_prefix() is deprecated; use the prefix property", 119 DeprecationWarning, stacklevel=2) 120 self.prefix = prefix 121 122 def get_prefix(self): 123 """ 124 Return the prefix for the node (see Leaf class). 125 126 DEPRECATED; use the prefix property directly. 127 """ 128 warnings.warn("get_prefix() is deprecated; use the prefix property", 129 DeprecationWarning, stacklevel=2) 130 return self.prefix 131 132 def replace(self, new): 133 """Replace this node with a new one in the parent.""" 134 assert self.parent is not None, str(self) 135 assert new is not None 136 if not isinstance(new, list): 137 new = [new] 138 l_children = [] 139 found = False 140 for ch in self.parent.children: 141 if ch is self: 142 assert not found, (self.parent.children, self, new) 143 if new is not None: 144 l_children.extend(new) 145 found = True 146 else: 147 l_children.append(ch) 148 assert found, (self.children, self, new) 149 self.parent.changed() 150 self.parent.children = l_children 151 for x in new: 152 x.parent = self.parent 153 self.parent = None 154 155 def get_lineno(self): 156 """Return the line number which generated the invocant node.""" 157 node = self 158 while not isinstance(node, Leaf): 159 if not node.children: 160 return 161 node = node.children[0] 162 return node.lineno 163 164 def changed(self): 165 if self.parent: 166 self.parent.changed() 167 self.was_changed = True 168 169 def remove(self): 170 """ 171 Remove the node from the tree. Returns the position of the node in its 172 parent's children before it was removed. 173 """ 174 if self.parent: 175 for i, node in enumerate(self.parent.children): 176 if node is self: 177 self.parent.changed() 178 del self.parent.children[i] 179 self.parent = None 180 return i 181 182 @property 183 def next_sibling(self): 184 """ 185 The node immediately following the invocant in their parent's children 186 list. If the invocant does not have a next sibling, it is None 187 """ 188 if self.parent is None: 189 return None 190 191 # Can't use index(); we need to test by identity 192 for i, child in enumerate(self.parent.children): 193 if child is self: 194 try: 195 return self.parent.children[i+1] 196 except IndexError: 197 return None 198 199 @property 200 def prev_sibling(self): 201 """ 202 The node immediately preceding the invocant in their parent's children 203 list. If the invocant does not have a previous sibling, it is None. 204 """ 205 if self.parent is None: 206 return None 207 208 # Can't use index(); we need to test by identity 209 for i, child in enumerate(self.parent.children): 210 if child is self: 211 if i == 0: 212 return None 213 return self.parent.children[i-1] 214 215 def leaves(self): 216 for child in self.children: 217 for x in child.leaves(): 218 yield x 219 220 def depth(self): 221 if self.parent is None: 222 return 0 223 return 1 + self.parent.depth() 224 225 def get_suffix(self): 226 """ 227 Return the string immediately following the invocant node. This is 228 effectively equivalent to node.next_sibling.prefix 229 """ 230 next_sib = self.next_sibling 231 if next_sib is None: 232 return u"" 233 return next_sib.prefix 234 235 if sys.version_info < (3, 0): 236 def __str__(self): 237 return unicode(self).encode("ascii") 238 239 class Node(Base): 240 241 """Concrete implementation for interior nodes.""" 242 243 def __init__(self,type, children, 244 context=None, 245 prefix=None, 246 fixers_applied=None): 247 """ 248 Initializer. 249 250 Takes a type constant (a symbol number >= 256), a sequence of 251 child nodes, and an optional context keyword argument. 252 253 As a side effect, the parent pointers of the children are updated. 254 """ 255 assert type >= 256, type 256 self.type = type 257 self.children = list(children) 258 for ch in self.children: 259 assert ch.parent is None, repr(ch) 260 ch.parent = self 261 if prefix is not None: 262 self.prefix = prefix 263 if fixers_applied: 264 self.fixers_applied = fixers_applied[:] 265 else: 266 self.fixers_applied = None 267 268 def __repr__(self): 269 """Return a canonical string representation.""" 270 return "%s(%s, %r)" % (self.__class__.__name__, 271 type_repr(self.type), 272 self.children) 273 274 def __unicode__(self): 275 """ 276 Return a pretty string representation. 277 278 This reproduces the input source exactly. 279 """ 280 return u"".join(map(unicode, self.children)) 281 282 if sys.version_info > (3, 0): 283 __str__ = __unicode__ 284 285 def _eq(self, other): 286 """Compare two nodes for equality.""" 287 return (self.type, self.children) == (other.type, other.children) 288 289 def clone(self): 290 """Return a cloned (deep) copy of self.""" 291 return Node(self.type, [ch.clone() for ch in self.children], 292 fixers_applied=self.fixers_applied) 293 294 def post_order(self): 295 """Return a post-order iterator for the tree.""" 296 for child in self.children: 297 for node in child.post_order(): 298 yield node 299 yield self 300 301 def pre_order(self): 302 """Return a pre-order iterator for the tree.""" 303 yield self 304 for child in self.children: 305 for node in child.pre_order(): 306 yield node 307 308 def _prefix_getter(self): 309 """ 310 The whitespace and comments preceding this node in the input. 311 """ 312 if not self.children: 313 return "" 314 return self.children[0].prefix 315 316 def _prefix_setter(self, prefix): 317 if self.children: 318 self.children[0].prefix = prefix 319 320 prefix = property(_prefix_getter, _prefix_setter) 321 322 def set_child(self, i, child): 323 """ 324 Equivalent to 'node.children[i] = child'. This method also sets the 325 child's parent attribute appropriately. 326 """ 327 child.parent = self 328 self.children[i].parent = None 329 self.children[i] = child 330 self.changed() 331 332 def insert_child(self, i, child): 333 """ 334 Equivalent to 'node.children.insert(i, child)'. This method also sets 335 the child's parent attribute appropriately. 336 """ 337 child.parent = self 338 self.children.insert(i, child) 339 self.changed() 340 341 def append_child(self, child): 342 """ 343 Equivalent to 'node.children.append(child)'. This method also sets the 344 child's parent attribute appropriately. 345 """ 346 child.parent = self 347 self.children.append(child) 348 self.changed() 349 350 351 class Leaf(Base): 352 353 """Concrete implementation for leaf nodes.""" 354 355 # Default values for instance variables 356 _prefix = "" # Whitespace and comments preceding this token in the input 357 lineno = 0 # Line where this token starts in the input 358 column = 0 # Column where this token tarts in the input 359 360 def __init__(self, type, value, 361 context=None, 362 prefix=None, 363 fixers_applied=[]): 364 """ 365 Initializer. 366 367 Takes a type constant (a token number < 256), a string value, and an 368 optional context keyword argument. 369 """ 370 assert 0 <= type < 256, type 371 if context is not None: 372 self._prefix, (self.lineno, self.column) = context 373 self.type = type 374 self.value = value 375 if prefix is not None: 376 self._prefix = prefix 377 self.fixers_applied = fixers_applied[:] 378 379 def __repr__(self): 380 """Return a canonical string representation.""" 381 return "%s(%r, %r)" % (self.__class__.__name__, 382 self.type, 383 self.value) 384 385 def __unicode__(self): 386 """ 387 Return a pretty string representation. 388 389 This reproduces the input source exactly. 390 """ 391 return self.prefix + unicode(self.value) 392 393 if sys.version_info > (3, 0): 394 __str__ = __unicode__ 395 396 def _eq(self, other): 397 """Compare two nodes for equality.""" 398 return (self.type, self.value) == (other.type, other.value) 399 400 def clone(self): 401 """Return a cloned (deep) copy of self.""" 402 return Leaf(self.type, self.value, 403 (self.prefix, (self.lineno, self.column)), 404 fixers_applied=self.fixers_applied) 405 406 def leaves(self): 407 yield self 408 409 def post_order(self): 410 """Return a post-order iterator for the tree.""" 411 yield self 412 413 def pre_order(self): 414 """Return a pre-order iterator for the tree.""" 415 yield self 416 417 def _prefix_getter(self): 418 """ 419 The whitespace and comments preceding this token in the input. 420 """ 421 return self._prefix 422 423 def _prefix_setter(self, prefix): 424 self.changed() 425 self._prefix = prefix 426 427 prefix = property(_prefix_getter, _prefix_setter) 428 429 def convert(gr, raw_node): 430 """ 431 Convert raw node information to a Node or Leaf instance. 432 433 This is passed to the parser driver which calls it whenever a reduction of a 434 grammar rule produces a new complete node, so that the tree is build 435 strictly bottom-up. 436 """ 437 type, value, context, children = raw_node 438 if children or type in gr.number2symbol: 439 # If there's exactly one child, return that child instead of 440 # creating a new node. 441 if len(children) == 1: 442 return children[0] 443 return Node(type, children, context=context) 444 else: 445 return Leaf(type, value, context=context) 446 447 448 class BasePattern(object): 449 450 """ 451 A pattern is a tree matching pattern. 452 453 It looks for a specific node type (token or symbol), and 454 optionally for a specific content. 455 456 This is an abstract base class. There are three concrete 457 subclasses: 458 459 - LeafPattern matches a single leaf node; 460 - NodePattern matches a single node (usually non-leaf); 461 - WildcardPattern matches a sequence of nodes of variable length. 462 """ 463 464 # Defaults for instance variables 465 type = None # Node type (token if < 256, symbol if >= 256) 466 content = None # Optional content matching pattern 467 name = None # Optional name used to store match in results dict 468 469 def __new__(cls, *args, **kwds): 470 """Constructor that prevents BasePattern from being instantiated.""" 471 assert cls is not BasePattern, "Cannot instantiate BasePattern" 472 return object.__new__(cls) 473 474 def __repr__(self): 475 args = [type_repr(self.type), self.content, self.name] 476 while args and args[-1] is None: 477 del args[-1] 478 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) 479 480 def optimize(self): 481 """ 482 A subclass can define this as a hook for optimizations. 483 484 Returns either self or another node with the same effect. 485 """ 486 return self 487 488 def match(self, node, results=None): 489 """ 490 Does this pattern exactly match a node? 491 492 Returns True if it matches, False if not. 493 494 If results is not None, it must be a dict which will be 495 updated with the nodes matching named subpatterns. 496 497 Default implementation for non-wildcard patterns. 498 """ 499 if self.type is not None and node.type != self.type: 500 return False 501 if self.content is not None: 502 r = None 503 if results is not None: 504 r = {} 505 if not self._submatch(node, r): 506 return False 507 if r: 508 results.update(r) 509 if results is not None and self.name: 510 results[self.name] = node 511 return True 512 513 def match_seq(self, nodes, results=None): 514 """ 515 Does this pattern exactly match a sequence of nodes? 516 517 Default implementation for non-wildcard patterns. 518 """ 519 if len(nodes) != 1: 520 return False 521 return self.match(nodes[0], results) 522 523 def generate_matches(self, nodes): 524 """ 525 Generator yielding all matches for this pattern. 526 527 Default implementation for non-wildcard patterns. 528 """ 529 r = {} 530 if nodes and self.match(nodes[0], r): 531 yield 1, r 532 533 534 class LeafPattern(BasePattern): 535 536 def __init__(self, type=None, content=None, name=None): 537 """ 538 Initializer. Takes optional type, content, and name. 539 540 The type, if given must be a token type (< 256). If not given, 541 this matches any *leaf* node; the content may still be required. 542 543 The content, if given, must be a string. 544 545 If a name is given, the matching node is stored in the results 546 dict under that key. 547 """ 548 if type is not None: 549 assert 0 <= type < 256, type 550 if content is not None: 551 assert isinstance(content, basestring), repr(content) 552 self.type = type 553 self.content = content 554 self.name = name 555 556 def match(self, node, results=None): 557 """Override match() to insist on a leaf node.""" 558 if not isinstance(node, Leaf): 559 return False 560 return BasePattern.match(self, node, results) 561 562 def _submatch(self, node, results=None): 563 """ 564 Match the pattern's content to the node's children. 565 566 This assumes the node type matches and self.content is not None. 567 568 Returns True if it matches, False if not. 569 570 If results is not None, it must be a dict which will be 571 updated with the nodes matching named subpatterns. 572 573 When returning False, the results dict may still be updated. 574 """ 575 return self.content == node.value 576 577 578 class NodePattern(BasePattern): 579 580 wildcards = False 581 582 def __init__(self, type=None, content=None, name=None): 583 """ 584 Initializer. Takes optional type, content, and name. 585 586 The type, if given, must be a symbol type (>= 256). If the 587 type is None this matches *any* single node (leaf or not), 588 except if content is not None, in which it only matches 589 non-leaf nodes that also match the content pattern. 590 591 The content, if not None, must be a sequence of Patterns that 592 must match the node's children exactly. If the content is 593 given, the type must not be None. 594 595 If a name is given, the matching node is stored in the results 596 dict under that key. 597 """ 598 if type is not None: 599 assert type >= 256, type 600 if content is not None: 601 assert not isinstance(content, basestring), repr(content) 602 content = list(content) 603 for i, item in enumerate(content): 604 assert isinstance(item, BasePattern), (i, item) 605 if isinstance(item, WildcardPattern): 606 self.wildcards = True 607 self.type = type 608 self.content = content 609 self.name = name 610 611 def _submatch(self, node, results=None): 612 """ 613 Match the pattern's content to the node's children. 614 615 This assumes the node type matches and self.content is not None. 616 617 Returns True if it matches, False if not. 618 619 If results is not None, it must be a dict which will be 620 updated with the nodes matching named subpatterns. 621 622 When returning False, the results dict may still be updated. 623 """ 624 if self.wildcards: 625 for c, r in generate_matches(self.content, node.children): 626 if c == len(node.children): 627 if results is not None: 628 results.update(r) 629 return True 630 return False 631 if len(self.content) != len(node.children): 632 return False 633 for subpattern, child in zip(self.content, node.children): 634 if not subpattern.match(child, results): 635 return False 636 return True 637 638 639 class WildcardPattern(BasePattern): 640 641 """ 642 A wildcard pattern can match zero or more nodes. 643 644 This has all the flexibility needed to implement patterns like: 645 646 .* .+ .? .{m,n} 647 (a b c | d e | f) 648 (...)* (...)+ (...)? (...){m,n} 649 650 except it always uses non-greedy matching. 651 """ 652 653 def __init__(self, content=None, min=0, max=HUGE, name=None): 654 """ 655 Initializer. 656 657 Args: 658 content: optional sequence of subsequences of patterns; 659 if absent, matches one node; 660 if present, each subsequence is an alternative [*] 661 min: optional minimum number of times to match, default 0 662 max: optional maximum number of times to match, default HUGE 663 name: optional name assigned to this match 664 665 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is 666 equivalent to (a b c | d e | f g h); if content is None, 667 this is equivalent to '.' in regular expression terms. 668 The min and max parameters work as follows: 669 min=0, max=maxint: .* 670 min=1, max=maxint: .+ 671 min=0, max=1: .? 672 min=1, max=1: . 673 If content is not None, replace the dot with the parenthesized 674 list of alternatives, e.g. (a b c | d e | f g h)* 675 """ 676 assert 0 <= min <= max <= HUGE, (min, max) 677 if content is not None: 678 content = tuple(map(tuple, content)) # Protect against alterations 679 # Check sanity of alternatives 680 assert len(content), repr(content) # Can't have zero alternatives 681 for alt in content: 682 assert len(alt), repr(alt) # Can have empty alternatives 683 self.content = content 684 self.min = min 685 self.max = max 686 self.name = name 687 688 def optimize(self): 689 """Optimize certain stacked wildcard patterns.""" 690 subpattern = None 691 if (self.content is not None and 692 len(self.content) == 1 and len(self.content[0]) == 1): 693 subpattern = self.content[0][0] 694 if self.min == 1 and self.max == 1: 695 if self.content is None: 696 return NodePattern(name=self.name) 697 if subpattern is not None and self.name == subpattern.name: 698 return subpattern.optimize() 699 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and 700 subpattern.min <= 1 and self.name == subpattern.name): 701 return WildcardPattern(subpattern.content, 702 self.min*subpattern.min, 703 self.max*subpattern.max, 704 subpattern.name) 705 return self 706 707 def match(self, node, results=None): 708 """Does this pattern exactly match a node?""" 709 return self.match_seq([node], results) 710 711 def match_seq(self, nodes, results=None): 712 """Does this pattern exactly match a sequence of nodes?""" 713 for c, r in self.generate_matches(nodes): 714 if c == len(nodes): 715 if results is not None: 716 results.update(r) 717 if self.name: 718 results[self.name] = list(nodes) 719 return True 720 return False 721 722 def generate_matches(self, nodes): 723 """ 724 Generator yielding matches for a sequence of nodes. 725 726 Args: 727 nodes: sequence of nodes 728 729 Yields: 730 (count, results) tuples where: 731 count: the match comprises nodes[:count]; 732 results: dict containing named submatches. 733 """ 734 if self.content is None: 735 # Shortcut for special case (see __init__.__doc__) 736 for count in xrange(self.min, 1 + min(len(nodes), self.max)): 737 r = {} 738 if self.name: 739 r[self.name] = nodes[:count] 740 yield count, r 741 elif self.name == "bare_name": 742 yield self._bare_name_matches(nodes) 743 else: 744 # The reason for this is that hitting the recursion limit usually 745 # results in some ugly messages about how RuntimeErrors are being 746 # ignored. We don't do this on non-CPython implementation because 747 # they don't have this problem. 748 if hasattr(sys, "getrefcount"): 749 save_stderr = sys.stderr 750 sys.stderr = StringIO() 751 try: 752 for count, r in self._recursive_matches(nodes, 0): 753 if self.name: 754 r[self.name] = nodes[:count] 755 yield count, r 756 except RuntimeError: 757 # We fall back to the iterative pattern matching scheme if the recursive 758 # scheme hits the recursion limit. 759 for count, r in self._iterative_matches(nodes): 760 if self.name: 761 r[self.name] = nodes[:count] 762 yield count, r 763 finally: 764 if hasattr(sys, "getrefcount"): 765 sys.stderr = save_stderr 766 767 def _iterative_matches(self, nodes): 768 """Helper to iteratively yield the matches.""" 769 nodelen = len(nodes) 770 if 0 >= self.min: 771 yield 0, {} 772 773 results = [] 774 # generate matches that use just one alt from self.content 775 for alt in self.content: 776 for c, r in generate_matches(alt, nodes): 777 yield c, r 778 results.append((c, r)) 779 780 # for each match, iterate down the nodes 781 while results: 782 new_results = [] 783 for c0, r0 in results: 784 # stop if the entire set of nodes has been matched 785 if c0 < nodelen and c0 <= self.max: 786 for alt in self.content: 787 for c1, r1 in generate_matches(alt, nodes[c0:]): 788 if c1 > 0: 789 r = {} 790 r.update(r0) 791 r.update(r1) 792 yield c0 + c1, r 793 new_results.append((c0 + c1, r)) 794 results = new_results 795 796 def _bare_name_matches(self, nodes): 797 """Special optimized matcher for bare_name.""" 798 count = 0 799 r = {} 800 done = False 801 max = len(nodes) 802 while not done and count < max: 803 done = True 804 for leaf in self.content: 805 if leaf[0].match(nodes[count], r): 806 count += 1 807 done = False 808 break 809 r[self.name] = nodes[:count] 810 return count, r 811 812 def _recursive_matches(self, nodes, count): 813 """Helper to recursively yield the matches.""" 814 assert self.content is not None 815 if count >= self.min: 816 yield 0, {} 817 if count < self.max: 818 for alt in self.content: 819 for c0, r0 in generate_matches(alt, nodes): 820 for c1, r1 in self._recursive_matches(nodes[c0:], count+1): 821 r = {} 822 r.update(r0) 823 r.update(r1) 824 yield c0 + c1, r 825 826 827 class NegatedPattern(BasePattern): 828 829 def __init__(self, content=None): 830 """ 831 Initializer. 832 833 The argument is either a pattern or None. If it is None, this 834 only matches an empty sequence (effectively '$' in regex 835 lingo). If it is not None, this matches whenever the argument 836 pattern doesn't have any matches. 837 """ 838 if content is not None: 839 assert isinstance(content, BasePattern), repr(content) 840 self.content = content 841 842 def match(self, node): 843 # We never match a node in its entirety 844 return False 845 846 def match_seq(self, nodes): 847 # We only match an empty sequence of nodes in its entirety 848 return len(nodes) == 0 849 850 def generate_matches(self, nodes): 851 if self.content is None: 852 # Return a match if there is an empty sequence 853 if len(nodes) == 0: 854 yield 0, {} 855 else: 856 # Return a match if the argument pattern has no matches 857 for c, r in self.content.generate_matches(nodes): 858 return 859 yield 0, {} 860 861 862 def generate_matches(patterns, nodes): 863 """ 864 Generator yielding matches for a sequence of patterns and nodes. 865 866 Args: 867 patterns: a sequence of patterns 868 nodes: a sequence of nodes 869 870 Yields: 871 (count, results) tuples where: 872 count: the entire sequence of patterns matches nodes[:count]; 873 results: dict containing named submatches. 874 """ 875 if not patterns: 876 yield 0, {} 877 else: 878 p, rest = patterns[0], patterns[1:] 879 for c0, r0 in p.generate_matches(nodes): 880 if not rest: 881 yield c0, r0 882 else: 883 for c1, r1 in generate_matches(rest, nodes[c0:]): 884 r = {} 885 r.update(r0) 886 r.update(r1) 887 yield c0 + c1, r 888