Home | History | Annotate | Download | only in lib2to3
      1 """A bottom-up tree matching algorithm implementation meant to speed
      2 up 2to3's matching process. After the tree patterns are reduced to
      3 their rarest linear path, a linear Aho-Corasick automaton is
      4 created. The linear automaton traverses the linear paths from the
      5 leaves to the root of the AST and returns a set of nodes for further
      6 matching. This reduces significantly the number of candidate nodes."""
      7 
      8 __author__ = "George Boutsioukis <gboutsioukis (at] gmail.com>"
      9 
     10 import logging
     11 import itertools
     12 from collections import defaultdict
     13 
     14 from . import pytree
     15 from .btm_utils import reduce_tree
     16 
     17 class BMNode(object):
     18     """Class for a node of the Aho-Corasick automaton used in matching"""
     19     count = itertools.count()
     20     def __init__(self):
     21         self.transition_table = {}
     22         self.fixers = []
     23         self.id = next(BMNode.count)
     24         self.content = ''
     25 
     26 class BottomMatcher(object):
     27     """The main matcher class. After instantiating the patterns should
     28     be added using the add_fixer method"""
     29 
     30     def __init__(self):
     31         self.match = set()
     32         self.root = BMNode()
     33         self.nodes = [self.root]
     34         self.fixers = []
     35         self.logger = logging.getLogger("RefactoringTool")
     36 
     37     def add_fixer(self, fixer):
     38         """Reduces a fixer's pattern tree to a linear path and adds it
     39         to the matcher(a common Aho-Corasick automaton). The fixer is
     40         appended on the matching states and called when they are
     41         reached"""
     42         self.fixers.append(fixer)
     43         tree = reduce_tree(fixer.pattern_tree)
     44         linear = tree.get_linear_subpattern()
     45         match_nodes = self.add(linear, start=self.root)
     46         for match_node in match_nodes:
     47             match_node.fixers.append(fixer)
     48 
     49     def add(self, pattern, start):
     50         "Recursively adds a linear pattern to the AC automaton"
     51         #print("adding pattern", pattern, "to", start)
     52         if not pattern:
     53             #print("empty pattern")
     54             return [start]
     55         if isinstance(pattern[0], tuple):
     56             #alternatives
     57             #print("alternatives")
     58             match_nodes = []
     59             for alternative in pattern[0]:
     60                 #add all alternatives, and add the rest of the pattern
     61                 #to each end node
     62                 end_nodes = self.add(alternative, start=start)
     63                 for end in end_nodes:
     64                     match_nodes.extend(self.add(pattern[1:], end))
     65             return match_nodes
     66         else:
     67             #single token
     68             #not last
     69             if pattern[0] not in start.transition_table:
     70                 #transition did not exist, create new
     71                 next_node = BMNode()
     72                 start.transition_table[pattern[0]] = next_node
     73             else:
     74                 #transition exists already, follow
     75                 next_node = start.transition_table[pattern[0]]
     76 
     77             if pattern[1:]:
     78                 end_nodes = self.add(pattern[1:], start=next_node)
     79             else:
     80                 end_nodes = [next_node]
     81             return end_nodes
     82 
     83     def run(self, leaves):
     84         """The main interface with the bottom matcher. The tree is
     85         traversed from the bottom using the constructed
     86         automaton. Nodes are only checked once as the tree is
     87         retraversed. When the automaton fails, we give it one more
     88         shot(in case the above tree matches as a whole with the
     89         rejected leaf), then we break for the next leaf. There is the
     90         special case of multiple arguments(see code comments) where we
     91         recheck the nodes
     92 
     93         Args:
     94            The leaves of the AST tree to be matched
     95 
     96         Returns:
     97            A dictionary of node matches with fixers as the keys
     98         """
     99         current_ac_node = self.root
    100         results = defaultdict(list)
    101         for leaf in leaves:
    102             current_ast_node = leaf
    103             while current_ast_node:
    104                 current_ast_node.was_checked = True
    105                 for child in current_ast_node.children:
    106                     # multiple statements, recheck
    107                     if isinstance(child, pytree.Leaf) and child.value == u";":
    108                         current_ast_node.was_checked = False
    109                         break
    110                 if current_ast_node.type == 1:
    111                     #name
    112                     node_token = current_ast_node.value
    113                 else:
    114                     node_token = current_ast_node.type
    115 
    116                 if node_token in current_ac_node.transition_table:
    117                     #token matches
    118                     current_ac_node = current_ac_node.transition_table[node_token]
    119                     for fixer in current_ac_node.fixers:
    120                         if not fixer in results:
    121                             results[fixer] = []
    122                         results[fixer].append(current_ast_node)
    123 
    124                 else:
    125                     #matching failed, reset automaton
    126                     current_ac_node = self.root
    127                     if (current_ast_node.parent is not None
    128                         and current_ast_node.parent.was_checked):
    129                         #the rest of the tree upwards has been checked, next leaf
    130                         break
    131 
    132                     #recheck the rejected node once from the root
    133                     if node_token in current_ac_node.transition_table:
    134                         #token matches
    135                         current_ac_node = current_ac_node.transition_table[node_token]
    136                         for fixer in current_ac_node.fixers:
    137                             if not fixer in results.keys():
    138                                 results[fixer] = []
    139                             results[fixer].append(current_ast_node)
    140 
    141                 current_ast_node = current_ast_node.parent
    142         return results
    143 
    144     def print_ac(self):
    145         "Prints a graphviz diagram of the BM automaton(for debugging)"
    146         print("digraph g{")
    147         def print_node(node):
    148             for subnode_key in node.transition_table.keys():
    149                 subnode = node.transition_table[subnode_key]
    150                 print("%d -> %d [label=%s] //%s" %
    151                       (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
    152                 if subnode_key == 1:
    153                     print(subnode.content)
    154                 print_node(subnode)
    155         print_node(self.root)
    156         print("}")
    157 
    158 # taken from pytree.py for debugging; only used by print_ac
    159 _type_reprs = {}
    160 def type_repr(type_num):
    161     global _type_reprs
    162     if not _type_reprs:
    163         from .pygram import python_symbols
    164         # printing tokens is possible but not as useful
    165         # from .pgen2 import token // token.__dict__.items():
    166         for name, val in python_symbols.__dict__.items():
    167             if type(val) == int: _type_reprs[val] = name
    168     return _type_reprs.setdefault(type_num, type_num)
    169