Home | History | Annotate | Download | only in lib2to3
      1 "Utility functions used by the btm_matcher module"
      2 
      3 from . import pytree
      4 from .pgen2 import grammar, token
      5 from .pygram import pattern_symbols, python_symbols
      6 
      7 syms = pattern_symbols
      8 pysyms = python_symbols
      9 tokens = grammar.opmap
     10 token_labels = token
     11 
     12 TYPE_ANY = -1
     13 TYPE_ALTERNATIVES = -2
     14 TYPE_GROUP = -3
     15 
     16 class MinNode(object):
     17     """This class serves as an intermediate representation of the
     18     pattern tree during the conversion to sets of leaf-to-root
     19     subpatterns"""
     20 
     21     def __init__(self, type=None, name=None):
     22         self.type = type
     23         self.name = name
     24         self.children = []
     25         self.leaf = False
     26         self.parent = None
     27         self.alternatives = []
     28         self.group = []
     29 
     30     def __repr__(self):
     31         return str(self.type) + ' ' + str(self.name)
     32 
     33     def leaf_to_root(self):
     34         """Internal method. Returns a characteristic path of the
     35         pattern tree. This method must be run for all leaves until the
     36         linear subpatterns are merged into a single"""
     37         node = self
     38         subp = []
     39         while node:
     40             if node.type == TYPE_ALTERNATIVES:
     41                 node.alternatives.append(subp)
     42                 if len(node.alternatives) == len(node.children):
     43                     #last alternative
     44                     subp = [tuple(node.alternatives)]
     45                     node.alternatives = []
     46                     node = node.parent
     47                     continue
     48                 else:
     49                     node = node.parent
     50                     subp = None
     51                     break
     52 
     53             if node.type == TYPE_GROUP:
     54                 node.group.append(subp)
     55                 #probably should check the number of leaves
     56                 if len(node.group) == len(node.children):
     57                     subp = get_characteristic_subpattern(node.group)
     58                     node.group = []
     59                     node = node.parent
     60                     continue
     61                 else:
     62                     node = node.parent
     63                     subp = None
     64                     break
     65 
     66             if node.type == token_labels.NAME and node.name:
     67                 #in case of type=name, use the name instead
     68                 subp.append(node.name)
     69             else:
     70                 subp.append(node.type)
     71 
     72             node = node.parent
     73         return subp
     74 
     75     def get_linear_subpattern(self):
     76         """Drives the leaf_to_root method. The reason that
     77         leaf_to_root must be run multiple times is because we need to
     78         reject 'group' matches; for example the alternative form
     79         (a | b c) creates a group [b c] that needs to be matched. Since
     80         matching multiple linear patterns overcomes the automaton's
     81         capabilities, leaf_to_root merges each group into a single
     82         choice based on 'characteristic'ity,
     83 
     84         i.e. (a|b c) -> (a|b) if b more characteristic than c
     85 
     86         Returns: The most 'characteristic'(as defined by
     87           get_characteristic_subpattern) path for the compiled pattern
     88           tree.
     89         """
     90 
     91         for l in self.leaves():
     92             subp = l.leaf_to_root()
     93             if subp:
     94                 return subp
     95 
     96     def leaves(self):
     97         "Generator that returns the leaves of the tree"
     98         for child in self.children:
     99             for x in child.leaves():
    100                 yield x
    101         if not self.children:
    102             yield self
    103 
    104 def reduce_tree(node, parent=None):
    105     """
    106     Internal function. Reduces a compiled pattern tree to an
    107     intermediate representation suitable for feeding the
    108     automaton. This also trims off any optional pattern elements(like
    109     [a], a*).
    110     """
    111 
    112     new_node = None
    113     #switch on the node type
    114     if node.type == syms.Matcher:
    115         #skip
    116         node = node.children[0]
    117 
    118     if node.type == syms.Alternatives  :
    119         #2 cases
    120         if len(node.children) <= 2:
    121             #just a single 'Alternative', skip this node
    122             new_node = reduce_tree(node.children[0], parent)
    123         else:
    124             #real alternatives
    125             new_node = MinNode(type=TYPE_ALTERNATIVES)
    126             #skip odd children('|' tokens)
    127             for child in node.children:
    128                 if node.children.index(child)%2:
    129                     continue
    130                 reduced = reduce_tree(child, new_node)
    131                 if reduced is not None:
    132                     new_node.children.append(reduced)
    133     elif node.type == syms.Alternative:
    134         if len(node.children) > 1:
    135 
    136             new_node = MinNode(type=TYPE_GROUP)
    137             for child in node.children:
    138                 reduced = reduce_tree(child, new_node)
    139                 if reduced:
    140                     new_node.children.append(reduced)
    141             if not new_node.children:
    142                 # delete the group if all of the children were reduced to None
    143                 new_node = None
    144 
    145         else:
    146             new_node = reduce_tree(node.children[0], parent)
    147 
    148     elif node.type == syms.Unit:
    149         if (isinstance(node.children[0], pytree.Leaf) and
    150             node.children[0].value == '('):
    151             #skip parentheses
    152             return reduce_tree(node.children[1], parent)
    153         if ((isinstance(node.children[0], pytree.Leaf) and
    154                node.children[0].value == '[')
    155                or
    156                (len(node.children)>1 and
    157                hasattr(node.children[1], "value") and
    158                node.children[1].value == '[')):
    159             #skip whole unit if its optional
    160             return None
    161 
    162         leaf = True
    163         details_node = None
    164         alternatives_node = None
    165         has_repeater = False
    166         repeater_node = None
    167         has_variable_name = False
    168 
    169         for child in node.children:
    170             if child.type == syms.Details:
    171                 leaf = False
    172                 details_node = child
    173             elif child.type == syms.Repeater:
    174                 has_repeater = True
    175                 repeater_node = child
    176             elif child.type == syms.Alternatives:
    177                 alternatives_node = child
    178             if hasattr(child, 'value') and child.value == '=': # variable name
    179                 has_variable_name = True
    180 
    181         #skip variable name
    182         if has_variable_name:
    183             #skip variable name, '='
    184             name_leaf = node.children[2]
    185             if hasattr(name_leaf, 'value') and name_leaf.value == '(':
    186                 # skip parenthesis
    187                 name_leaf = node.children[3]
    188         else:
    189             name_leaf = node.children[0]
    190 
    191         #set node type
    192         if name_leaf.type == token_labels.NAME:
    193             #(python) non-name or wildcard
    194             if name_leaf.value == 'any':
    195                 new_node = MinNode(type=TYPE_ANY)
    196             else:
    197                 if hasattr(token_labels, name_leaf.value):
    198                     new_node = MinNode(type=getattr(token_labels, name_leaf.value))
    199                 else:
    200                     new_node = MinNode(type=getattr(pysyms, name_leaf.value))
    201 
    202         elif name_leaf.type == token_labels.STRING:
    203             #(python) name or character; remove the apostrophes from
    204             #the string value
    205             name = name_leaf.value.strip("'")
    206             if name in tokens:
    207                 new_node = MinNode(type=tokens[name])
    208             else:
    209                 new_node = MinNode(type=token_labels.NAME, name=name)
    210         elif name_leaf.type == syms.Alternatives:
    211             new_node = reduce_tree(alternatives_node, parent)
    212 
    213         #handle repeaters
    214         if has_repeater:
    215             if repeater_node.children[0].value == '*':
    216                 #reduce to None
    217                 new_node = None
    218             elif repeater_node.children[0].value == '+':
    219                 #reduce to a single occurence i.e. do nothing
    220                 pass
    221             else:
    222                 #TODO: handle {min, max} repeaters
    223                 raise NotImplementedError
    224                 pass
    225 
    226         #add children
    227         if details_node and new_node is not None:
    228             for child in details_node.children[1:-1]:
    229                 #skip '<', '>' markers
    230                 reduced = reduce_tree(child, new_node)
    231                 if reduced is not None:
    232                     new_node.children.append(reduced)
    233     if new_node:
    234         new_node.parent = parent
    235     return new_node
    236 
    237 
    238 def get_characteristic_subpattern(subpatterns):
    239     """Picks the most characteristic from a list of linear patterns
    240     Current order used is:
    241     names > common_names > common_chars
    242     """
    243     if not isinstance(subpatterns, list):
    244         return subpatterns
    245     if len(subpatterns)==1:
    246         return subpatterns[0]
    247 
    248     # first pick out the ones containing variable names
    249     subpatterns_with_names = []
    250     subpatterns_with_common_names = []
    251     common_names = ['in', 'for', 'if' , 'not', 'None']
    252     subpatterns_with_common_chars = []
    253     common_chars = "[]().,:"
    254     for subpattern in subpatterns:
    255         if any(rec_test(subpattern, lambda x: type(x) is str)):
    256             if any(rec_test(subpattern,
    257                             lambda x: isinstance(x, str) and x in common_chars)):
    258                 subpatterns_with_common_chars.append(subpattern)
    259             elif any(rec_test(subpattern,
    260                               lambda x: isinstance(x, str) and x in common_names)):
    261                 subpatterns_with_common_names.append(subpattern)
    262 
    263             else:
    264                 subpatterns_with_names.append(subpattern)
    265 
    266     if subpatterns_with_names:
    267         subpatterns = subpatterns_with_names
    268     elif subpatterns_with_common_names:
    269         subpatterns = subpatterns_with_common_names
    270     elif subpatterns_with_common_chars:
    271         subpatterns = subpatterns_with_common_chars
    272     # of the remaining subpatterns pick out the longest one
    273     return max(subpatterns, key=len)
    274 
    275 def rec_test(sequence, test_func):
    276     """Tests test_func on all items of sequence and items of included
    277     sub-iterables"""
    278     for x in sequence:
    279         if isinstance(x, (list, tuple)):
    280             for y in rec_test(x, test_func):
    281                 yield y
    282         else:
    283             yield test_func(x)
    284