Home | History | Annotate | Download | only in lib2to3
      1 "Utility functions used by the btm_matcher module"
      2 
      3 from . import pytree
      4 from .pgen2 import grammar, token
      5 from .pygram import pattern_symbols, python_symbols
      6 
      7 syms = pattern_symbols
      8 pysyms = python_symbols
      9 tokens = grammar.opmap
     10 token_labels = token
     11 
     12 TYPE_ANY = -1
     13 TYPE_ALTERNATIVES = -2
     14 TYPE_GROUP = -3
     15 
     16 class MinNode(object):
     17     """This class serves as an intermediate representation of the
     18     pattern tree during the conversion to sets of leaf-to-root
     19     subpatterns"""
     20 
     21     def __init__(self, type=None, name=None):
     22         self.type = type
     23         self.name = name
     24         self.children = []
     25         self.leaf = False
     26         self.parent = None
     27         self.alternatives = []
     28         self.group = []
     29 
     30     def __repr__(self):
     31         return str(self.type) + ' ' + str(self.name)
     32 
     33     def leaf_to_root(self):
     34         """Internal method. Returns a characteristic path of the
     35         pattern tree. This method must be run for all leaves until the
     36         linear subpatterns are merged into a single"""
     37         node = self
     38         subp = []
     39         while node:
     40             if node.type == TYPE_ALTERNATIVES:
     41                 node.alternatives.append(subp)
     42                 if len(node.alternatives) == len(node.children):
     43                     #last alternative
     44                     subp = [tuple(node.alternatives)]
     45                     node.alternatives = []
     46                     node = node.parent
     47                     continue
     48                 else:
     49                     node = node.parent
     50                     subp = None
     51                     break
     52 
     53             if node.type == TYPE_GROUP:
     54                 node.group.append(subp)
     55                 #probably should check the number of leaves
     56                 if len(node.group) == len(node.children):
     57                     subp = get_characteristic_subpattern(node.group)
     58                     node.group = []
     59                     node = node.parent
     60                     continue
     61                 else:
     62                     node = node.parent
     63                     subp = None
     64                     break
     65 
     66             if node.type == token_labels.NAME and node.name:
     67                 #in case of type=name, use the name instead
     68                 subp.append(node.name)
     69             else:
     70                 subp.append(node.type)
     71 
     72             node = node.parent
     73         return subp
     74 
     75     def get_linear_subpattern(self):
     76         """Drives the leaf_to_root method. The reason that
     77         leaf_to_root must be run multiple times is because we need to
     78         reject 'group' matches; for example the alternative form
     79         (a | b c) creates a group [b c] that needs to be matched. Since
     80         matching multiple linear patterns overcomes the automaton's
     81         capabilities, leaf_to_root merges each group into a single
     82         choice based on 'characteristic'ity,
     83 
     84         i.e. (a|b c) -> (a|b) if b more characteristic than c
     85 
     86         Returns: The most 'characteristic'(as defined by
     87           get_characteristic_subpattern) path for the compiled pattern
     88           tree.
     89         """
     90 
     91         for l in self.leaves():
     92             subp = l.leaf_to_root()
     93             if subp:
     94                 return subp
     95 
     96     def leaves(self):
     97         "Generator that returns the leaves of the tree"
     98         for child in self.children:
     99             yield from child.leaves()
    100         if not self.children:
    101             yield self
    102 
    103 def reduce_tree(node, parent=None):
    104     """
    105     Internal function. Reduces a compiled pattern tree to an
    106     intermediate representation suitable for feeding the
    107     automaton. This also trims off any optional pattern elements(like
    108     [a], a*).
    109     """
    110 
    111     new_node = None
    112     #switch on the node type
    113     if node.type == syms.Matcher:
    114         #skip
    115         node = node.children[0]
    116 
    117     if node.type == syms.Alternatives  :
    118         #2 cases
    119         if len(node.children) <= 2:
    120             #just a single 'Alternative', skip this node
    121             new_node = reduce_tree(node.children[0], parent)
    122         else:
    123             #real alternatives
    124             new_node = MinNode(type=TYPE_ALTERNATIVES)
    125             #skip odd children('|' tokens)
    126             for child in node.children:
    127                 if node.children.index(child)%2:
    128                     continue
    129                 reduced = reduce_tree(child, new_node)
    130                 if reduced is not None:
    131                     new_node.children.append(reduced)
    132     elif node.type == syms.Alternative:
    133         if len(node.children) > 1:
    134 
    135             new_node = MinNode(type=TYPE_GROUP)
    136             for child in node.children:
    137                 reduced = reduce_tree(child, new_node)
    138                 if reduced:
    139                     new_node.children.append(reduced)
    140             if not new_node.children:
    141                 # delete the group if all of the children were reduced to None
    142                 new_node = None
    143 
    144         else:
    145             new_node = reduce_tree(node.children[0], parent)
    146 
    147     elif node.type == syms.Unit:
    148         if (isinstance(node.children[0], pytree.Leaf) and
    149             node.children[0].value == '('):
    150             #skip parentheses
    151             return reduce_tree(node.children[1], parent)
    152         if ((isinstance(node.children[0], pytree.Leaf) and
    153                node.children[0].value == '[')
    154                or
    155                (len(node.children)>1 and
    156                hasattr(node.children[1], "value") and
    157                node.children[1].value == '[')):
    158             #skip whole unit if its optional
    159             return None
    160 
    161         leaf = True
    162         details_node = None
    163         alternatives_node = None
    164         has_repeater = False
    165         repeater_node = None
    166         has_variable_name = False
    167 
    168         for child in node.children:
    169             if child.type == syms.Details:
    170                 leaf = False
    171                 details_node = child
    172             elif child.type == syms.Repeater:
    173                 has_repeater = True
    174                 repeater_node = child
    175             elif child.type == syms.Alternatives:
    176                 alternatives_node = child
    177             if hasattr(child, 'value') and child.value == '=': # variable name
    178                 has_variable_name = True
    179 
    180         #skip variable name
    181         if has_variable_name:
    182             #skip variable name, '='
    183             name_leaf = node.children[2]
    184             if hasattr(name_leaf, 'value') and name_leaf.value == '(':
    185                 # skip parenthesis
    186                 name_leaf = node.children[3]
    187         else:
    188             name_leaf = node.children[0]
    189 
    190         #set node type
    191         if name_leaf.type == token_labels.NAME:
    192             #(python) non-name or wildcard
    193             if name_leaf.value == 'any':
    194                 new_node = MinNode(type=TYPE_ANY)
    195             else:
    196                 if hasattr(token_labels, name_leaf.value):
    197                     new_node = MinNode(type=getattr(token_labels, name_leaf.value))
    198                 else:
    199                     new_node = MinNode(type=getattr(pysyms, name_leaf.value))
    200 
    201         elif name_leaf.type == token_labels.STRING:
    202             #(python) name or character; remove the apostrophes from
    203             #the string value
    204             name = name_leaf.value.strip("'")
    205             if name in tokens:
    206                 new_node = MinNode(type=tokens[name])
    207             else:
    208                 new_node = MinNode(type=token_labels.NAME, name=name)
    209         elif name_leaf.type == syms.Alternatives:
    210             new_node = reduce_tree(alternatives_node, parent)
    211 
    212         #handle repeaters
    213         if has_repeater:
    214             if repeater_node.children[0].value == '*':
    215                 #reduce to None
    216                 new_node = None
    217             elif repeater_node.children[0].value == '+':
    218                 #reduce to a single occurrence i.e. do nothing
    219                 pass
    220             else:
    221                 #TODO: handle {min, max} repeaters
    222                 raise NotImplementedError
    223                 pass
    224 
    225         #add children
    226         if details_node and new_node is not None:
    227             for child in details_node.children[1:-1]:
    228                 #skip '<', '>' markers
    229                 reduced = reduce_tree(child, new_node)
    230                 if reduced is not None:
    231                     new_node.children.append(reduced)
    232     if new_node:
    233         new_node.parent = parent
    234     return new_node
    235 
    236 
    237 def get_characteristic_subpattern(subpatterns):
    238     """Picks the most characteristic from a list of linear patterns
    239     Current order used is:
    240     names > common_names > common_chars
    241     """
    242     if not isinstance(subpatterns, list):
    243         return subpatterns
    244     if len(subpatterns)==1:
    245         return subpatterns[0]
    246 
    247     # first pick out the ones containing variable names
    248     subpatterns_with_names = []
    249     subpatterns_with_common_names = []
    250     common_names = ['in', 'for', 'if' , 'not', 'None']
    251     subpatterns_with_common_chars = []
    252     common_chars = "[]().,:"
    253     for subpattern in subpatterns:
    254         if any(rec_test(subpattern, lambda x: type(x) is str)):
    255             if any(rec_test(subpattern,
    256                             lambda x: isinstance(x, str) and x in common_chars)):
    257                 subpatterns_with_common_chars.append(subpattern)
    258             elif any(rec_test(subpattern,
    259                               lambda x: isinstance(x, str) and x in common_names)):
    260                 subpatterns_with_common_names.append(subpattern)
    261 
    262             else:
    263                 subpatterns_with_names.append(subpattern)
    264 
    265     if subpatterns_with_names:
    266         subpatterns = subpatterns_with_names
    267     elif subpatterns_with_common_names:
    268         subpatterns = subpatterns_with_common_names
    269     elif subpatterns_with_common_chars:
    270         subpatterns = subpatterns_with_common_chars
    271     # of the remaining subpatterns pick out the longest one
    272     return max(subpatterns, key=len)
    273 
    274 def rec_test(sequence, test_func):
    275     """Tests test_func on all items of sequence and items of included
    276     sub-iterables"""
    277     for x in sequence:
    278         if isinstance(x, (list, tuple)):
    279             yield from rec_test(x, test_func)
    280         else:
    281             yield test_func(x)
    282