1 "Utility functions used by the btm_matcher module" 2 3 from . import pytree 4 from .pgen2 import grammar, token 5 from .pygram import pattern_symbols, python_symbols 6 7 syms = pattern_symbols 8 pysyms = python_symbols 9 tokens = grammar.opmap 10 token_labels = token 11 12 TYPE_ANY = -1 13 TYPE_ALTERNATIVES = -2 14 TYPE_GROUP = -3 15 16 class MinNode(object): 17 """This class serves as an intermediate representation of the 18 pattern tree during the conversion to sets of leaf-to-root 19 subpatterns""" 20 21 def __init__(self, type=None, name=None): 22 self.type = type 23 self.name = name 24 self.children = [] 25 self.leaf = False 26 self.parent = None 27 self.alternatives = [] 28 self.group = [] 29 30 def __repr__(self): 31 return str(self.type) + ' ' + str(self.name) 32 33 def leaf_to_root(self): 34 """Internal method. Returns a characteristic path of the 35 pattern tree. This method must be run for all leaves until the 36 linear subpatterns are merged into a single""" 37 node = self 38 subp = [] 39 while node: 40 if node.type == TYPE_ALTERNATIVES: 41 node.alternatives.append(subp) 42 if len(node.alternatives) == len(node.children): 43 #last alternative 44 subp = [tuple(node.alternatives)] 45 node.alternatives = [] 46 node = node.parent 47 continue 48 else: 49 node = node.parent 50 subp = None 51 break 52 53 if node.type == TYPE_GROUP: 54 node.group.append(subp) 55 #probably should check the number of leaves 56 if len(node.group) == len(node.children): 57 subp = get_characteristic_subpattern(node.group) 58 node.group = [] 59 node = node.parent 60 continue 61 else: 62 node = node.parent 63 subp = None 64 break 65 66 if node.type == token_labels.NAME and node.name: 67 #in case of type=name, use the name instead 68 subp.append(node.name) 69 else: 70 subp.append(node.type) 71 72 node = node.parent 73 return subp 74 75 def get_linear_subpattern(self): 76 """Drives the leaf_to_root method. The reason that 77 leaf_to_root must be run multiple times is because we need to 78 reject 'group' matches; for example the alternative form 79 (a | b c) creates a group [b c] that needs to be matched. Since 80 matching multiple linear patterns overcomes the automaton's 81 capabilities, leaf_to_root merges each group into a single 82 choice based on 'characteristic'ity, 83 84 i.e. (a|b c) -> (a|b) if b more characteristic than c 85 86 Returns: The most 'characteristic'(as defined by 87 get_characteristic_subpattern) path for the compiled pattern 88 tree. 89 """ 90 91 for l in self.leaves(): 92 subp = l.leaf_to_root() 93 if subp: 94 return subp 95 96 def leaves(self): 97 "Generator that returns the leaves of the tree" 98 for child in self.children: 99 yield from child.leaves() 100 if not self.children: 101 yield self 102 103 def reduce_tree(node, parent=None): 104 """ 105 Internal function. Reduces a compiled pattern tree to an 106 intermediate representation suitable for feeding the 107 automaton. This also trims off any optional pattern elements(like 108 [a], a*). 109 """ 110 111 new_node = None 112 #switch on the node type 113 if node.type == syms.Matcher: 114 #skip 115 node = node.children[0] 116 117 if node.type == syms.Alternatives : 118 #2 cases 119 if len(node.children) <= 2: 120 #just a single 'Alternative', skip this node 121 new_node = reduce_tree(node.children[0], parent) 122 else: 123 #real alternatives 124 new_node = MinNode(type=TYPE_ALTERNATIVES) 125 #skip odd children('|' tokens) 126 for child in node.children: 127 if node.children.index(child)%2: 128 continue 129 reduced = reduce_tree(child, new_node) 130 if reduced is not None: 131 new_node.children.append(reduced) 132 elif node.type == syms.Alternative: 133 if len(node.children) > 1: 134 135 new_node = MinNode(type=TYPE_GROUP) 136 for child in node.children: 137 reduced = reduce_tree(child, new_node) 138 if reduced: 139 new_node.children.append(reduced) 140 if not new_node.children: 141 # delete the group if all of the children were reduced to None 142 new_node = None 143 144 else: 145 new_node = reduce_tree(node.children[0], parent) 146 147 elif node.type == syms.Unit: 148 if (isinstance(node.children[0], pytree.Leaf) and 149 node.children[0].value == '('): 150 #skip parentheses 151 return reduce_tree(node.children[1], parent) 152 if ((isinstance(node.children[0], pytree.Leaf) and 153 node.children[0].value == '[') 154 or 155 (len(node.children)>1 and 156 hasattr(node.children[1], "value") and 157 node.children[1].value == '[')): 158 #skip whole unit if its optional 159 return None 160 161 leaf = True 162 details_node = None 163 alternatives_node = None 164 has_repeater = False 165 repeater_node = None 166 has_variable_name = False 167 168 for child in node.children: 169 if child.type == syms.Details: 170 leaf = False 171 details_node = child 172 elif child.type == syms.Repeater: 173 has_repeater = True 174 repeater_node = child 175 elif child.type == syms.Alternatives: 176 alternatives_node = child 177 if hasattr(child, 'value') and child.value == '=': # variable name 178 has_variable_name = True 179 180 #skip variable name 181 if has_variable_name: 182 #skip variable name, '=' 183 name_leaf = node.children[2] 184 if hasattr(name_leaf, 'value') and name_leaf.value == '(': 185 # skip parenthesis 186 name_leaf = node.children[3] 187 else: 188 name_leaf = node.children[0] 189 190 #set node type 191 if name_leaf.type == token_labels.NAME: 192 #(python) non-name or wildcard 193 if name_leaf.value == 'any': 194 new_node = MinNode(type=TYPE_ANY) 195 else: 196 if hasattr(token_labels, name_leaf.value): 197 new_node = MinNode(type=getattr(token_labels, name_leaf.value)) 198 else: 199 new_node = MinNode(type=getattr(pysyms, name_leaf.value)) 200 201 elif name_leaf.type == token_labels.STRING: 202 #(python) name or character; remove the apostrophes from 203 #the string value 204 name = name_leaf.value.strip("'") 205 if name in tokens: 206 new_node = MinNode(type=tokens[name]) 207 else: 208 new_node = MinNode(type=token_labels.NAME, name=name) 209 elif name_leaf.type == syms.Alternatives: 210 new_node = reduce_tree(alternatives_node, parent) 211 212 #handle repeaters 213 if has_repeater: 214 if repeater_node.children[0].value == '*': 215 #reduce to None 216 new_node = None 217 elif repeater_node.children[0].value == '+': 218 #reduce to a single occurrence i.e. do nothing 219 pass 220 else: 221 #TODO: handle {min, max} repeaters 222 raise NotImplementedError 223 pass 224 225 #add children 226 if details_node and new_node is not None: 227 for child in details_node.children[1:-1]: 228 #skip '<', '>' markers 229 reduced = reduce_tree(child, new_node) 230 if reduced is not None: 231 new_node.children.append(reduced) 232 if new_node: 233 new_node.parent = parent 234 return new_node 235 236 237 def get_characteristic_subpattern(subpatterns): 238 """Picks the most characteristic from a list of linear patterns 239 Current order used is: 240 names > common_names > common_chars 241 """ 242 if not isinstance(subpatterns, list): 243 return subpatterns 244 if len(subpatterns)==1: 245 return subpatterns[0] 246 247 # first pick out the ones containing variable names 248 subpatterns_with_names = [] 249 subpatterns_with_common_names = [] 250 common_names = ['in', 'for', 'if' , 'not', 'None'] 251 subpatterns_with_common_chars = [] 252 common_chars = "[]().,:" 253 for subpattern in subpatterns: 254 if any(rec_test(subpattern, lambda x: type(x) is str)): 255 if any(rec_test(subpattern, 256 lambda x: isinstance(x, str) and x in common_chars)): 257 subpatterns_with_common_chars.append(subpattern) 258 elif any(rec_test(subpattern, 259 lambda x: isinstance(x, str) and x in common_names)): 260 subpatterns_with_common_names.append(subpattern) 261 262 else: 263 subpatterns_with_names.append(subpattern) 264 265 if subpatterns_with_names: 266 subpatterns = subpatterns_with_names 267 elif subpatterns_with_common_names: 268 subpatterns = subpatterns_with_common_names 269 elif subpatterns_with_common_chars: 270 subpatterns = subpatterns_with_common_chars 271 # of the remaining subpatterns pick out the longest one 272 return max(subpatterns, key=len) 273 274 def rec_test(sequence, test_func): 275 """Tests test_func on all items of sequence and items of included 276 sub-iterables""" 277 for x in sequence: 278 if isinstance(x, (list, tuple)): 279 yield from rec_test(x, test_func) 280 else: 281 yield test_func(x) 282