1 "Utility functions used by the btm_matcher module" 2 3 from . import pytree 4 from .pgen2 import grammar, token 5 from .pygram import pattern_symbols, python_symbols 6 7 syms = pattern_symbols 8 pysyms = python_symbols 9 tokens = grammar.opmap 10 token_labels = token 11 12 TYPE_ANY = -1 13 TYPE_ALTERNATIVES = -2 14 TYPE_GROUP = -3 15 16 class MinNode(object): 17 """This class serves as an intermediate representation of the 18 pattern tree during the conversion to sets of leaf-to-root 19 subpatterns""" 20 21 def __init__(self, type=None, name=None): 22 self.type = type 23 self.name = name 24 self.children = [] 25 self.leaf = False 26 self.parent = None 27 self.alternatives = [] 28 self.group = [] 29 30 def __repr__(self): 31 return str(self.type) + ' ' + str(self.name) 32 33 def leaf_to_root(self): 34 """Internal method. Returns a characteristic path of the 35 pattern tree. This method must be run for all leaves until the 36 linear subpatterns are merged into a single""" 37 node = self 38 subp = [] 39 while node: 40 if node.type == TYPE_ALTERNATIVES: 41 node.alternatives.append(subp) 42 if len(node.alternatives) == len(node.children): 43 #last alternative 44 subp = [tuple(node.alternatives)] 45 node.alternatives = [] 46 node = node.parent 47 continue 48 else: 49 node = node.parent 50 subp = None 51 break 52 53 if node.type == TYPE_GROUP: 54 node.group.append(subp) 55 #probably should check the number of leaves 56 if len(node.group) == len(node.children): 57 subp = get_characteristic_subpattern(node.group) 58 node.group = [] 59 node = node.parent 60 continue 61 else: 62 node = node.parent 63 subp = None 64 break 65 66 if node.type == token_labels.NAME and node.name: 67 #in case of type=name, use the name instead 68 subp.append(node.name) 69 else: 70 subp.append(node.type) 71 72 node = node.parent 73 return subp 74 75 def get_linear_subpattern(self): 76 """Drives the leaf_to_root method. The reason that 77 leaf_to_root must be run multiple times is because we need to 78 reject 'group' matches; for example the alternative form 79 (a | b c) creates a group [b c] that needs to be matched. Since 80 matching multiple linear patterns overcomes the automaton's 81 capabilities, leaf_to_root merges each group into a single 82 choice based on 'characteristic'ity, 83 84 i.e. (a|b c) -> (a|b) if b more characteristic than c 85 86 Returns: The most 'characteristic'(as defined by 87 get_characteristic_subpattern) path for the compiled pattern 88 tree. 89 """ 90 91 for l in self.leaves(): 92 subp = l.leaf_to_root() 93 if subp: 94 return subp 95 96 def leaves(self): 97 "Generator that returns the leaves of the tree" 98 for child in self.children: 99 for x in child.leaves(): 100 yield x 101 if not self.children: 102 yield self 103 104 def reduce_tree(node, parent=None): 105 """ 106 Internal function. Reduces a compiled pattern tree to an 107 intermediate representation suitable for feeding the 108 automaton. This also trims off any optional pattern elements(like 109 [a], a*). 110 """ 111 112 new_node = None 113 #switch on the node type 114 if node.type == syms.Matcher: 115 #skip 116 node = node.children[0] 117 118 if node.type == syms.Alternatives : 119 #2 cases 120 if len(node.children) <= 2: 121 #just a single 'Alternative', skip this node 122 new_node = reduce_tree(node.children[0], parent) 123 else: 124 #real alternatives 125 new_node = MinNode(type=TYPE_ALTERNATIVES) 126 #skip odd children('|' tokens) 127 for child in node.children: 128 if node.children.index(child)%2: 129 continue 130 reduced = reduce_tree(child, new_node) 131 if reduced is not None: 132 new_node.children.append(reduced) 133 elif node.type == syms.Alternative: 134 if len(node.children) > 1: 135 136 new_node = MinNode(type=TYPE_GROUP) 137 for child in node.children: 138 reduced = reduce_tree(child, new_node) 139 if reduced: 140 new_node.children.append(reduced) 141 if not new_node.children: 142 # delete the group if all of the children were reduced to None 143 new_node = None 144 145 else: 146 new_node = reduce_tree(node.children[0], parent) 147 148 elif node.type == syms.Unit: 149 if (isinstance(node.children[0], pytree.Leaf) and 150 node.children[0].value == '('): 151 #skip parentheses 152 return reduce_tree(node.children[1], parent) 153 if ((isinstance(node.children[0], pytree.Leaf) and 154 node.children[0].value == '[') 155 or 156 (len(node.children)>1 and 157 hasattr(node.children[1], "value") and 158 node.children[1].value == '[')): 159 #skip whole unit if its optional 160 return None 161 162 leaf = True 163 details_node = None 164 alternatives_node = None 165 has_repeater = False 166 repeater_node = None 167 has_variable_name = False 168 169 for child in node.children: 170 if child.type == syms.Details: 171 leaf = False 172 details_node = child 173 elif child.type == syms.Repeater: 174 has_repeater = True 175 repeater_node = child 176 elif child.type == syms.Alternatives: 177 alternatives_node = child 178 if hasattr(child, 'value') and child.value == '=': # variable name 179 has_variable_name = True 180 181 #skip variable name 182 if has_variable_name: 183 #skip variable name, '=' 184 name_leaf = node.children[2] 185 if hasattr(name_leaf, 'value') and name_leaf.value == '(': 186 # skip parenthesis 187 name_leaf = node.children[3] 188 else: 189 name_leaf = node.children[0] 190 191 #set node type 192 if name_leaf.type == token_labels.NAME: 193 #(python) non-name or wildcard 194 if name_leaf.value == 'any': 195 new_node = MinNode(type=TYPE_ANY) 196 else: 197 if hasattr(token_labels, name_leaf.value): 198 new_node = MinNode(type=getattr(token_labels, name_leaf.value)) 199 else: 200 new_node = MinNode(type=getattr(pysyms, name_leaf.value)) 201 202 elif name_leaf.type == token_labels.STRING: 203 #(python) name or character; remove the apostrophes from 204 #the string value 205 name = name_leaf.value.strip("'") 206 if name in tokens: 207 new_node = MinNode(type=tokens[name]) 208 else: 209 new_node = MinNode(type=token_labels.NAME, name=name) 210 elif name_leaf.type == syms.Alternatives: 211 new_node = reduce_tree(alternatives_node, parent) 212 213 #handle repeaters 214 if has_repeater: 215 if repeater_node.children[0].value == '*': 216 #reduce to None 217 new_node = None 218 elif repeater_node.children[0].value == '+': 219 #reduce to a single occurence i.e. do nothing 220 pass 221 else: 222 #TODO: handle {min, max} repeaters 223 raise NotImplementedError 224 pass 225 226 #add children 227 if details_node and new_node is not None: 228 for child in details_node.children[1:-1]: 229 #skip '<', '>' markers 230 reduced = reduce_tree(child, new_node) 231 if reduced is not None: 232 new_node.children.append(reduced) 233 if new_node: 234 new_node.parent = parent 235 return new_node 236 237 238 def get_characteristic_subpattern(subpatterns): 239 """Picks the most characteristic from a list of linear patterns 240 Current order used is: 241 names > common_names > common_chars 242 """ 243 if not isinstance(subpatterns, list): 244 return subpatterns 245 if len(subpatterns)==1: 246 return subpatterns[0] 247 248 # first pick out the ones containing variable names 249 subpatterns_with_names = [] 250 subpatterns_with_common_names = [] 251 common_names = ['in', 'for', 'if' , 'not', 'None'] 252 subpatterns_with_common_chars = [] 253 common_chars = "[]().,:" 254 for subpattern in subpatterns: 255 if any(rec_test(subpattern, lambda x: type(x) is str)): 256 if any(rec_test(subpattern, 257 lambda x: isinstance(x, str) and x in common_chars)): 258 subpatterns_with_common_chars.append(subpattern) 259 elif any(rec_test(subpattern, 260 lambda x: isinstance(x, str) and x in common_names)): 261 subpatterns_with_common_names.append(subpattern) 262 263 else: 264 subpatterns_with_names.append(subpattern) 265 266 if subpatterns_with_names: 267 subpatterns = subpatterns_with_names 268 elif subpatterns_with_common_names: 269 subpatterns = subpatterns_with_common_names 270 elif subpatterns_with_common_chars: 271 subpatterns = subpatterns_with_common_chars 272 # of the remaining subpatterns pick out the longest one 273 return max(subpatterns, key=len) 274 275 def rec_test(sequence, test_func): 276 """Tests test_func on all items of sequence and items of included 277 sub-iterables""" 278 for x in sequence: 279 if isinstance(x, (list, tuple)): 280 for y in rec_test(x, test_func): 281 yield y 282 else: 283 yield test_func(x) 284