1 # Copyright 2006 Google, Inc. All Rights Reserved. 2 # Licensed to PSF under a Contributor Agreement. 3 4 """Pattern compiler. 5 6 The grammer is taken from PatternGrammar.txt. 7 8 The compiler compiles a pattern to a pytree.*Pattern instance. 9 """ 10 11 __author__ = "Guido van Rossum <guido (at] python.org>" 12 13 # Python imports 14 import os 15 import StringIO 16 17 # Fairly local imports 18 from .pgen2 import driver, literals, token, tokenize, parse, grammar 19 20 # Really local imports 21 from . import pytree 22 from . import pygram 23 24 # The pattern grammar file 25 _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), 26 "PatternGrammar.txt") 27 28 29 class PatternSyntaxError(Exception): 30 pass 31 32 33 def tokenize_wrapper(input): 34 """Tokenizes a string suppressing significant whitespace.""" 35 skip = set((token.NEWLINE, token.INDENT, token.DEDENT)) 36 tokens = tokenize.generate_tokens(StringIO.StringIO(input).readline) 37 for quintuple in tokens: 38 type, value, start, end, line_text = quintuple 39 if type not in skip: 40 yield quintuple 41 42 43 class PatternCompiler(object): 44 45 def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE): 46 """Initializer. 47 48 Takes an optional alternative filename for the pattern grammar. 49 """ 50 self.grammar = driver.load_grammar(grammar_file) 51 self.syms = pygram.Symbols(self.grammar) 52 self.pygrammar = pygram.python_grammar 53 self.pysyms = pygram.python_symbols 54 self.driver = driver.Driver(self.grammar, convert=pattern_convert) 55 56 def compile_pattern(self, input, debug=False, with_tree=False): 57 """Compiles a pattern string to a nested pytree.*Pattern object.""" 58 tokens = tokenize_wrapper(input) 59 try: 60 root = self.driver.parse_tokens(tokens, debug=debug) 61 except parse.ParseError as e: 62 raise PatternSyntaxError(str(e)) 63 if with_tree: 64 return self.compile_node(root), root 65 else: 66 return self.compile_node(root) 67 68 def compile_node(self, node): 69 """Compiles a node, recursively. 70 71 This is one big switch on the node type. 72 """ 73 # XXX Optimize certain Wildcard-containing-Wildcard patterns 74 # that can be merged 75 if node.type == self.syms.Matcher: 76 node = node.children[0] # Avoid unneeded recursion 77 78 if node.type == self.syms.Alternatives: 79 # Skip the odd children since they are just '|' tokens 80 alts = [self.compile_node(ch) for ch in node.children[::2]] 81 if len(alts) == 1: 82 return alts[0] 83 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) 84 return p.optimize() 85 86 if node.type == self.syms.Alternative: 87 units = [self.compile_node(ch) for ch in node.children] 88 if len(units) == 1: 89 return units[0] 90 p = pytree.WildcardPattern([units], min=1, max=1) 91 return p.optimize() 92 93 if node.type == self.syms.NegatedUnit: 94 pattern = self.compile_basic(node.children[1:]) 95 p = pytree.NegatedPattern(pattern) 96 return p.optimize() 97 98 assert node.type == self.syms.Unit 99 100 name = None 101 nodes = node.children 102 if len(nodes) >= 3 and nodes[1].type == token.EQUAL: 103 name = nodes[0].value 104 nodes = nodes[2:] 105 repeat = None 106 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: 107 repeat = nodes[-1] 108 nodes = nodes[:-1] 109 110 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] 111 pattern = self.compile_basic(nodes, repeat) 112 113 if repeat is not None: 114 assert repeat.type == self.syms.Repeater 115 children = repeat.children 116 child = children[0] 117 if child.type == token.STAR: 118 min = 0 119 max = pytree.HUGE 120 elif child.type == token.PLUS: 121 min = 1 122 max = pytree.HUGE 123 elif child.type == token.LBRACE: 124 assert children[-1].type == token.RBRACE 125 assert len(children) in (3, 5) 126 min = max = self.get_int(children[1]) 127 if len(children) == 5: 128 max = self.get_int(children[3]) 129 else: 130 assert False 131 if min != 1 or max != 1: 132 pattern = pattern.optimize() 133 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) 134 135 if name is not None: 136 pattern.name = name 137 return pattern.optimize() 138 139 def compile_basic(self, nodes, repeat=None): 140 # Compile STRING | NAME [Details] | (...) | [...] 141 assert len(nodes) >= 1 142 node = nodes[0] 143 if node.type == token.STRING: 144 value = unicode(literals.evalString(node.value)) 145 return pytree.LeafPattern(_type_of_literal(value), value) 146 elif node.type == token.NAME: 147 value = node.value 148 if value.isupper(): 149 if value not in TOKEN_MAP: 150 raise PatternSyntaxError("Invalid token: %r" % value) 151 if nodes[1:]: 152 raise PatternSyntaxError("Can't have details for token") 153 return pytree.LeafPattern(TOKEN_MAP[value]) 154 else: 155 if value == "any": 156 type = None 157 elif not value.startswith("_"): 158 type = getattr(self.pysyms, value, None) 159 if type is None: 160 raise PatternSyntaxError("Invalid symbol: %r" % value) 161 if nodes[1:]: # Details present 162 content = [self.compile_node(nodes[1].children[1])] 163 else: 164 content = None 165 return pytree.NodePattern(type, content) 166 elif node.value == "(": 167 return self.compile_node(nodes[1]) 168 elif node.value == "[": 169 assert repeat is None 170 subpattern = self.compile_node(nodes[1]) 171 return pytree.WildcardPattern([[subpattern]], min=0, max=1) 172 assert False, node 173 174 def get_int(self, node): 175 assert node.type == token.NUMBER 176 return int(node.value) 177 178 179 # Map named tokens to the type value for a LeafPattern 180 TOKEN_MAP = {"NAME": token.NAME, 181 "STRING": token.STRING, 182 "NUMBER": token.NUMBER, 183 "TOKEN": None} 184 185 186 def _type_of_literal(value): 187 if value[0].isalpha(): 188 return token.NAME 189 elif value in grammar.opmap: 190 return grammar.opmap[value] 191 else: 192 return None 193 194 195 def pattern_convert(grammar, raw_node_info): 196 """Converts raw node information to a Node or Leaf instance.""" 197 type, value, context, children = raw_node_info 198 if children or type in grammar.number2symbol: 199 return pytree.Node(type, children, context=context) 200 else: 201 return pytree.Leaf(type, value, context=context) 202 203 204 def compile_pattern(pattern): 205 return PatternCompiler().compile_pattern(pattern) 206