Home | History | Annotate | Download | only in lib2to3
      1 # Copyright 2006 Google, Inc. All Rights Reserved.
      2 # Licensed to PSF under a Contributor Agreement.
      3 
      4 """Pattern compiler.
      5 
      6 The grammer is taken from PatternGrammar.txt.
      7 
      8 The compiler compiles a pattern to a pytree.*Pattern instance.
      9 """
     10 
     11 __author__ = "Guido van Rossum <guido (at] python.org>"
     12 
     13 # Python imports
     14 import os
     15 import StringIO
     16 
     17 # Fairly local imports
     18 from .pgen2 import driver, literals, token, tokenize, parse, grammar
     19 
     20 # Really local imports
     21 from . import pytree
     22 from . import pygram
     23 
     24 # The pattern grammar file
     25 _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
     26                                      "PatternGrammar.txt")
     27 
     28 
     29 class PatternSyntaxError(Exception):
     30     pass
     31 
     32 
     33 def tokenize_wrapper(input):
     34     """Tokenizes a string suppressing significant whitespace."""
     35     skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
     36     tokens = tokenize.generate_tokens(StringIO.StringIO(input).readline)
     37     for quintuple in tokens:
     38         type, value, start, end, line_text = quintuple
     39         if type not in skip:
     40             yield quintuple
     41 
     42 
     43 class PatternCompiler(object):
     44 
     45     def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
     46         """Initializer.
     47 
     48         Takes an optional alternative filename for the pattern grammar.
     49         """
     50         self.grammar = driver.load_grammar(grammar_file)
     51         self.syms = pygram.Symbols(self.grammar)
     52         self.pygrammar = pygram.python_grammar
     53         self.pysyms = pygram.python_symbols
     54         self.driver = driver.Driver(self.grammar, convert=pattern_convert)
     55 
     56     def compile_pattern(self, input, debug=False, with_tree=False):
     57         """Compiles a pattern string to a nested pytree.*Pattern object."""
     58         tokens = tokenize_wrapper(input)
     59         try:
     60             root = self.driver.parse_tokens(tokens, debug=debug)
     61         except parse.ParseError as e:
     62             raise PatternSyntaxError(str(e))
     63         if with_tree:
     64             return self.compile_node(root), root
     65         else:
     66             return self.compile_node(root)
     67 
     68     def compile_node(self, node):
     69         """Compiles a node, recursively.
     70 
     71         This is one big switch on the node type.
     72         """
     73         # XXX Optimize certain Wildcard-containing-Wildcard patterns
     74         # that can be merged
     75         if node.type == self.syms.Matcher:
     76             node = node.children[0] # Avoid unneeded recursion
     77 
     78         if node.type == self.syms.Alternatives:
     79             # Skip the odd children since they are just '|' tokens
     80             alts = [self.compile_node(ch) for ch in node.children[::2]]
     81             if len(alts) == 1:
     82                 return alts[0]
     83             p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
     84             return p.optimize()
     85 
     86         if node.type == self.syms.Alternative:
     87             units = [self.compile_node(ch) for ch in node.children]
     88             if len(units) == 1:
     89                 return units[0]
     90             p = pytree.WildcardPattern([units], min=1, max=1)
     91             return p.optimize()
     92 
     93         if node.type == self.syms.NegatedUnit:
     94             pattern = self.compile_basic(node.children[1:])
     95             p = pytree.NegatedPattern(pattern)
     96             return p.optimize()
     97 
     98         assert node.type == self.syms.Unit
     99 
    100         name = None
    101         nodes = node.children
    102         if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
    103             name = nodes[0].value
    104             nodes = nodes[2:]
    105         repeat = None
    106         if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
    107             repeat = nodes[-1]
    108             nodes = nodes[:-1]
    109 
    110         # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
    111         pattern = self.compile_basic(nodes, repeat)
    112 
    113         if repeat is not None:
    114             assert repeat.type == self.syms.Repeater
    115             children = repeat.children
    116             child = children[0]
    117             if child.type == token.STAR:
    118                 min = 0
    119                 max = pytree.HUGE
    120             elif child.type == token.PLUS:
    121                 min = 1
    122                 max = pytree.HUGE
    123             elif child.type == token.LBRACE:
    124                 assert children[-1].type == token.RBRACE
    125                 assert  len(children) in (3, 5)
    126                 min = max = self.get_int(children[1])
    127                 if len(children) == 5:
    128                     max = self.get_int(children[3])
    129             else:
    130                 assert False
    131             if min != 1 or max != 1:
    132                 pattern = pattern.optimize()
    133                 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
    134 
    135         if name is not None:
    136             pattern.name = name
    137         return pattern.optimize()
    138 
    139     def compile_basic(self, nodes, repeat=None):
    140         # Compile STRING | NAME [Details] | (...) | [...]
    141         assert len(nodes) >= 1
    142         node = nodes[0]
    143         if node.type == token.STRING:
    144             value = unicode(literals.evalString(node.value))
    145             return pytree.LeafPattern(_type_of_literal(value), value)
    146         elif node.type == token.NAME:
    147             value = node.value
    148             if value.isupper():
    149                 if value not in TOKEN_MAP:
    150                     raise PatternSyntaxError("Invalid token: %r" % value)
    151                 if nodes[1:]:
    152                     raise PatternSyntaxError("Can't have details for token")
    153                 return pytree.LeafPattern(TOKEN_MAP[value])
    154             else:
    155                 if value == "any":
    156                     type = None
    157                 elif not value.startswith("_"):
    158                     type = getattr(self.pysyms, value, None)
    159                     if type is None:
    160                         raise PatternSyntaxError("Invalid symbol: %r" % value)
    161                 if nodes[1:]: # Details present
    162                     content = [self.compile_node(nodes[1].children[1])]
    163                 else:
    164                     content = None
    165                 return pytree.NodePattern(type, content)
    166         elif node.value == "(":
    167             return self.compile_node(nodes[1])
    168         elif node.value == "[":
    169             assert repeat is None
    170             subpattern = self.compile_node(nodes[1])
    171             return pytree.WildcardPattern([[subpattern]], min=0, max=1)
    172         assert False, node
    173 
    174     def get_int(self, node):
    175         assert node.type == token.NUMBER
    176         return int(node.value)
    177 
    178 
    179 # Map named tokens to the type value for a LeafPattern
    180 TOKEN_MAP = {"NAME": token.NAME,
    181              "STRING": token.STRING,
    182              "NUMBER": token.NUMBER,
    183              "TOKEN": None}
    184 
    185 
    186 def _type_of_literal(value):
    187     if value[0].isalpha():
    188         return token.NAME
    189     elif value in grammar.opmap:
    190         return grammar.opmap[value]
    191     else:
    192         return None
    193 
    194 
    195 def pattern_convert(grammar, raw_node_info):
    196     """Converts raw node information to a Node or Leaf instance."""
    197     type, value, context, children = raw_node_info
    198     if children or type in grammar.number2symbol:
    199         return pytree.Node(type, children, context=context)
    200     else:
    201         return pytree.Leaf(type, value, context=context)
    202 
    203 
    204 def compile_pattern(pattern):
    205     return PatternCompiler().compile_pattern(pattern)
    206