# lib2to3/pgen2/driver.py
      1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
      2 # Licensed to PSF under a Contributor Agreement.
      3 
      4 # Modifications:
      5 # Copyright 2006 Google, Inc. All Rights Reserved.
      6 # Licensed to PSF under a Contributor Agreement.
      7 
      8 """Parser driver.
      9 
     10 This provides a high-level interface to parse a file into a syntax tree.
     11 
     12 """
     13 
     14 __author__ = "Guido van Rossum <guido (at] python.org>"
     15 
     16 __all__ = ["Driver", "load_grammar"]
     17 
     18 # Python imports
     19 import codecs
     20 import os
     21 import logging
     22 import StringIO
     23 import sys
     24 
     25 # Pgen imports
     26 from . import grammar, parse, token, tokenize, pgen
     27 
     28 
class Driver(object):
    """High-level parsing interface around parse.Parser.

    Wraps a grammar and an optional conversion callback, and offers
    convenience entry points for parsing token streams, open streams,
    files, and strings into a syntax tree.
    """

    def __init__(self, grammar, convert=None, logger=None):
        # grammar: a grammar.Grammar instance handed to parse.Parser.
        # convert: optional node-conversion callback, passed through
        #   unchanged to parse.Parser.
        # logger: logging.Logger used for debug output; defaults to the
        #   root logger.
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree.

        tokens is an iterable of 5-tuples as produced by
        tokenize.generate_tokens().  Raises parse.ParseError if the
        token stream ends before the grammar's start symbol completes.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) tracks where the previous token ended;
        # any source text between that point and the next token's start
        # is accumulated into `prefix` and attached to the next node.
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = u""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # There is skipped text (whitespace) before this token;
                # fold it into the prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are not grammar
                # tokens; they become prefix text of the next real token.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                # Narrow the generic OP token to the specific operator
                # symbol the grammar expects.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                # addtoken() returns True once the start symbol is
                # complete; the tree is finished.
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        # codecs.open() gives unicode lines when an encoding is supplied.
        stream = codecs.open(filename, "r", encoding)
        try:
            return self.parse_stream(stream, debug)
        finally:
            stream.close()

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)
    107 
    108 
    109 def load_grammar(gt="Grammar.txt", gp=None,
    110                  save=True, force=False, logger=None):
    111     """Load the grammar (maybe from a pickle)."""
    112     if logger is None:
    113         logger = logging.getLogger()
    114     if gp is None:
    115         head, tail = os.path.splitext(gt)
    116         if tail == ".txt":
    117             tail = ""
    118         gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    119     if force or not _newer(gp, gt):
    120         logger.info("Generating grammar tables from %s", gt)
    121         g = pgen.generate_grammar(gt)
    122         if save:
    123             logger.info("Writing grammar tables to %s", gp)
    124             try:
    125                 g.dump(gp)
    126             except IOError, e:
    127                 logger.info("Writing failed:"+str(e))
    128     else:
    129         g = grammar.Grammar()
    130         g.load(gp)
    131     return g
    132 
    133 
    134 def _newer(a, b):
    135     """Inquire whether file a was written since file b."""
    136     if not os.path.exists(a):
    137         return False
    138     if not os.path.exists(b):
    139         return True
    140     return os.path.getmtime(a) >= os.path.getmtime(b)
    141 
    142 
    143 def main(*args):
    144     """Main program, when run as a script: produce grammar pickle files.
    145 
    146     Calls load_grammar for each argument, a path to a grammar text file.
    147     """
    148     if not args:
    149         args = sys.argv[1:]
    150     logging.basicConfig(level=logging.INFO, stream=sys.stdout,
    151                         format='%(message)s')
    152     for gt in args:
    153         load_grammar(gt, save=True, force=True)
    154     return True
    155 
if __name__ == "__main__":
    # Exit status 0 when main() returns True, 1 otherwise.
    sys.exit(int(not main()))
    158