# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import codecs
import os
import logging
import StringIO
import sys

# Pgen imports
from . import grammar, parse, token, tokenize, pgen


class Driver(object):

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = u""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        stream = codecs.open(filename, "r", encoding)
        try:
            return self.parse_stream(stream, debug)
        finally:
            stream.close()

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)


def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger()
    if gp is None:
        head, tail = os.path.splitext(gt)
        if tail == ".txt":
            tail = ""
        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except IOError, e:
                logger.info("Writing failed:"+str(e))
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)


def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True

if __name__ == "__main__":
    sys.exit(int(not main()))
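

# A minimal usage sketch of the high-level interface described in the module
# docstring: it assumes a pgen-compatible "Grammar.txt" file is available on
# disk.  The grammar path, the _demo_parse name, and the sample snippet are
# illustrative assumptions only, not part of the public API.
def _demo_parse():
    """Parse a one-line assignment and return the resulting syntax tree."""
    g = load_grammar("Grammar.txt")   # generate or load the pickled parse tables
    d = Driver(g)                     # no convert callback: the parser's default
                                      # node conversion is used
    return d.parse_string("x = 1\n")  # root node of the tree for the snippet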