Home | History | Annotate | Download | only in Compiler
      1 # cython: infer_types=True, language_level=3, py2_import=True
      2 #
      3 #   Cython Scanner
      4 #
      5 
      6 import os
      7 import platform
      8 
      9 import cython
# Declare types for these module-level names (all of them are imported
# further down in this file).  NOTE(review): presumably this only has an
# effect when the module is compiled by Cython -- confirm.
cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
               print_function=object)
     12 
     13 from Cython import Utils
     14 from Cython.Plex.Scanners import Scanner
     15 from Cython.Plex.Errors import UnrecognizedInput
     16 from Errors import error
     17 from Lexicon import any_string_prefix, make_lexicon, IDENT
     18 from Future import print_function
     19 
     20 from StringEncoding import EncodedString
     21 
     22 debug_scanner = 0
     23 trace_scanner = 0
     24 scanner_debug_flags = 0
     25 scanner_dump_file = None
     26 
     27 lexicon = None
     28 
     29 def get_lexicon():
     30     global lexicon
     31     if not lexicon:
     32         lexicon = make_lexicon()
     33     return lexicon
     34 
     35 #------------------------------------------------------------------
     36 
     37 py_reserved_words = [
     38     "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
     39     "continue", "return", "raise", "import", "exec", "try",
     40     "except", "finally", "while", "if", "elif", "else", "for",
     41     "in", "assert", "and", "or", "not", "is", "in", "lambda",
     42     "from", "yield", "with", "nonlocal",
     43 ]
     44 
     45 pyx_reserved_words = py_reserved_words + [
     46     "include", "ctypedef", "cdef", "cpdef",
     47     "cimport", "DEF", "IF", "ELIF", "ELSE"
     48 ]
     49 
     50 class Method(object):
     51 
     52     def __init__(self, name):
     53         self.name = name
     54         self.__name__ = name # for Plex tracing
     55 
     56     def __call__(self, stream, text):
     57         return getattr(stream, self.name)(text)
     58 
     59 #------------------------------------------------------------------
     60 
     61 class CompileTimeScope(object):
     62 
     63     def __init__(self, outer = None):
     64         self.entries = {}
     65         self.outer = outer
     66 
     67     def declare(self, name, value):
     68         self.entries[name] = value
     69 
     70     def update(self, other):
     71         self.entries.update(other)
     72 
     73     def lookup_here(self, name):
     74         return self.entries[name]
     75 
     76     def __contains__(self, name):
     77         return name in self.entries
     78 
     79     def lookup(self, name):
     80         try:
     81             return self.lookup_here(name)
     82         except KeyError:
     83             outer = self.outer
     84             if outer:
     85                 return outer.lookup(name)
     86             else:
     87                 raise
     88 
     89 def initial_compile_time_env():
     90     benv = CompileTimeScope()
     91     names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
     92         'UNAME_VERSION', 'UNAME_MACHINE')
     93     for name, value in zip(names, platform.uname()):
     94         benv.declare(name, value)
     95     try:
     96         import __builtin__ as builtins
     97     except ImportError:
     98         import builtins
     99 
    100     names = ('False', 'True',
    101              'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
    102              'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
    103              'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
    104              'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
    105              'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
    106              'sum', 'tuple', 'xrange', 'zip')
    107 
    108     for name in names:
    109         try:
    110             benv.declare(name, getattr(builtins, name))
    111         except AttributeError:
    112             # ignore, likely Py3
    113             pass
    114     denv = CompileTimeScope(benv)
    115     return denv
    116 
    117 #------------------------------------------------------------------
    118 
    119 class SourceDescriptor(object):
    120     """
    121     A SourceDescriptor should be considered immutable.
    122     """
    123     _file_type = 'pyx'
    124 
    125     _escaped_description = None
    126     _cmp_name = ''
    127     def __str__(self):
    128         assert False # To catch all places where a descriptor is used directly as a filename
    129 
    130     def set_file_type_from_name(self, filename):
    131         name, ext = os.path.splitext(filename)
    132         self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'
    133 
    134     def is_cython_file(self):
    135         return self._file_type in ('pyx', 'pxd')
    136 
    137     def is_python_file(self):
    138         return self._file_type == 'py'
    139 
    140     def get_escaped_description(self):
    141         if self._escaped_description is None:
    142             self._escaped_description = \
    143                 self.get_description().encode('ASCII', 'replace').decode("ASCII")
    144         return self._escaped_description
    145 
    146     def __gt__(self, other):
    147         # this is only used to provide some sort of order
    148         try:
    149             return self._cmp_name > other._cmp_name
    150         except AttributeError:
    151             return False
    152 
    153     def __lt__(self, other):
    154         # this is only used to provide some sort of order
    155         try:
    156             return self._cmp_name < other._cmp_name
    157         except AttributeError:
    158             return False
    159 
    160     def __le__(self, other):
    161         # this is only used to provide some sort of order
    162         try:
    163             return self._cmp_name <= other._cmp_name
    164         except AttributeError:
    165             return False
    166 
    167 class FileSourceDescriptor(SourceDescriptor):
    168     """
    169     Represents a code source. A code source is a more generic abstraction
    170     for a "filename" (as sometimes the code doesn't come from a file).
    171     Instances of code sources are passed to Scanner.__init__ as the
    172     optional name argument and will be passed back when asking for
    173     the position()-tuple.
    174     """
    175     def __init__(self, filename, path_description=None):
    176         filename = Utils.decode_filename(filename)
    177         self.path_description = path_description or filename
    178         self.filename = filename
    179         self.set_file_type_from_name(filename)
    180         self._cmp_name = filename
    181         self._lines = {}
    182 
    183     def get_lines(self, encoding=None, error_handling=None):
    184         # we cache the lines only the second time this is called, in
    185         # order to save memory when they are only used once
    186         key = (encoding, error_handling)
    187         try:
    188             lines = self._lines[key]
    189             if lines is not None:
    190                 return lines
    191         except KeyError:
    192             pass
    193         f = Utils.open_source_file(
    194             self.filename, encoding=encoding,
    195             error_handling=error_handling,
    196             # newline normalisation is costly before Py2.6
    197             require_normalised_newlines=False)
    198         try:
    199             lines = list(f)
    200         finally:
    201             f.close()
    202         if key in self._lines:
    203             self._lines[key] = lines
    204         else:
    205             # do not cache the first access, but remember that we
    206             # already read it once
    207             self._lines[key] = None
    208         return lines
    209 
    210     def get_description(self):
    211         return self.path_description
    212 
    213     def get_error_description(self):
    214         path = self.filename
    215         cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
    216         if path.startswith(cwd):
    217             return path[len(cwd):]
    218         return path
    219 
    220     def get_filenametable_entry(self):
    221         return self.filename
    222 
    223     def __eq__(self, other):
    224         return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
    225 
    226     def __hash__(self):
    227         return hash(self.filename)
    228 
    229     def __repr__(self):
    230         return "<FileSourceDescriptor:%s>" % self.filename
    231 
    232 class StringSourceDescriptor(SourceDescriptor):
    233     """
    234     Instances of this class can be used instead of a filenames if the
    235     code originates from a string object.
    236     """
    237     filename = None
    238 
    239     def __init__(self, name, code):
    240         self.name = name
    241         #self.set_file_type_from_name(name)
    242         self.codelines = [x + "\n" for x in code.split("\n")]
    243         self._cmp_name = name
    244 
    245     def get_lines(self, encoding=None, error_handling=None):
    246         if not encoding:
    247             return self.codelines
    248         else:
    249             return [ line.encode(encoding, error_handling).decode(encoding)
    250                      for line in self.codelines ]
    251 
    252     def get_description(self):
    253         return self.name
    254 
    255     get_error_description = get_description
    256 
    257     def get_filenametable_entry(self):
    258         return "stringsource"
    259 
    260     def __hash__(self):
    261         return id(self)
    262         # Do not hash on the name, an identical string source should be the
    263         # same object (name is often defaulted in other places)
    264         # return hash(self.name)
    265 
    266     def __eq__(self, other):
    267         return isinstance(other, StringSourceDescriptor) and self.name == other.name
    268 
    269     def __repr__(self):
    270         return "<StringSourceDescriptor:%s>" % self.name
    271 
    272 #------------------------------------------------------------------
    273 
    274 class PyrexScanner(Scanner):
    275     #  context            Context  Compilation context
    276     #  included_files     [string] Files included with 'include' statement
    277     #  compile_time_env   dict     Environment for conditional compilation
    278     #  compile_time_eval  boolean  In a true conditional compilation context
    279     #  compile_time_expr  boolean  In a compile-time expression context
    280 
    def __init__(self, file, filename, parent_scanner = None,
                 scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
        """
        Set up the scanner and read the first token.

        With a parent_scanner, the context, included files and compile-time
        state are shared with it.  Otherwise they come from `context` and
        `scope`, and a fresh compile-time environment is created and
        extended with context.options.compile_time_env when that is set.
        `filename` must provide is_python_file(), which selects the keyword
        set that is used.
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if not parent_scanner:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            extra_env = getattr(context.options, 'compile_time_env', None)
            if extra_env is not None:
                self.compile_time_env.update(extra_env)
        else:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        if not filename.is_python_file():
            self.in_python_file = False
            self.keywords = set(pyx_reserved_words)
        else:
            self.in_python_file = True
            self.keywords = set(py_reserved_words)
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        # self.sy must exist before the first next() call
        self.sy = ''
        self.next()
    314 
    def commentline(self, text):
        """Emit a 'commentline' token for `text`, unless comment parsing is off."""
        if not self.parse_comments:
            return
        self.produce('commentline', text)
    318 
    def current_level(self):
        """Return the indentation level on top of the indentation stack."""
        stack = self.indentation_stack
        return stack[-1]
    321 
    def open_bracket_action(self, text):
        """Count one more open bracket and return `text` unchanged."""
        self.bracket_nesting_level += 1
        return text
    325 
    def close_bracket_action(self, text):
        """Count one bracket less and return `text` unchanged."""
        self.bracket_nesting_level -= 1
        return text
    329 
    def newline_action(self, text):
        """Outside of brackets, enter the INDENT state and emit a NEWLINE token."""
        if self.bracket_nesting_level != 0:
            # newlines inside brackets produce nothing
            return
        self.begin('INDENT')
        self.produce('NEWLINE', '')
    334 
    # Opening quote -> scanner state that is used inside that kind of string.
    string_states = {
        "'": 'SQ_STRING',
        '"': 'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING',
    }

    def begin_string_action(self, text):
        """Skip the string prefix, enter the state for the quote, emit BEGIN_STRING."""
        start = 0
        while text[start:start + 1] in any_string_prefix:
            start += 1
        quote = text[start:]
        self.begin(self.string_states[quote])
        self.produce('BEGIN_STRING')
    347 
    def end_string_action(self, text):
        """Return to the default ('') state and emit END_STRING."""
        default_state = ''
        self.begin(default_state)
        self.produce('END_STRING')
    351 
    def unclosed_string_action(self, text):
        """Close the string as end_string_action() does, then report the error."""
        # finish the string first, then complain about it
        self.end_string_action(text)
        message = "Unclosed string literal"
        self.error(message)
    355 
    def indentation_action(self, text):
        """
        Handle the leading whitespace `text` of a line.

        Checks that one indentation character is used consistently, then
        compares len(text) with the current level and emits one INDENT, or
        one DEDENT per level that is popped.
        """
        self.begin('')
        # Check that tabs and spaces are being used consistently.
        if text:
            indent_char = text[0]
            if self.indentation_char is None:
                # the first indented line decides the indentation character
                self.indentation_char = indent_char
            elif self.indentation_char != indent_char:
                self.error("Mixed use of tabs and spaces")
            if text != indent_char * len(text):
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        new_level = len(text)
        current_level = self.current_level()
        if new_level > current_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        elif new_level < current_level:
            while new_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            # the new level has to match one of the enclosing levels
            if new_level != self.current_level():
                self.error("Inconsistent indentation")
    391 
    def eof_action(self, text):
        """Emit a DEDENT for every open indentation level, then EOF."""
        # the bottom entry of the stack always stays in place
        while self.indentation_stack[1:]:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')
    397 
    def next(self):
        """
        Read the next token into self.sy / self.systring.

        An identifier found in self.keywords becomes its own token type.
        'print' (with the print_function future directive) and 'exec' (at
        language level 3 or above) are instead removed from the keyword set
        and kept as identifiers.  Identifier text is wrapped in
        EncodedString.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
        if sy == IDENT:
            if systring not in self.keywords:
                systring = EncodedString(systring)
            elif systring == u'print' and print_function in self.context.future_directives:
                self.keywords.discard('print')
                systring = EncodedString(systring)
            elif systring == u'exec' and self.context.language_level >= 3:
                self.keywords.discard('exec')
                systring = EncodedString(systring)
            else:
                # a real keyword: the token type is the keyword itself
                sy = systring
        self.sy = sy
        self.systring = systring
        if False: # debug_scanner:
            _, line, col = self.position()
            if self.systring and self.sy != self.systring:
                t = "%s %s" % (self.sy, self.systring)
            else:
                t = self.sy
            print("--- %3d %2d %s" % (line, col, t))
    424 
    def peek(self):
        """Return the (sy, systring) pair of the following token without consuming it."""
        saved_sy, saved_systring = self.sy, self.systring
        self.next()
        upcoming = (self.sy, self.systring)
        # push the token back and restore the current one
        self.unread(*upcoming)
        self.sy, self.systring = saved_sy, saved_systring
        return upcoming
    432 
    def put_back(self, sy, systring):
        """Unread the current token and make (sy, systring) the current one."""
        current = (self.sy, self.systring)
        self.unread(*current)
        self.sy, self.systring = sy, systring
    437 
    def unread(self, token, value):
        """Insert (token, value) at the front of the token queue."""
        # This method should be added to Plex
        entry = (token, value)
        self.queue.insert(0, entry)
    441 
    def error(self, message, pos = None, fatal = True):
        """
        Report `message` at `pos` through the module-level error() function.

        `pos` defaults to the current position.  When the current token is
        an INDENT, a "Possible inconsistent indentation" report is made
        first.  If `fatal` is true, the object returned by error() for
        `message` is raised.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal:
            raise err
    449 
    def expect(self, what, message = None):
        """Consume the current token if its type is `what`, else report via expected()."""
        if self.sy != what:
            self.expected(what, message)
        else:
            self.next()
    455 
    def expect_keyword(self, what, message = None):
        """Consume the current token if it is the identifier `what`, else report via expected()."""
        is_match = self.sy == IDENT and self.systring == what
        if not is_match:
            self.expected(what, message)
        else:
            self.next()
    461 
    def expected(self, what, message = None):
        """Report `message` if given, otherwise a generic "Expected ..., found ..." error."""
        if message:
            self.error(message)
            return
        # identifiers are reported by their text, other tokens by their type
        found = self.sy
        if found == IDENT:
            found = self.systring
        self.error("Expected '%s', found '%s'" % (what, found))
    471 
    def expect_indent(self):
        """Require the current token to be an INDENT."""
        message = "Expected an increase in indentation level"
        self.expect('INDENT', message)
    475 
    def expect_dedent(self):
        """Require the current token to be a DEDENT."""
        message = "Expected a decrease in indentation level"
        self.expect('DEDENT', message)
    479 
    def expect_newline(self, message = "Expected a newline"):
        """Require a NEWLINE token, unless the current token is EOF."""
        # Expect either a newline or end of file
        if self.sy == 'EOF':
            return
        self.expect('NEWLINE', message)
    484