Home | History | Annotate | Download | only in parse
      1 # Copyright 2014 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Generates a syntax tree from a Mojo IDL file."""
      6 
      7 import imp
      8 import os.path
      9 import sys
     10 
     11 def _GetDirAbove(dirname):
     12   """Returns the directory "above" this file containing |dirname| (which must
     13   also be "above" this file)."""
     14   path = os.path.abspath(__file__)
     15   while True:
     16     path, tail = os.path.split(path)
     17     assert tail
     18     if tail == dirname:
     19       return path
     20 
# Use "ply" from the regular module search path when it can be found there.
# Otherwise extend |sys.path| with the "third_party" directory that sits next
# to the enclosing "mojo" directory, so the imports below can pick it up from
# there.
# NOTE(review): the |imp| module is deprecated and was removed in Python 3.12;
# confirm which interpreters this file still has to support.
try:
  imp.find_module("ply")
except ImportError:
  sys.path.append(os.path.join(_GetDirAbove("mojo"), "third_party"))
     25 from ply import lex
     26 from ply import yacc
     27 
     28 from ..error import Error
     29 from . import ast
     30 from .lexer import Lexer
     31 
     32 
# Largest ordinal value accepted by |Parser.p_ordinal_2|; anything larger is
# reported as a ParseError.
_MAX_ORDINAL_VALUE = 0xffffffff
# Largest element count accepted by |Parser.p_fixed_array|; a size of zero or
# anything larger than this is reported as a ParseError.
_MAX_ARRAY_SIZE = 0xffffffff
     35 
     36 
     37 class ParseError(Error):
     38   """Class for errors from the parser."""
     39 
     40   def __init__(self, filename, message, lineno=None, snippet=None):
     41     Error.__init__(self, filename, message, lineno=lineno,
     42                    addenda=([snippet] if snippet else None))
     43 
     44 
     45 # We have methods which look like they could be functions:
     46 # pylint: disable=R0201
     47 class Parser(object):
     48 
     49   def __init__(self, lexer, source, filename):
     50     self.tokens = lexer.tokens
     51     self.source = source
     52     self.filename = filename
     53 
     54   # Names of functions
     55   #
     56   # In general, we name functions after the left-hand-side of the rule(s) that
     57   # they handle. E.g., |p_foo_bar| for a rule |foo_bar : ...|.
     58   #
     59   # There may be multiple functions handling rules for the same left-hand-side;
     60   # then we name the functions |p_foo_bar_N| (for left-hand-side |foo_bar|),
     61   # where N is a number (numbered starting from 1). Note that using multiple
     62   # functions is actually more efficient than having single functions handle
     63   # multiple rules (and, e.g., distinguishing them by examining |len(p)|).
     64   #
     65   # It's also possible to have a function handling multiple rules with different
     66   # left-hand-sides. We do not do this.
     67   #
     68   # See http://www.dabeaz.com/ply/ply.html#ply_nn25 for more details.
     69 
     70   # TODO(vtl): Get rid of the braces in the module "statement". (Consider
     71   # renaming "module" -> "package".) Then we'll be able to have a single rule
     72   # for root (by making module "optional").
     73   def p_root_1(self, p):
     74     """root : """
     75     p[0] = ast.Mojom(None, ast.ImportList(), [])
     76 
     77   def p_root_2(self, p):
     78     """root : root module"""
     79     if p[1].module is not None:
     80       raise ParseError(self.filename,
     81                        "Multiple \"module\" statements not allowed:",
     82                        p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
     83     if p[1].import_list.items or p[1].definition_list:
     84       raise ParseError(
     85           self.filename,
     86           "\"module\" statements must precede imports and definitions:",
     87           p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
     88     p[0] = p[1]
     89     p[0].module = p[2]
     90 
     91   def p_root_3(self, p):
     92     """root : root import"""
     93     if p[1].definition_list:
     94       raise ParseError(self.filename,
     95                        "\"import\" statements must precede definitions:",
     96                        p[2].lineno, snippet=self._GetSnippet(p[2].lineno))
     97     p[0] = p[1]
     98     p[0].import_list.Append(p[2])
     99 
    100   def p_root_4(self, p):
    101     """root : root definition"""
    102     p[0] = p[1]
    103     p[0].definition_list.append(p[2])
    104 
    105   def p_import(self, p):
    106     """import : IMPORT STRING_LITERAL SEMI"""
    107     # 'eval' the literal to strip the quotes.
    108     # TODO(vtl): This eval is dubious. We should unquote/unescape ourselves.
    109     p[0] = ast.Import(eval(p[2]), filename=self.filename, lineno=p.lineno(2))
    110 
    111   def p_module(self, p):
    112     """module : attribute_section MODULE identifier_wrapped SEMI"""
    113     p[0] = ast.Module(p[3], p[1], filename=self.filename, lineno=p.lineno(2))
    114 
    115   def p_definition(self, p):
    116     """definition : struct
    117                   | union
    118                   | interface
    119                   | enum
    120                   | const"""
    121     p[0] = p[1]
    122 
    123   def p_attribute_section_1(self, p):
    124     """attribute_section : """
    125     p[0] = None
    126 
    127   def p_attribute_section_2(self, p):
    128     """attribute_section : LBRACKET attribute_list RBRACKET"""
    129     p[0] = p[2]
    130 
    131   def p_attribute_list_1(self, p):
    132     """attribute_list : """
    133     p[0] = ast.AttributeList()
    134 
    135   def p_attribute_list_2(self, p):
    136     """attribute_list : nonempty_attribute_list"""
    137     p[0] = p[1]
    138 
    139   def p_nonempty_attribute_list_1(self, p):
    140     """nonempty_attribute_list : attribute"""
    141     p[0] = ast.AttributeList(p[1])
    142 
    143   def p_nonempty_attribute_list_2(self, p):
    144     """nonempty_attribute_list : nonempty_attribute_list COMMA attribute"""
    145     p[0] = p[1]
    146     p[0].Append(p[3])
    147 
    148   def p_attribute_1(self, p):
    149     """attribute : NAME EQUALS evaled_literal
    150                  | NAME EQUALS NAME"""
    151     p[0] = ast.Attribute(p[1], p[3], filename=self.filename, lineno=p.lineno(1))
    152 
    153   def p_attribute_2(self, p):
    154     """attribute : NAME"""
    155     p[0] = ast.Attribute(p[1], True, filename=self.filename, lineno=p.lineno(1))
    156 
    157   def p_evaled_literal(self, p):
    158     """evaled_literal : literal"""
    159     # 'eval' the literal to strip the quotes. Handle keywords "true" and "false"
    160     # specially since they cannot directly be evaluated to python boolean
    161     # values.
    162     if p[1] == "true":
    163       p[0] = True
    164     elif p[1] == "false":
    165       p[0] = False
    166     else:
    167       p[0] = eval(p[1])
    168 
    169   def p_struct_1(self, p):
    170     """struct : attribute_section STRUCT NAME LBRACE struct_body RBRACE SEMI"""
    171     p[0] = ast.Struct(p[3], p[1], p[5])
    172 
    173   def p_struct_2(self, p):
    174     """struct : attribute_section STRUCT NAME SEMI"""
    175     p[0] = ast.Struct(p[3], p[1], None)
    176 
    177   def p_struct_body_1(self, p):
    178     """struct_body : """
    179     p[0] = ast.StructBody()
    180 
    181   def p_struct_body_2(self, p):
    182     """struct_body : struct_body const
    183                    | struct_body enum
    184                    | struct_body struct_field"""
    185     p[0] = p[1]
    186     p[0].Append(p[2])
    187 
    188   def p_struct_field(self, p):
    189     """struct_field : attribute_section typename NAME ordinal default SEMI"""
    190     p[0] = ast.StructField(p[3], p[1], p[4], p[2], p[5])
    191 
    192   def p_union(self, p):
    193     """union : attribute_section UNION NAME LBRACE union_body RBRACE SEMI"""
    194     p[0] = ast.Union(p[3], p[1], p[5])
    195 
    196   def p_union_body_1(self, p):
    197     """union_body : """
    198     p[0] = ast.UnionBody()
    199 
    200   def p_union_body_2(self, p):
    201     """union_body : union_body union_field"""
    202     p[0] = p[1]
    203     p[1].Append(p[2])
    204 
    205   def p_union_field(self, p):
    206     """union_field : attribute_section typename NAME ordinal SEMI"""
    207     p[0] = ast.UnionField(p[3], p[1], p[4], p[2])
    208 
    209   def p_default_1(self, p):
    210     """default : """
    211     p[0] = None
    212 
    213   def p_default_2(self, p):
    214     """default : EQUALS constant"""
    215     p[0] = p[2]
    216 
    217   def p_interface(self, p):
    218     """interface : attribute_section INTERFACE NAME LBRACE interface_body \
    219                        RBRACE SEMI"""
    220     p[0] = ast.Interface(p[3], p[1], p[5])
    221 
    222   def p_interface_body_1(self, p):
    223     """interface_body : """
    224     p[0] = ast.InterfaceBody()
    225 
    226   def p_interface_body_2(self, p):
    227     """interface_body : interface_body const
    228                       | interface_body enum
    229                       | interface_body method"""
    230     p[0] = p[1]
    231     p[0].Append(p[2])
    232 
    233   def p_response_1(self, p):
    234     """response : """
    235     p[0] = None
    236 
    237   def p_response_2(self, p):
    238     """response : RESPONSE LPAREN parameter_list RPAREN"""
    239     p[0] = p[3]
    240 
    241   def p_method(self, p):
    242     """method : attribute_section NAME ordinal LPAREN parameter_list RPAREN \
    243                     response SEMI"""
    244     p[0] = ast.Method(p[2], p[1], p[3], p[5], p[7])
    245 
    246   def p_parameter_list_1(self, p):
    247     """parameter_list : """
    248     p[0] = ast.ParameterList()
    249 
    250   def p_parameter_list_2(self, p):
    251     """parameter_list : nonempty_parameter_list"""
    252     p[0] = p[1]
    253 
    254   def p_nonempty_parameter_list_1(self, p):
    255     """nonempty_parameter_list : parameter"""
    256     p[0] = ast.ParameterList(p[1])
    257 
    258   def p_nonempty_parameter_list_2(self, p):
    259     """nonempty_parameter_list : nonempty_parameter_list COMMA parameter"""
    260     p[0] = p[1]
    261     p[0].Append(p[3])
    262 
    263   def p_parameter(self, p):
    264     """parameter : attribute_section typename NAME ordinal"""
    265     p[0] = ast.Parameter(p[3], p[1], p[4], p[2],
    266                          filename=self.filename, lineno=p.lineno(3))
    267 
    268   def p_typename(self, p):
    269     """typename : nonnullable_typename QSTN
    270                 | nonnullable_typename"""
    271     if len(p) == 2:
    272       p[0] = p[1]
    273     else:
    274       p[0] = p[1] + "?"
    275 
    276   def p_nonnullable_typename(self, p):
    277     """nonnullable_typename : basictypename
    278                             | array
    279                             | fixed_array
    280                             | associative_array
    281                             | interfacerequest"""
    282     p[0] = p[1]
    283 
    284   def p_basictypename(self, p):
    285     """basictypename : identifier
    286                      | ASSOCIATED identifier
    287                      | handletype"""
    288     if len(p) == 2:
    289       p[0] = p[1]
    290     else:
    291       p[0] = "asso<" + p[2] + ">"
    292 
    293   def p_handletype(self, p):
    294     """handletype : HANDLE
    295                   | HANDLE LANGLE NAME RANGLE"""
    296     if len(p) == 2:
    297       p[0] = p[1]
    298     else:
    299       if p[3] not in ('data_pipe_consumer',
    300                       'data_pipe_producer',
    301                       'message_pipe',
    302                       'shared_buffer'):
    303         # Note: We don't enable tracking of line numbers for everything, so we
    304         # can't use |p.lineno(3)|.
    305         raise ParseError(self.filename, "Invalid handle type %r:" % p[3],
    306                          lineno=p.lineno(1),
    307                          snippet=self._GetSnippet(p.lineno(1)))
    308       p[0] = "handle<" + p[3] + ">"
    309 
    310   def p_array(self, p):
    311     """array : ARRAY LANGLE typename RANGLE"""
    312     p[0] = p[3] + "[]"
    313 
    314   def p_fixed_array(self, p):
    315     """fixed_array : ARRAY LANGLE typename COMMA INT_CONST_DEC RANGLE"""
    316     value = int(p[5])
    317     if value == 0 or value > _MAX_ARRAY_SIZE:
    318       raise ParseError(self.filename, "Fixed array size %d invalid:" % value,
    319                        lineno=p.lineno(5),
    320                        snippet=self._GetSnippet(p.lineno(5)))
    321     p[0] = p[3] + "[" + p[5] + "]"
    322 
    323   def p_associative_array(self, p):
    324     """associative_array : MAP LANGLE identifier COMMA typename RANGLE"""
    325     p[0] = p[5] + "{" + p[3] + "}"
    326 
    327   def p_interfacerequest(self, p):
    328     """interfacerequest : identifier AMP
    329                         | ASSOCIATED identifier AMP"""
    330     if len(p) == 3:
    331       p[0] = p[1] + "&"
    332     else:
    333       p[0] = "asso<" + p[2] + "&>"
    334 
    335   def p_ordinal_1(self, p):
    336     """ordinal : """
    337     p[0] = None
    338 
    339   def p_ordinal_2(self, p):
    340     """ordinal : ORDINAL"""
    341     value = int(p[1][1:])
    342     if value > _MAX_ORDINAL_VALUE:
    343       raise ParseError(self.filename, "Ordinal value %d too large:" % value,
    344                        lineno=p.lineno(1),
    345                        snippet=self._GetSnippet(p.lineno(1)))
    346     p[0] = ast.Ordinal(value, filename=self.filename, lineno=p.lineno(1))
    347 
    348   def p_enum_1(self, p):
    349     """enum : attribute_section ENUM NAME LBRACE enum_value_list \
    350                   RBRACE SEMI
    351             | attribute_section ENUM NAME LBRACE nonempty_enum_value_list \
    352                   COMMA RBRACE SEMI"""
    353     p[0] = ast.Enum(p[3], p[1], p[5], filename=self.filename,
    354                     lineno=p.lineno(2))
    355 
    356   def p_enum_2(self, p):
    357     """enum : attribute_section ENUM NAME SEMI"""
    358     p[0] = ast.Enum(p[3], p[1], None, filename=self.filename,
    359                     lineno=p.lineno(2))
    360 
    361   def p_enum_value_list_1(self, p):
    362     """enum_value_list : """
    363     p[0] = ast.EnumValueList()
    364 
    365   def p_enum_value_list_2(self, p):
    366     """enum_value_list : nonempty_enum_value_list"""
    367     p[0] = p[1]
    368 
    369   def p_nonempty_enum_value_list_1(self, p):
    370     """nonempty_enum_value_list : enum_value"""
    371     p[0] = ast.EnumValueList(p[1])
    372 
    373   def p_nonempty_enum_value_list_2(self, p):
    374     """nonempty_enum_value_list : nonempty_enum_value_list COMMA enum_value"""
    375     p[0] = p[1]
    376     p[0].Append(p[3])
    377 
    378   def p_enum_value(self, p):
    379     """enum_value : attribute_section NAME
    380                   | attribute_section NAME EQUALS int
    381                   | attribute_section NAME EQUALS identifier_wrapped"""
    382     p[0] = ast.EnumValue(p[2], p[1], p[4] if len(p) == 5 else None,
    383                          filename=self.filename, lineno=p.lineno(2))
    384 
    385   def p_const(self, p):
    386     """const : CONST typename NAME EQUALS constant SEMI"""
    387     p[0] = ast.Const(p[3], p[2], p[5])
    388 
    389   def p_constant(self, p):
    390     """constant : literal
    391                 | identifier_wrapped"""
    392     p[0] = p[1]
    393 
    394   def p_identifier_wrapped(self, p):
    395     """identifier_wrapped : identifier"""
    396     p[0] = ('IDENTIFIER', p[1])
    397 
    398   # TODO(vtl): Make this produce a "wrapped" identifier (probably as an
    399   # |ast.Identifier|, to be added) and get rid of identifier_wrapped.
    400   def p_identifier(self, p):
    401     """identifier : NAME
    402                   | NAME DOT identifier"""
    403     p[0] = ''.join(p[1:])
    404 
    405   def p_literal(self, p):
    406     """literal : int
    407                | float
    408                | TRUE
    409                | FALSE
    410                | DEFAULT
    411                | STRING_LITERAL"""
    412     p[0] = p[1]
    413 
    414   def p_int(self, p):
    415     """int : int_const
    416            | PLUS int_const
    417            | MINUS int_const"""
    418     p[0] = ''.join(p[1:])
    419 
    420   def p_int_const(self, p):
    421     """int_const : INT_CONST_DEC
    422                  | INT_CONST_HEX"""
    423     p[0] = p[1]
    424 
    425   def p_float(self, p):
    426     """float : FLOAT_CONST
    427              | PLUS FLOAT_CONST
    428              | MINUS FLOAT_CONST"""
    429     p[0] = ''.join(p[1:])
    430 
    431   def p_error(self, e):
    432     if e is None:
    433       # Unexpected EOF.
    434       # TODO(vtl): Can we figure out what's missing?
    435       raise ParseError(self.filename, "Unexpected end of file")
    436 
    437     raise ParseError(self.filename, "Unexpected %r:" % e.value, lineno=e.lineno,
    438                      snippet=self._GetSnippet(e.lineno))
    439 
    440   def _GetSnippet(self, lineno):
    441     return self.source.split('\n')[lineno - 1]
    442 
    443 
    444 def Parse(source, filename):
    445   """Parse source file to AST.
    446 
    447   Args:
    448     source: The source text as a str.
    449     filename: The filename that |source| originates from.
    450 
    451   Returns:
    452     The AST as a mojom.parse.ast.Mojom object.
    453   """
    454   lexer = Lexer(filename)
    455   parser = Parser(lexer, source, filename)
    456 
    457   lex.lex(object=lexer)
    458   yacc.yacc(module=parser, debug=0, write_tables=0)
    459 
    460   tree = yacc.parse(source)
    461   return tree
    462