Home | History | Annotate | Download | only in scripts
      1 # Copyright (C) 2013 Google Inc. All rights reserved.
      2 #
      3 # Redistribution and use in source and binary forms, with or without
      4 # modification, are permitted provided that the following conditions are
      5 # met:
      6 #
      7 #     * Redistributions of source code must retain the above copyright
      8 # notice, this list of conditions and the following disclaimer.
      9 #     * Redistributions in binary form must reproduce the above
     10 # copyright notice, this list of conditions and the following disclaimer
     11 # in the documentation and/or other materials provided with the
     12 # distribution.
     13 #     * Neither the name of Google Inc. nor the names of its
     14 # contributors may be used to endorse or promote products derived from
     15 # this software without specific prior written permission.
     16 #
     17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 """Parser for Blink IDL.
     30 
     31 The parser uses the PLY (Python Lex-Yacc) library to build a set of parsing
     32 rules which understand the Blink dialect of Web IDL.
     33 It derives from a standard Web IDL parser, overriding rules where Blink IDL
     34 differs syntactically or semantically from the base parser, or where the base
     35 parser diverges from the Web IDL standard.
     36 
     37 Web IDL:
     38     http://www.w3.org/TR/WebIDL/
     39 Web IDL Grammar:
     40     http://www.w3.org/TR/WebIDL/#idl-grammar
     41 PLY:
     42     http://www.dabeaz.com/ply/
     43 
     44 Design doc:
     45 http://www.chromium.org/developers/design-documents/idl-compiler#TOC-Front-end
     46 """
     47 
     48 # Disable check for line length and Member as Function due to how grammar rules
     49 # are defined with PLY
     50 #
     51 # pylint: disable=R0201
     52 # pylint: disable=C0301
     53 #
     54 # Disable attribute validation, as lint can't import parent class to check
     55 # pylint: disable=E1101
     56 
     57 import os.path
     58 import sys
     59 
# PLY is in Chromium src/third_party/ply
# Paths are derived from this file's own location; module_name is not used
# again in this part of the file.
module_path, module_name = os.path.split(__file__)
third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir)
# Insert at front to override system libraries, and after path[0] == script dir
sys.path.insert(1, third_party)
# Must come after the sys.path change above so that this copy of PLY is found.
from ply import yacc

# Base parser is in Chromium src/tools/idl_parser
tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools')
# Appended (not inserted), so earlier sys.path entries take precedence.
sys.path.append(tools_dir)
from idl_parser.idl_parser import IDLParser, ListFromConcat
# NOTE(review): parse_file is not referenced in the code below; presumably it
# is re-exported for importers of this module -- confirm before removing.
from idl_parser.idl_parser import ParseFile as parse_file

from blink_idl_lexer import BlinkIDLLexer
     74 
     75 
     76 # Explicitly set starting symbol to rule defined only in base parser.
     77 # BEWARE that the starting symbol should NOT be defined in both the base parser
     78 # and the derived one, as otherwise which is used depends on which line number
     79 # is lower, which is fragile. Instead, either use one in base parser or
     80 # create a new symbol, so that this is unambiguous.
     81 # FIXME: unfortunately, this doesn't work in PLY 3.4, so need to duplicate the
     82 # rule below.
# Passed as the 'start' argument to yacc.yacc() in BlinkIDLParser.__init__.
STARTING_SYMBOL = 'Definitions'

# We ignore comments (and hence don't need 'Top') but base parser preserves them
# FIXME: Upstream: comments should be removed in base parser
# Each entry is a rule name; the matching base-class method is 'p_' + name.
REMOVED_RULES = ['Top',  # [0]
                 'Comments',  # [0.1]
                 'CommentsRest',  # [0.2]
                ]

# Remove rules from base class
# FIXME: add a class method upstream: @classmethod IDLParser._RemoveRules
# This runs at import time and mutates IDLParser itself, not just the subclass.
for rule in REMOVED_RULES:
    production_name = 'p_' + rule
    # delattr raises AttributeError if the base class lacks this production.
    delattr(IDLParser, production_name)
     97 
     98 
     99 class BlinkIDLParser(IDLParser):
    100     # [1]
    101     # FIXME: Need to duplicate rule for starting symbol here, with line number
    102     # *lower* than in the base parser (idl_parser.py).
    103     # This is a bug in PLY: it determines starting symbol by lowest line number.
    104     # This can be overridden by the 'start' parameter, but as of PLY 3.4 this
    105     # doesn't work correctly.
    def p_Definitions(self, p):
        """Definitions : ExtendedAttributeList Definition Definitions
                       | """
        # Empty alternative: nothing matched, so leave p[0] unset.
        if len(p) <= 1:
            return
        ext_attributes, definition, remaining = p[1], p[2], p[3]
        # Attach the extended attributes to the definition they precede, then
        # fold this definition and the rest of the list into one list.
        definition.AddChildren(ext_attributes)
        p[0] = ListFromConcat(definition, remaining)
    112 
    113     # Below are grammar rules used by yacc, given by functions named p_<RULE>.
    114     # * The docstring is the production rule in BNF (grammar).
    115     # * The body is the yacc action (semantics).
    116     #
    117     # The PLY framework builds the actual low-level parser by introspecting this
    118     # parser object, selecting all attributes named p_<RULE> as grammar rules.
    119     # It extracts the docstrings and uses them as the production rules, building
    120     # the table of a LALR parser, and uses the body of the functions as actions.
    121     #
    122     # Reference:
    123     # http://www.dabeaz.com/ply/ply.html#ply_nn23
    124     #
    125     # Review of yacc:
    126     # Yacc parses a token stream, internally producing a Concrete Syntax Tree
    127     # (CST), where each node corresponds to a production rule in the grammar.
    128     # At each node, it runs an action, which is usually "produce a node in the
    129     # Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST
    130     # that aren't included in the AST, since only needed for parsing).
    131     #
    132     # The rules use pseudo-variables; in PLY syntax:
    133     # p[0] is the left side: assign return value to p[0] instead of returning,
    134     # p[1] ... p[n] are the right side: the values can be accessed, and they
    135     # can be modified.
    136     # (In yacc these are $$ and $1 ... $n.)
    137     #
    138     # The rules can look cryptic at first, but there are a few standard
    139     # transforms from the CST to AST. With these in mind, the actions should
    140     # be reasonably legible.
    141     #
    142     # * Ignore production
    143     #   Discard this branch. Primarily used when one alternative is empty.
    144     #
    145     #   Sample code:
    146     #   if len(p) > 1:
    147     #       p[0] = ...
    148     #   # Note no assignment if len(p) == 1
    149     #
    150     # * Eliminate singleton production
    151     #   Discard this node in the CST, pass the next level down up the tree.
    152     #   Used to ignore productions only necessary for parsing, but not needed
    153     #   in the AST.
    154     #
    155     #   Sample code:
    156     #   p[0] = p[1]
    157     #
    158     # * Build node
    159     #   The key type of rule. In this parser, produces object of class IDLNode.
    160     #   There are several helper functions:
    161     #   * BuildProduction: actually builds an IDLNode, based on a production.
    162     #   * BuildAttribute: builds an IDLAttribute, which is a temporary
    163     #                     object to hold a name-value pair, which is then
    164     #                     set as a Property of the IDLNode when the IDLNode
    165     #                     is built.
    166     #   * BuildNamed: Same as BuildProduction, and sets the 'NAME' property.
    167     #   * BuildTrue: BuildAttribute with value True, for flags.
    168     #   See base idl_parser.py for definitions and more examples of use.
    169     #
    170     #   Sample code:
    171     #   # Build node of type NodeType, with value p[1], and children.
    172     #   p[0] = self.BuildProduction('NodeType', p, 1, children)
    173     #
    174     #   # Build named node of type NodeType, with name and value p[1].
    175     #   # (children optional)
    176     #   p[0] = self.BuildNamed('NodeType', p, 1)
    177     #
    178     #   # Make a list
    179     #   # Used if one node has several children.
    180     #   children = ListFromConcat(p[2], p[3])
    181     #   p[0] = self.BuildProduction('NodeType', p, 1, children)
    182     #
    183     #   # Also used to collapse the right-associative tree
    184     #   # produced by parsing a list back into a single list.
    185     #   """Foos : Foo Foos
    186     #           |"""
    187     #   if len(p) > 1:
    188     #       p[0] = ListFromConcat(p[1], p[2])
    189     #
    190     #   # Add children.
    191     #   # Primarily used to add attributes, produced via BuildTrue.
    192     #   # p_StaticAttribute
    193     #   """StaticAttribute : STATIC Attribute"""
    194     #   p[2].AddChildren(self.BuildTrue('STATIC'))
    195     #   p[0] = p[2]
    196     #
    197     # Numbering scheme for the rules is:
    198     # [1] for Web IDL spec (or additions in base parser)
    199     #     These should all be upstreamed to the base parser.
    200     # [b1] for Blink IDL changes (overrides Web IDL)
    201     # [b1.1] for Blink IDL additions, auxiliary rules for [b1]
    202     # Numbers are as per Candidate Recommendation 19 April 2012:
    203     # http://www.w3.org/TR/2012/CR-WebIDL-20120419/
    204 
    205     # [3] Override action, since we distinguish callbacks
    206     # FIXME: Upstream
    def p_CallbackOrInterface(self, p):
        """CallbackOrInterface : CALLBACK CallbackRestOrInterface
                               | Interface"""
        # Plain interface: pass the single child straight up.
        if len(p) <= 2:
            p[0] = p[1]
            return
        # Callback form: flag the node with a CALLBACK attribute before
        # passing it up.
        callback_node = p[2]
        callback_node.AddChildren(self.BuildTrue('CALLBACK'))
        p[0] = callback_node
    215 
    216     # [b27] Add strings, more 'Literal' productions
    217     # 'Literal's needed because integers and strings are both internally strings
    def p_ConstValue(self, p):
        """ConstValue : BooleanLiteral
                      | FloatLiteral
                      | IntegerLiteral
                      | StringLiteral
                      | null"""
        # The standard rule has no 'string' alternative and fewer 'Literal'
        # wrappers:
        # ConstValue : BooleanLiteral
        #            | FloatLiteral
        #            | integer
        #            | NULL
        # Singleton production: pass the matched alternative up unchanged.
        literal = p[1]
        p[0] = literal
    230 
    231     # [b27.1]
    def p_IntegerLiteral(self, p):
        """IntegerLiteral : integer"""
        # Emit a TYPE/NAME attribute pair: TYPE is fixed to 'integer' and NAME
        # carries the matched token text.
        type_attribute = self.BuildAttribute('TYPE', 'integer')
        name_attribute = self.BuildAttribute('NAME', p[1])
        p[0] = ListFromConcat(type_attribute, name_attribute)
    236 
    237     # [b27.2]
    def p_StringLiteral(self, p):
        """StringLiteral : string"""
        # Emit a TYPE/NAME attribute pair: TYPE is fixed to 'DOMString' and
        # NAME carries the matched token text.
        type_attribute = self.BuildAttribute('TYPE', 'DOMString')
        name_attribute = self.BuildAttribute('NAME', p[1])
        p[0] = ListFromConcat(type_attribute, name_attribute)
    242 
    243     # [b47]
    def p_ExceptionMember(self, p):
        """ExceptionMember : Const
                           | ExceptionField
                           | Attribute
                           | ExceptionOperation"""
        # The standard rule allows neither Attribute nor ExceptionOperation:
        # ExceptionMember : Const
        #                 | ExceptionField
        # FIXME: In DOMException.idl, Attributes should be changed to
        # ExceptionFields, and Attribute removed from this rule.
        # Singleton production: pass the matched member up unchanged.
        member = p[1]
        p[0] = member
    255 
    256     # [b47.1]
    def p_ExceptionOperation(self, p):
        """ExceptionOperation : Type identifier '(' ')' ';'"""
        # Exists for a single case in DOMException.idl:
        # // Override in a Mozilla compatible format
        # [NotEnumerable] DOMString toString();
        # Deliberately a restricted form of Operation, so that no other
        # operations can be added to exceptions.
        # FIXME: Should be a stringifier instead.
        # The node is named after the identifier (p[2]); the return type
        # (p[1]) becomes its child.
        return_type = p[1]
        p[0] = self.BuildNamed('ExceptionOperation', p, 2, return_type)
    265 
    266     # Extended attributes
    267     # [b49] Override base parser: remove comment field, since comments stripped
    268     # FIXME: Upstream
    def p_ExtendedAttributeList(self, p):
        """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
                                 | '[' ']'
                                 | """
        # Both the empty alternative and a bare '[' ']' produce no node.
        if len(p) <= 3:
            return
        # Merge the first attribute with the rest and wrap them in a single
        # ExtAttributes node.
        attributes = ListFromConcat(p[2], p[3])
        p[0] = self.BuildProduction('ExtAttributes', p, 1, attributes)
    276 
    277     # [b50] Allow optional trailing comma
    278     # Blink-only, marked as WONTFIX in Web IDL spec:
    279     # https://www.w3.org/Bugs/Public/show_bug.cgi?id=22156
    def p_ExtendedAttributes(self, p):
        """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes
                              | ','
                              |"""
        # A lone trailing comma and the empty alternative add nothing.
        if len(p) <= 3:
            return
        # Drop the comma; keep this attribute followed by the remaining ones.
        attribute, remaining = p[2], p[3]
        p[0] = ListFromConcat(attribute, remaining)
    286 
    # [b51] Add ExtendedAttributeIdentList and ExtendedAttributeStringLiteralList
    def p_ExtendedAttribute(self, p):
        """ExtendedAttribute : ExtendedAttributeNoArgs
                             | ExtendedAttributeArgList
                             | ExtendedAttributeIdent
                             | ExtendedAttributeIdentList
                             | ExtendedAttributeStringLiteralList
                             | ExtendedAttributeNamedArgList"""
        # Singleton production: pass whichever form matched up unchanged.
        attribute = p[1]
        p[0] = attribute
    296 
    297     # [59]
    298     # FIXME: Upstream UnionType
    def p_UnionType(self, p):
        """UnionType : '(' UnionMemberType OR UnionMemberType UnionMemberTypes ')'"""
        # Gather the two mandatory members and any further ones into a single
        # child list for the UnionType node.
        first_member, second_member, further_members = p[2], p[4], p[5]
        union_members = ListFromConcat(first_member, second_member,
                                       further_members)
        p[0] = self.BuildProduction('UnionType', p, 1, union_members)
    303 
    304     # [60]
    def p_UnionMemberType(self, p):
        """UnionMemberType : NonAnyType
                           | UnionType TypeSuffix
                           | ANY '[' ']' TypeSuffix"""
        # Choose the children according to which alternative matched, then
        # wrap them in a Type node once at the end.
        symbol_count = len(p)
        if symbol_count == 2:
            # NonAnyType
            children = p[1]
        elif symbol_count == 3:
            # UnionType TypeSuffix
            children = ListFromConcat(p[1], p[2])
        else:
            # ANY '[' ']' TypeSuffix
            children = ListFromConcat(self.BuildProduction('Any', p, 1), p[4])
        p[0] = self.BuildProduction('Type', p, 1, children)
    316 
    317     # [61]
    def p_UnionMemberTypes(self, p):
        """UnionMemberTypes : OR UnionMemberType UnionMemberTypes
                            |"""
        # Empty alternative: no further members.
        if len(p) <= 2:
            return
        # Drop the OR token; keep this member followed by the remaining ones.
        member, remaining = p[2], p[3]
        p[0] = ListFromConcat(member, remaining)
    323 
    324     # [70] Override base parser to remove non-standard sized array
    325     # FIXME: Upstream
    def p_TypeSuffix(self, p):
        """TypeSuffix : '[' ']' TypeSuffix
                      | '?' TypeSuffixStartingWithArray
                      |"""
        symbol_count = len(p)
        if symbol_count == 3:
            # '?' form: a NULLABLE flag followed by whatever suffix comes next.
            p[0] = ListFromConcat(self.BuildTrue('NULLABLE'), p[2])
        elif symbol_count == 4:
            # '[' ']' form: an Array node wrapping the remaining suffix.
            p[0] = self.BuildProduction('Array', p, 1, p[3])
        # Empty alternative: leave p[0] unset.
    334 
    335     # [b76.1] Add support for compound Extended Attribute values (A&B and A|B)
    def p_ExtendedAttributeIdentList(self, p):
        """ExtendedAttributeIdentList : identifier '=' identifier '&' IdentAndList
                                      | identifier '=' identifier '|' IdentOrList"""
        # Rebuild the compound value text (e.g. A&B or A|B) from the first
        # identifier, the operator and the already-joined remainder.
        first_ident, operator, remainder = p[3], p[4], p[5]
        compound = first_ident + operator + remainder
        value_attribute = self.BuildAttribute('VALUE', compound)
        # The node is named after the attribute identifier (p[1]).
        p[0] = self.BuildNamed('ExtAttribute', p, 1, value_attribute)
    341 
    342     # [b76.2] A&B&C
    def p_IdentAndList(self, p):
        """IdentAndList : identifier '&' IdentAndList
                        | identifier"""
        # Single identifier: pass it up as is.
        if len(p) <= 3:
            p[0] = p[1]
            return
        # Recursive form: join identifier, '&' and the already-joined tail.
        head, separator, tail = p[1], p[2], p[3]
        p[0] = head + separator + tail
    350 
    351     # [b76.3] A|B|C
    def p_IdentOrList(self, p):
        """IdentOrList : identifier '|' IdentOrList
                       | identifier"""
        # Single identifier: pass it up as is.
        if len(p) <= 3:
            p[0] = p[1]
            return
        # Recursive form: join identifier, '|' and the already-joined tail.
        head, separator, tail = p[1], p[2], p[3]
        p[0] = head + separator + tail
    359 
    360     # Blink extension: Add support for compound Extended Attribute values over string literals ("A"|"B")
    def p_ExtendedAttributeStringLiteralList(self, p):
        """ExtendedAttributeStringLiteralList : identifier '=' StringLiteralOrList"""
        # p[3] is the already-joined string-literal list; store it as the
        # VALUE of a node named after the attribute identifier (p[1]).
        joined_literals = p[3]
        value_attribute = self.BuildAttribute('VALUE', joined_literals)
        p[0] = self.BuildNamed('ExtAttribute', p, 1, value_attribute)
    365 
    # Blink extension: one or more string literals. The literals are not propagated
    # as literal nodes; only their string values are.
    def p_StringLiteralOrList(self, p):
        """StringLiteralOrList : StringLiteral '|' StringLiteralOrList
                               | StringLiteral"""
        # p[1] is the StringLiteral result; its second entry is the "NAME"
        # attribute, whose .value holds the literal's text.
        literal_text = p[1][1].value

        if len(p) > 3:
            # Recursive form: join this literal, '|' and the joined tail.
            p[0] = literal_text + p[2] + p[3]
        else:
            p[0] = literal_text
    379 
    def __init__(self,
                 # common parameters
                 debug=False,
                 # idl_parser parameters
                 lexer=None, verbose=False, mute_error=False,
                 # yacc parameters
                 outputdir='', optimize=True, write_tables=False,
                 picklefile=None):
        if debug:
            # Debugging: no optimization, no cached tables, and write the
            # tables out so they can be inspected.
            optimize, write_tables = False, True
            outputdir = picklefile = None
        # With an output directory and no explicit pickle file, default to
        # parsetab.pickle inside that directory.
        if outputdir and not picklefile:
            picklefile = os.path.join(outputdir, 'parsetab.pickle')

        # Build a Blink lexer unless the caller supplied one.
        if not lexer:
            lexer = BlinkIDLLexer(debug=debug,
                                  outputdir=outputdir,
                                  optimize=optimize)
        self.lexer = lexer
        self.tokens = lexer.KnownTokens()

        # SLR (rather than LALR) builds the table faster and yields the same
        # output. This is fine because Web IDL (and Blink IDL) is an SLR
        # grammar, as is often the case for simple LL(1) grammars.
        #
        # Optimized mode substantially decreases startup time (by disabling
        # error checking), and also allows use of Python's optimized mode.
        # See: Using Python's Optimized Mode
        # http://www.dabeaz.com/ply/ply.html#ply_nn38
        #
        # |picklefile| allows simpler importing than |tabmodule| (parsetab.py),
        # as we don't need to modify sys.path; virtually identical speed.
        # See: CHANGES, Version 3.2
        # http://ply.googlecode.com/svn/trunk/CHANGES
        yacc_options = {
            'module': self,
            'start': STARTING_SYMBOL,
            'method': 'SLR',
            'debug': debug,
            'optimize': optimize,
            'write_tables': write_tables,
            'picklefile': picklefile,
        }
        self.yaccobj = yacc.yacc(**yacc_options)

        # Reporting options.
        self.parse_debug = debug
        self.verbose = verbose
        self.mute_error = mute_error

        # Error and warning bookkeeping starts out empty.
        self._parse_errors = 0
        self._parse_warnings = 0
        self._last_error_msg = None
        self._last_error_lineno = 0
        self._last_error_pos = 0
    431 
    432 
    433 ################################################################################
    434 
    435 def main(argv):
    436     # If file itself executed, cache parse table
    437     try:
    438         outputdir = argv[1]
    439     except IndexError as err:
    440         print 'Usage: %s OUTPUT_DIR' % argv[0]
    441         return 1
    442     parser = BlinkIDLParser(outputdir=outputdir)
    443 
    444 
# Script entry point: runs only when this file is executed directly.
if __name__ == '__main__':
    # main()'s return value becomes the process exit status.
    sys.exit(main(sys.argv))
    447