# Copyright (C) 2013 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Parser for Blink IDL.

The parser uses the PLY (Python Lex-Yacc) library to build a set of parsing
rules which understand the Blink dialect of Web IDL.
It derives from a standard Web IDL parser, overriding rules where Blink IDL
differs syntactically or semantically from the base parser, or where the base
parser diverges from the Web IDL standard.

Web IDL:
    http://www.w3.org/TR/WebIDL/
Web IDL Grammar:
    http://www.w3.org/TR/WebIDL/#idl-grammar
PLY:
    http://www.dabeaz.com/ply/

Design doc:
http://www.chromium.org/developers/design-documents/idl-compiler#TOC-Front-end
"""

# Disable check for line length and Member as Function due to how grammar rules
# are defined with PLY
#
# pylint: disable=R0201
# pylint: disable=C0301
#
# Disable attribute validation, as lint can't import parent class to check
# pylint: disable=E1101

import os.path
import sys

# PLY is in Chromium src/third_party/ply
# (reached by going four directory levels up from this module's directory)
module_path, module_name = os.path.split(__file__)
third_party = os.path.join(module_path, *([os.pardir] * 4))
# Insert at front to override system libraries, and after path[0] == script dir
sys.path.insert(1, third_party)
from ply import yacc

# Base parser is in Chromium src/tools/idl_parser
# (five directory levels up from this module's directory, then into 'tools')
tools_dir = os.path.join(module_path, *([os.pardir] * 5 + ['tools']))
sys.path.append(tools_dir)
from idl_parser.idl_parser import IDLParser, ListFromConcat
from idl_parser.idl_parser import ParseFile as parse_file

from blink_idl_lexer import BlinkIDLLexer


# Explicitly set starting symbol to rule defined only in base parser.
# BEWARE that the starting symbol should NOT be defined in both the base parser
# and the derived one, as otherwise which is used depends on which line number
# is lower, which is fragile. Instead, either use one in base parser or
# create a new symbol, so that this is unambiguous.
# FIXME: unfortunately, this doesn't work in PLY 3.4, so need to duplicate the
# rule below.
# Start symbol handed to yacc.yacc() in BlinkIDLParser.__init__.
STARTING_SYMBOL = 'Definitions'

# We ignore comments (and hence don't need 'Top') but base parser preserves them
# FIXME: Upstream: comments should be removed in base parser
REMOVED_RULES = ['Top',  # [0]
                 'Comments',  # [0.1]
                 'CommentsRest',  # [0.2]
                 ]

# Remove rules from base class
# FIXME: add a class method upstream: @classmethod IDLParser._RemoveRules
for rule in REMOVED_RULES:
    production_name = 'p_' + rule
    delattr(IDLParser, production_name)


class BlinkIDLParser(IDLParser):
    """PLY-based parser for the Blink dialect of Web IDL.

    Grammar rules are the p_<RULE> methods: each docstring is a production
    and each body is the action. Do not add prose to those docstrings.
    """
    # [1]
    # FIXME: Need to duplicate rule for starting symbol here, with line number
    # *lower* than in the base parser (idl_parser.py).
    # This is a bug in PLY: it determines starting symbol by lowest line number.
    # This can be overridden by the 'start' parameter, but as of PLY 3.4 this
    # doesn't work correctly.
    # Keep this rule first in the class and near the top of the file.
    def p_Definitions(self, p):
        """Definitions : ExtendedAttributeList Definition Definitions
                       | """
        if len(p) > 1:
            # Attach the extended attributes to the definition they precede,
            # then flatten the recursive tail into a single list.
            p[2].AddChildren(p[1])
            p[0] = ListFromConcat(p[2], p[3])

    # Below are grammar rules used by yacc, given by functions named p_<RULE>.
    # * The docstring is the production rule in BNF (grammar).
    # * The body is the yacc action (semantics).
    #
    # The PLY framework builds the actual low-level parser by introspecting this
    # parser object, selecting all attributes named p_<RULE> as grammar rules.
    # It extracts the docstrings and uses them as the production rules, building
    # the table of a LALR parser, and uses the body of the functions as actions.
    #
    # Reference:
    # http://www.dabeaz.com/ply/ply.html#ply_nn23
    #
    # Review of yacc:
    # Yacc parses a token stream, internally producing a Concrete Syntax Tree
    # (CST), where each node corresponds to a production rule in the grammar.
    # At each node, it runs an action, which is usually "produce a node in the
    # Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST
    # that aren't included in the AST, since only needed for parsing).
    #
    # The rules use pseudo-variables; in PLY syntax:
    # p[0] is the left side: assign return value to p[0] instead of returning,
    # p[1] ... p[n] are the right side: the values can be accessed, and they
    # can be modified.
    # (In yacc these are $$ and $1 ... $n.)
    #
    # The rules can look cryptic at first, but there are a few standard
    # transforms from the CST to AST. With these in mind, the actions should
    # be reasonably legible.
    #
    # * Ignore production
    #   Discard this branch. Primarily used when one alternative is empty.
    #
    #   Sample code:
    #   if len(p) > 1:
    #       p[0] = ...
    #   # Note no assignment if len(p) == 1
    #
    # * Eliminate singleton production
    #   Discard this node in the CST, pass the next level down up the tree.
    #   Used to ignore productions only necessary for parsing, but not needed
    #   in the AST.
    #
    #   Sample code:
    #   p[0] = p[1]
    #
    # * Build node
    #   The key type of rule. In this parser, produces object of class IDLNode.
    #   There are several helper functions:
    #   * BuildProduction: actually builds an IDLNode, based on a production.
    #   * BuildAttribute: builds an IDLAttribute, which is a temporary
    #                     object to hold a name-value pair, which is then
    #                     set as a Property of the IDLNode when the IDLNode
    #                     is built.
    #   * BuildNamed: Same as BuildProduction, and sets the 'NAME' property.
    #   * BuildTrue: BuildAttribute with value True, for flags.
    #   See base idl_parser.py for definitions and more examples of use.
    #
    #   Sample code:
    #   # Build node of type NodeType, with value p[1], and children.
    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
    #
    #   # Build named node of type NodeType, with name and value p[1].
    #   # (children optional)
    #   p[0] = self.BuildNamed('NodeType', p, 1)
    #
    #   # Make a list
    #   # Used if one node has several children.
    #   children = ListFromConcat(p[2], p[3])
    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
    #
    #   # Also used to collapse the right-associative tree
    #   # produced by parsing a list back into a single list.
    #   """Foos : Foo Foos
    #           |"""
    #   if len(p) > 1:
    #       p[0] = ListFromConcat(p[1], p[2])
    #
    #   # Add children.
    #   # Primarily used to add attributes, produced via BuildTrue.
    #   # p_StaticAttribute
    #   """StaticAttribute : STATIC Attribute"""
    #   p[2].AddChildren(self.BuildTrue('STATIC'))
    #   p[0] = p[2]
    #
    # Numbering scheme for the rules is:
    # [1] for Web IDL spec (or additions in base parser)
    #     These should all be upstreamed to the base parser.
    # [b1] for Blink IDL changes (overrides Web IDL)
    # [b1.1] for Blink IDL additions, auxiliary rules for [b1]
    # Numbers are as per Candidate Recommendation 19 April 2012:
    # http://www.w3.org/TR/2012/CR-WebIDL-20120419/

    # [3] Override action, since we distinguish callbacks
    # FIXME: Upstream
    def p_CallbackOrInterface(self, p):
        """CallbackOrInterface : CALLBACK CallbackRestOrInterface
                               | Interface"""
        if len(p) > 2:
            # First alternative: flag the node as a callback.
            p[2].AddChildren(self.BuildTrue('CALLBACK'))
            p[0] = p[2]
        else:
            p[0] = p[1]

    # [b27] Add strings, more 'Literal' productions
    # 'Literal's needed because integers and strings are both internally strings
    def p_ConstValue(self, p):
        """ConstValue : BooleanLiteral
                      | FloatLiteral
                      | IntegerLiteral
                      | StringLiteral
                      | null"""
        # Standard is (no 'string', fewer 'Literal's):
        # ConstValue : BooleanLiteral
        #            | FloatLiteral
        #            | integer
        #            | NULL
        p[0] = p[1]

    # [b27.1]
    def p_IntegerLiteral(self, p):
        """IntegerLiteral : integer"""
        p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'integer'),
                              self.BuildAttribute('NAME', p[1]))

    # [b27.2]
    def p_StringLiteral(self, p):
        """StringLiteral : string"""
        p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'DOMString'),
                              self.BuildAttribute('NAME', p[1]))

    # [b47]
    def p_ExceptionMember(self, p):
        """ExceptionMember : Const
                           | ExceptionField
                           | Attribute
                           | ExceptionOperation"""
        # Standard is (no Attribute, no ExceptionOperation):
        # ExceptionMember : Const
        #                 | ExceptionField
        # FIXME: In DOMException.idl, Attributes should be changed to
        # ExceptionFields, and Attribute removed from this rule.
        p[0] = p[1]

    # [b47.1]
    def p_ExceptionOperation(self, p):
        """ExceptionOperation : Type identifier '(' ')' ';'"""
        # Needed to handle one case in DOMException.idl:
        # // Override in a Mozilla compatible format
        # [NotEnumerable] DOMString toString();
        # Limited form of Operation to prevent others from being added.
        # FIXME: Should be a stringifier instead.
        p[0] = self.BuildNamed('ExceptionOperation', p, 2, p[1])

    # Extended attributes
    # [b49] Override base parser: remove comment field, since comments stripped
    # FIXME: Upstream
    def p_ExtendedAttributeList(self, p):
        """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
                                 | '[' ']'
                                 | """
        # Only the first alternative builds a node; '[' ']' and the empty
        # alternative leave p[0] unassigned.
        if len(p) > 3:
            items = ListFromConcat(p[2], p[3])
            p[0] = self.BuildProduction('ExtAttributes', p, 1, items)

    # [b50] Allow optional trailing comma
    # Blink-only, marked as WONTFIX in Web IDL spec:
    # https://www.w3.org/Bugs/Public/show_bug.cgi?id=22156
    def p_ExtendedAttributes(self, p):
        """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes
                              | ','
                              |"""
        if len(p) > 3:
            p[0] = ListFromConcat(p[2], p[3])

    # [b51] Add the Blink-specific ExtendedAttributeIdentList and
    # ExtendedAttributeStringLiteralList alternatives (both defined below)
    def p_ExtendedAttribute(self, p):
        """ExtendedAttribute : ExtendedAttributeNoArgs
                             | ExtendedAttributeArgList
                             | ExtendedAttributeIdent
                             | ExtendedAttributeIdentList
                             | ExtendedAttributeStringLiteralList
                             | ExtendedAttributeNamedArgList"""
        p[0] = p[1]

    # [59]
    # FIXME: Upstream UnionType
    def p_UnionType(self, p):
        """UnionType : '(' UnionMemberType OR UnionMemberType UnionMemberTypes ')'"""
        members = ListFromConcat(p[2], p[4], p[5])
        p[0] = self.BuildProduction('UnionType', p, 1, members)

    # [60]
    def p_UnionMemberType(self, p):
        """UnionMemberType : NonAnyType
                           | UnionType TypeSuffix
                           | ANY '[' ']' TypeSuffix"""
        # len(p) identifies which alternative matched (2, 3 or 5 entries).
        if len(p) == 2:
            p[0] = self.BuildProduction('Type', p, 1, p[1])
        elif len(p) == 3:
            p[0] = self.BuildProduction('Type', p, 1, ListFromConcat(p[1], p[2]))
        else:
            any_node = ListFromConcat(self.BuildProduction('Any', p, 1), p[4])
            p[0] = self.BuildProduction('Type', p, 1, any_node)

    # [61]
    def p_UnionMemberTypes(self, p):
        """UnionMemberTypes : OR UnionMemberType UnionMemberTypes
                            |"""
        if len(p) > 2:
            p[0] = ListFromConcat(p[2], p[3])

    # [70] Override base parser to remove non-standard sized array
    # FIXME: Upstream
    def p_TypeSuffix(self, p):
        """TypeSuffix : '[' ']' TypeSuffix
                      | '?' TypeSuffixStartingWithArray
                      |"""
        if len(p) == 4:
            p[0] = self.BuildProduction('Array', p, 1, p[3])
        elif len(p) == 3:
            p[0] = ListFromConcat(self.BuildTrue('NULLABLE'), p[2])

    # [b76.1] Add support for compound Extended Attribute values (A&B and A|B)
    def p_ExtendedAttributeIdentList(self, p):
        """ExtendedAttributeIdentList : identifier '=' identifier '&' IdentAndList
                                      | identifier '=' identifier '|' IdentOrList"""
        # The value is the concatenation: first identifier, operator, rest.
        value = self.BuildAttribute('VALUE', p[3] + p[4] + p[5])
        p[0] = self.BuildNamed('ExtAttribute', p, 1, value)

    # [b76.2] A&B&C
    def p_IdentAndList(self, p):
        """IdentAndList : identifier '&' IdentAndList
                        | identifier"""
        if len(p) > 3:
            p[0] = p[1] + p[2] + p[3]
        else:
            p[0] = p[1]

    # [b76.3] A|B|C
    def p_IdentOrList(self, p):
        """IdentOrList : identifier '|' IdentOrList
                       | identifier"""
        if len(p) > 3:
            p[0] = p[1] + p[2] + p[3]
        else:
            p[0] = p[1]

    # Blink extension: Add support for compound Extended Attribute values over string literals ("A"|"B")
    def p_ExtendedAttributeStringLiteralList(self, p):
        """ExtendedAttributeStringLiteralList : identifier '=' StringLiteralOrList"""
        value = self.BuildAttribute('VALUE', p[3])
        p[0] = self.BuildNamed('ExtAttribute', p, 1, value)

    # Blink extension: one or more string literals. The values aren't
    # propagated as literals, but by their value only.
    def p_StringLiteralOrList(self, p):
        """StringLiteralOrList : StringLiteral '|' StringLiteralOrList
                               | StringLiteral"""
        def unwrap_string(ls):
            """Reach in and grab the string literal's "NAME"."""
            # p_StringLiteral yields [TYPE attribute, NAME attribute];
            # index 1 is the NAME attribute.
            return ls[1].value

        if len(p) > 3:
            p[0] = unwrap_string(p[1]) + p[2] + p[3]
        else:
            p[0] = unwrap_string(p[1])

    def __init__(self,
                 # common parameters
                 debug=False,
                 # idl_parser parameters
                 lexer=None, verbose=False, mute_error=False,
                 # yacc parameters
                 outputdir='', optimize=True, write_tables=False,
                 picklefile=None):
        """Build the lexer (if needed) and the yacc parser object.

        Args:
            debug: If true, turns optimization off, clears outputdir and
                picklefile, and turns write_tables on. Also forwarded to the
                lexer and to yacc, and stored as self.parse_debug.
            lexer: Lexer to use; a BlinkIDLLexer is created when falsy.
            verbose: Stored as self.verbose.
            mute_error: Stored as self.mute_error.
            outputdir: Passed to the default lexer. When set, also the
                directory of the default pickle file, 'parsetab.pickle'.
            optimize: Forwarded to the default lexer and to yacc.yacc().
            write_tables: Forwarded to yacc.yacc().
            picklefile: Forwarded to yacc.yacc(). Defaults to
                <outputdir>/parsetab.pickle when outputdir is set.
        """
        if debug:
            # Turn off optimization and caching, and write out tables,
            # to help debugging
            optimize = False
            outputdir = None
            picklefile = None
            write_tables = True
        if outputdir:
            picklefile = picklefile or os.path.join(outputdir, 'parsetab.pickle')

        lexer = lexer or BlinkIDLLexer(debug=debug,
                                       outputdir=outputdir,
                                       optimize=optimize)
        self.lexer = lexer
        self.tokens = lexer.KnownTokens()
        # Using SLR (instead of LALR) generates the table faster,
        # but produces the same output. This is ok b/c Web IDL (and Blink IDL)
        # is an SLR grammar (as is often the case for simple LL(1) grammars).
        #
        # Optimized mode substantially decreases startup time (by disabling
        # error checking), and also allows use of Python's optimized mode.
        # See: Using Python's Optimized Mode
        # http://www.dabeaz.com/ply/ply.html#ply_nn38
        #
        # |picklefile| allows simpler importing than |tabmodule| (parsetab.py),
        # as we don't need to modify sys.path; virtually identical speed.
        # See: CHANGES, Version 3.2
        # http://ply.googlecode.com/svn/trunk/CHANGES
        self.yaccobj = yacc.yacc(module=self,
                                 start=STARTING_SYMBOL,
                                 method='SLR',
                                 debug=debug,
                                 optimize=optimize,
                                 write_tables=write_tables,
                                 picklefile=picklefile)
        # The base class __init__ is not called here, so the remaining state
        # is set directly.
        # NOTE(review): presumably mirrors what IDLParser.__init__ sets up;
        # confirm against idl_parser.py.
        self.parse_debug = debug
        self.verbose = verbose
        self.mute_error = mute_error
        self._parse_errors = 0
        self._parse_warnings = 0
        self._last_error_msg = None
        self._last_error_lineno = 0
        self._last_error_pos = 0


################################################################################

def main(argv):
    """Build a parser with OUTPUT_DIR (argv[1]) as its output directory.

    Args:
        argv: Command-line arguments; argv[0] is the program name and
            argv[1] the output directory.

    Returns:
        1 if the output directory argument is missing (after printing a
        usage message), otherwise 0.
    """
    # If file itself executed, cache parse table
    try:
        outputdir = argv[1]
    except IndexError:
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Usage: %s OUTPUT_DIR' % argv[0])
        return 1
    # Constructed only for its side effects; the instance itself is not used.
    BlinkIDLParser(outputdir=outputdir)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))