// Backtracking test grammar: a trimmed subset of the C declaration syntax,
// generated for the Python target with backtracking and memoization enabled.
grammar t033backtracking;
options {
    language=Python;
    backtrack=true;
    memoize=true;
    k=2;
}

// Dynamic scope holding the type names declared (via typedef) in the
// current region of the input.
scope Symbols {
    types;
}

@header {
# Fallbacks for interpreters that lack the set/frozenset builtins.
try:
    set = set
    frozenset = frozenset
except NameError:
    from sets import Set as set, ImmutableSet as frozenset


# Fallback for interpreters that lack the reversed builtin.
try:
    reversed = reversed
except NameError:
    def reversed(l):
        flipped = l[:]
        flipped.reverse()
        return flipped

}

@members {
    def isTypeName(self, name):
        # Search the Symbols scopes from innermost to outermost and report
        # whether any of them has recorded name as a type.
        found = False
        for frame in reversed(self.Symbols_stack):
            if name in frame.types:
                found = True
                break

        return found

}

// Start rule: one or more external declarations.
translation_unit
scope Symbols; // entire file is a scope
@init {
  $Symbols::types = set()
}
    : external_declaration+
    ;

/** Either a function definition or any other kind of C decl/def.
 *  The LL(*) analysis algorithm cannot handle this decision because of
 *  the recursion in the declarator rules, so a manual syntactic
 *  predicate is used instead; that way we do not backtrack over the
 *  entire function.  It also gives better error messages, since errors
 *  inside the function body no longer make the parser fail to predict
 *  that it is a function (this used to produce weird errors).
 *  Remember: the goal is to avoid backtracking like the plague because
 *  it makes debugging, actions, and errors harder.
 *
 *  Note that k=1 results in a much smaller predictor for the fixed
 *  lookahead; k=2 generated a few thousand extra lines. ;)
 *  That will have to be optimized in the future.
 */
external_declaration
options {k=1;}
    : ( declaration_specifiers? declarator declaration* '{' )=> function_definition
    | declaration
    ;

// Function header only; the body alternatives are disabled in this
// trimmed grammar.
function_definition
scope Symbols; // put parameters and locals into same scope for now
@init {
  $Symbols::types = set()
}
    :   declaration_specifiers? declarator
//         (   declaration+ compound_statement // K&R style
//         |   compound_statement              // ANSI style
//         )
    ;

// A declaration. The rule-local isTypedef flag is set once the typedef
// keyword (and optional specifiers) has been matched, so that
// direct_declarator can record the declared identifier as a type name.
declaration
scope {
  isTypedef;
}
@init {
  $declaration::isTypedef = False
}
    : 'typedef' declaration_specifiers? {$declaration::isTypedef = True}
      init_declarator_list ';' // special case, looking for typedef
    | declaration_specifiers init_declarator_list? ';'
    ;

// One or more storage classes, type specifiers or qualifiers, in any order.
declaration_specifiers
    :   (   storage_class_specifier
        |   type_specifier
        |   type_qualifier
        )+
    ;

// Comma-separated list of declarators.
init_declarator_list
    : init_declarator (',' init_declarator)*
    ;

// Initializers are disabled in this trimmed grammar.
init_declarator
    : declarator //('=' initializer)?
    ;

storage_class_specifier
    : 'extern'
    | 'static'
    | 'auto'
    | 'register'
    ;

// Built-in type keywords, or an identifier known to be a type name.
type_specifier
    : 'void'
    | 'char'
    | 'short'
    | 'int'
    | 'long'
    | 'float'
    | 'double'
    | 'signed'
    | 'unsigned'
//     | struct_or_union_specifier
//     | enum_specifier
    | type_id
    ;

// Matches an IDENTIFIER only when the semantic predicate confirms, via
// isTypeName, that the upcoming token text has been recorded as a type.
type_id
    :   {self.isTypeName(self.input.LT(1).getText())}? IDENTIFIER
//     {System.out.println($IDENTIFIER.text+" is a type");}
    ;

// struct_or_union_specifier
// options {k=3;}
// scope Symbols; // structs are scopes
// @init {
//   $Symbols::types = set()
// }
//     : struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
//     | struct_or_union IDENTIFIER
//     ;

// struct_or_union
//     : 'struct'
//     | 'union'
//     ;

// struct_declaration_list
//     : struct_declaration+
//     ;

// struct_declaration
//     : specifier_qualifier_list struct_declarator_list ';'
//     ;

// specifier_qualifier_list
//     : ( type_qualifier | type_specifier )+
//     ;

// struct_declarator_list
//     : struct_declarator (',' struct_declarator)*
//     ;

// struct_declarator
//     : declarator (':' constant_expression)?
//     | ':' constant_expression
//     ;

// enum_specifier
// options {k=3;}
//     : 'enum' '{' enumerator_list '}'
//     | 'enum' IDENTIFIER '{' enumerator_list '}'
//     | 'enum' IDENTIFIER
//     ;

// enumerator_list
//     : enumerator (',' enumerator)*
//     ;

// enumerator
//     : IDENTIFIER ('=' constant_expression)?
//     ;

type_qualifier
    : 'const'
    | 'volatile'
    ;

// A declarator is an optionally pointer-prefixed direct declarator, or a
// bare pointer.
declarator
    : pointer? direct_declarator
    | pointer
    ;

// An identifier or a parenthesized declarator, followed by any number of
// suffixes. When the identifier is matched while a declaration rule is
// active and that declaration was flagged as a typedef, the identifier
// text is added to the current Symbols scope and a trace line is printed.
// The trace uses print with a single parenthesized argument, which emits
// the same text whether the generated parser runs under Python 2 or 3.
direct_declarator
    :   (   IDENTIFIER
            {
            if len($declaration)>0 and $declaration::isTypedef:
                $Symbols::types.add($IDENTIFIER.text)
                print("define type "+$IDENTIFIER.text)
            }
        |   '(' declarator ')'
        )
        declarator_suffix*
    ;

// Only the empty array and empty parameter-list suffixes are enabled in
// this trimmed grammar.
declarator_suffix
    :   /*'[' constant_expression ']'
    |*/   '[' ']'
//     |   '(' parameter_type_list ')'
//     |   '(' identifier_list ')'
    |   '(' ')'
    ;

// One or more '*', each optionally followed by type qualifiers.
pointer
    : '*' type_qualifier+ pointer?
    | '*' pointer
    | '*'
    ;

// parameter_type_list
//     : parameter_list (',' '...')?
//     ;

// parameter_list
//     : parameter_declaration (',' parameter_declaration)*
//     ;

// parameter_declaration
//     : declaration_specifiers (declarator|abstract_declarator)*
//     ;

// identifier_list
//     : IDENTIFIER (',' IDENTIFIER)*
//     ;

// type_name
//     : specifier_qualifier_list abstract_declarator?
//     ;

// abstract_declarator
//     : pointer direct_abstract_declarator?
//     | direct_abstract_declarator
//     ;

// direct_abstract_declarator
//     :   ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
//     ;

// abstract_declarator_suffix
//     :   '[' ']'
//     |   '[' constant_expression ']'
//     |   '(' ')'
//     |   '(' parameter_type_list ')'
//     ;

// initializer
//     : assignment_expression
//     | '{' initializer_list ','? '}'
//     ;

// initializer_list
//     : initializer (',' initializer)*
//     ;

// // E x p r e s s i o n s

// argument_expression_list
//     :   assignment_expression (',' assignment_expression)*
//     ;

// additive_expression
//     : (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
//     ;

// multiplicative_expression
//     : (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
//     ;

// cast_expression
//     : '(' type_name ')' cast_expression
//     | unary_expression
//     ;

// unary_expression
//     : postfix_expression
//     | '++' unary_expression
//     | '--' unary_expression
//     | unary_operator cast_expression
//     | 'sizeof' unary_expression
//     | 'sizeof' '(' type_name ')'
//     ;

// postfix_expression
//     :   primary_expression
//         (   '[' expression ']'
//         |   '(' ')'
//         |   '(' argument_expression_list ')'
//         |   '.' IDENTIFIER
//         |   '*' IDENTIFIER
//         |   '->' IDENTIFIER
//         |   '++'
//         |   '--'
//         )*
//     ;

// unary_operator
//     : '&'
//     | '*'
//     | '+'
//     | '-'
//     | '~'
//     | '!'
//     ;

// primary_expression
//     : IDENTIFIER
//     | constant
//     | '(' expression ')'
//     ;

// constant
//     :   HEX_LITERAL
//     |   OCTAL_LITERAL
//     |   DECIMAL_LITERAL
//     |   CHARACTER_LITERAL
//     |   STRING_LITERAL
//     |   FLOATING_POINT_LITERAL
//     ;

// /////

// expression
//     : assignment_expression (',' assignment_expression)*
//     ;

// constant_expression
//     : conditional_expression
//     ;

// assignment_expression
//     : lvalue assignment_operator assignment_expression
//     | conditional_expression
//     ;

// lvalue
//     :   unary_expression
//     ;

// assignment_operator
//     : '='
//     | '*='
//     | '/='
//     | '%='
//     | '+='
//     | '-='
//     | '<<='
//     | '>>='
//     | '&='
//     | '^='
//     | '|='
//     ;

// conditional_expression
//     : logical_or_expression ('?' expression ':' conditional_expression)?
//     ;

// logical_or_expression
//     : logical_and_expression ('||' logical_and_expression)*
//     ;

// logical_and_expression
//     : inclusive_or_expression ('&&' inclusive_or_expression)*
//     ;

// inclusive_or_expression
//     : exclusive_or_expression ('|' exclusive_or_expression)*
//     ;

// exclusive_or_expression
//     : and_expression ('^' and_expression)*
//     ;

// and_expression
//     : equality_expression ('&' equality_expression)*
//     ;
// equality_expression
//     : relational_expression (('=='|'!=') relational_expression)*
//     ;

// relational_expression
//     : shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
//     ;

// shift_expression
//     : additive_expression (('<<'|'>>') additive_expression)*
//     ;

// // S t a t e m e n t s

// statement
//     : labeled_statement
//     | compound_statement
//     | expression_statement
//     | selection_statement
//     | iteration_statement
//     | jump_statement
//     ;

// labeled_statement
//     : IDENTIFIER ':' statement
//     | 'case' constant_expression ':' statement
//     | 'default' ':' statement
//     ;

// compound_statement
// scope Symbols; // blocks have a scope of symbols
// @init {
//   $Symbols::types = set()
// }
//     : '{' declaration* statement_list? '}'
//     ;

// statement_list
//     : statement+
//     ;

// expression_statement
//     : ';'
//     | expression ';'
//     ;

// selection_statement
//     : 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
//     | 'switch' '(' expression ')' statement
//     ;

// iteration_statement
//     : 'while' '(' expression ')' statement
//     | 'do' statement 'while' '(' expression ')' ';'
//     | 'for' '(' expression_statement expression_statement expression? ')' statement
//     ;

// jump_statement
//     : 'goto' IDENTIFIER ';'
//     | 'continue' ';'
//     | 'break' ';'
//     | 'return' ';'
//     | 'return' expression ';'
//     ;

// ---------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------

// A letter followed by any mix of letters and decimal digits.
IDENTIFIER
    :   LETTER (LETTER|'0'..'9')*
    ;

fragment
LETTER
    :   '$'
    |   'A'..'Z'
    |   'a'..'z'
    |   '_'
    ;

// Exactly one character or escape sequence between single quotes.
CHARACTER_LITERAL
    :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    ;

// Zero or more characters or escape sequences between double quotes.
STRING_LITERAL
    :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    ;

HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;

DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;

OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;

fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;

fragment
IntegerTypeSuffix
    :   ('u'|'U')? ('l'|'L')
    |   ('u'|'U')  ('l'|'L')?
    ;

FLOATING_POINT_LITERAL
    :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    |   ('0'..'9')+ Exponent FloatTypeSuffix?
    |   ('0'..'9')+ Exponent? FloatTypeSuffix
    ;

fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;

fragment
EscapeSequence
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   OctalEscape
    ;

fragment
OctalEscape
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;

// Defined for completeness; no other rule in this grammar references it.
fragment
UnicodeEscape
    :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    ;

// Each whitespace character becomes its own token on the hidden channel.
WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
    ;

// Block comment; the non-greedy loop stops at the first closing delimiter.
COMMENT
    :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    ;

// Line comment. The line terminator is optional so that a comment on the
// last line of an input that lacks a final newline still lexes; when a
// terminator is present it is consumed as part of this token, as before.
LINE_COMMENT
    : '//' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;}
    ;

// ignore #line info for now
// The line terminator is optional here for the same end-of-input reason
// as in LINE_COMMENT.
LINE_COMMAND
    : '#' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;}
    ;