// Cut-down C declaration grammar (ANTLR v3, Python3 target).
//
// Grammar-wide backtracking and memoization are enabled, and a dynamic
// "Symbols" scope tracks typedef'd names so that the semantic predicate in
// type_id can tell a type name from an ordinary identifier.  Large parts of
// the full C grammar (structs, enums, expressions, statements) are kept
// below as commented-out rules.
grammar t033backtracking;

options {
    language=Python3;
    backtrack=true;
    memoize=true;
    k=2;
}

// Dynamic scope shared by every rule that declares "scope Symbols;".
// "types" holds the names introduced by typedef in that scope.
scope Symbols {
    types;
}

@members {
    def isTypeName(self, name):
        """Return True if name was registered as a type in any active Symbols scope.

        Scopes are searched from the innermost (most recently pushed) outwards.
        """
        for scope in reversed(self.Symbols_stack):
            if name in scope.types:
                return True

        return False

}

// Start rule: one or more external declarations.
translation_unit
scope Symbols; // entire file is a scope
@init {
  $Symbols::types = set()
}
    :   external_declaration+
    ;

/** Either a function definition or any other kind of C decl/def.
 *  The LL(*) analysis algorithm fails to deal with this due to
 *  recursion in the declarator rules.  I'm putting in a
 *  manual predicate here so that we don't backtrack over
 *  the entire function.  Further, you get a better error
 *  as errors within the function itself don't make it fail
 *  to predict that it's a function.  Weird errors previously.
 *  Remember: the goal is to avoid backtrack like the plague
 *  because it makes debugging, actions, and errors harder.
 *
 *  Note that k=1 results in a much smaller predictor for the
 *  fixed lookahead; k=2 made a few extra thousand lines. ;)
 *  I'll have to optimize that in the future.
 *
 *  NOTE: the predicate looks ahead for '{', but function_definition
 *  currently stops after the declarator because its compound_statement
 *  alternatives are commented out.
 */
external_declaration
options {k=1;}
    :   ( declaration_specifiers? declarator declaration* '{' )=> function_definition
    |   declaration
    ;

// Function header only; the body alternatives are disabled (see below).
function_definition
scope Symbols; // put parameters and locals into same scope for now
@init {
  $Symbols::types = set()
}
    :   declaration_specifiers? declarator
//      (   declaration+ compound_statement // K&R style
//      |   compound_statement              // ANSI style
//      )
    ;

// A declaration.  The rule-local isTypedef flag is set once 'typedef' has
// been seen, so that direct_declarator records the declared identifier as
// a type name.
declaration
scope {
  isTypedef;
}
@init {
  $declaration::isTypedef = False
}
    :   'typedef' declaration_specifiers? {$declaration::isTypedef = True}
        init_declarator_list ';' // special case, looking for typedef
    |   declaration_specifiers init_declarator_list? ';'
    ;

// One or more storage classes, type specifiers and qualifiers, in any order.
declaration_specifiers
    :   (   storage_class_specifier
        |   type_specifier
        |   type_qualifier
        )+
    ;

// Comma-separated declarators.
init_declarator_list
    :   init_declarator (',' init_declarator)*
    ;

// Initializers are currently disabled, so this is just a declarator.
init_declarator
    :   declarator //('=' initializer)?
    ;

storage_class_specifier
    :   'extern'
    |   'static'
    |   'auto'
    |   'register'
    ;

// Built-in type keywords, or an identifier previously declared via typedef.
type_specifier
    :   'void'
    |   'char'
    |   'short'
    |   'int'
    |   'long'
    |   'float'
    |   'double'
    |   'signed'
    |   'unsigned'
//  |   struct_or_union_specifier
//  |   enum_specifier
    |   type_id
    ;

// An IDENTIFIER is accepted here only if the semantic predicate finds its
// text among the registered type names.
type_id
    :   {self.isTypeName(self.input.LT(1).getText())}? IDENTIFIER
//      {print($IDENTIFIER.text+" is a type")}
    ;

// struct_or_union_specifier
// options {k=3;}
// scope Symbols; // structs are scopes
// @init {
//   $Symbols::types = set()
// }
//     :   struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
//     |   struct_or_union IDENTIFIER
//     ;

// struct_or_union
//     :   'struct'
//     |   'union'
//     ;

// struct_declaration_list
//     :   struct_declaration+
//     ;

// struct_declaration
//     :   specifier_qualifier_list struct_declarator_list ';'
//     ;

// specifier_qualifier_list
//     :   ( type_qualifier | type_specifier )+
//     ;

// struct_declarator_list
//     :   struct_declarator (',' struct_declarator)*
//     ;

// struct_declarator
//     :   declarator (':' constant_expression)?
//     |   ':' constant_expression
//     ;

// enum_specifier
// options {k=3;}
//     :   'enum' '{' enumerator_list '}'
//     |   'enum' IDENTIFIER '{' enumerator_list '}'
//     |   'enum' IDENTIFIER
//     ;

// enumerator_list
//     :   enumerator (',' enumerator)*
//     ;

// enumerator
//     :   IDENTIFIER ('=' constant_expression)?
//     ;

type_qualifier
    :   'const'
    |   'volatile'
    ;

// Optional pointer prefix followed by a direct declarator, or a bare pointer.
declarator
    :   pointer? direct_declarator
    |   pointer
    ;

// An identifier or a parenthesized declarator, followed by any number of
// suffixes.  Inside a typedef declaration the identifier is added to the
// current Symbols scope as a type name.
direct_declarator
    :   (   IDENTIFIER
            {
            if $declaration and $declaration::isTypedef:
                $Symbols::types.add($IDENTIFIER.text)
                print("define type "+$IDENTIFIER.text)
            }
        |   '(' declarator ')'
        )
        declarator_suffix*
    ;

// Only the empty array and empty parameter-list suffixes are enabled.
declarator_suffix
    :   /*'[' constant_expression ']'
    |*/ '[' ']'
//  |   '(' parameter_type_list ')'
//  |   '(' identifier_list ')'
    |   '(' ')'
    ;

// One or more '*', each optionally followed by type qualifiers.
pointer
    :   '*' type_qualifier+ pointer?
    |   '*' pointer
    |   '*'
    ;

// parameter_type_list
//     :   parameter_list (',' '...')?
//     ;

// parameter_list
//     :   parameter_declaration (',' parameter_declaration)*
//     ;

// parameter_declaration
//     :   declaration_specifiers (declarator|abstract_declarator)*
//     ;

// identifier_list
//     :   IDENTIFIER (',' IDENTIFIER)*
//     ;

// type_name
//     :   specifier_qualifier_list abstract_declarator?
//     ;

// abstract_declarator
//     :   pointer direct_abstract_declarator?
//     |   direct_abstract_declarator
//     ;

// direct_abstract_declarator
//     :   ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
//     ;

// abstract_declarator_suffix
//     :   '[' ']'
//     |   '[' constant_expression ']'
//     |   '(' ')'
//     |   '(' parameter_type_list ')'
//     ;

// initializer
//     :   assignment_expression
//     |   '{' initializer_list ','? '}'
//     ;

// initializer_list
//     :   initializer (',' initializer)*
//     ;

// // E x p r e s s i o n s

// argument_expression_list
//     :   assignment_expression (',' assignment_expression)*
//     ;

// additive_expression
//     :   (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
//     ;

// multiplicative_expression
//     :   (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
//     ;

// cast_expression
//     :   '(' type_name ')' cast_expression
//     |   unary_expression
//     ;

// unary_expression
//     :   postfix_expression
//     |   '++' unary_expression
//     |   '--' unary_expression
//     |   unary_operator cast_expression
//     |   'sizeof' unary_expression
//     |   'sizeof' '(' type_name ')'
//     ;

// postfix_expression
//     :   primary_expression
//         (   '[' expression ']'
//         |   '(' ')'
//         |   '(' argument_expression_list ')'
//         |   '.' IDENTIFIER
//         |   '*' IDENTIFIER
//         |   '->' IDENTIFIER
//         |   '++'
//         |   '--'
//         )*
//     ;

// unary_operator
//     :   '&'
//     |   '*'
//     |   '+'
//     |   '-'
//     |   '~'
//     |   '!'
//     ;

// primary_expression
//     :   IDENTIFIER
//     |   constant
//     |   '(' expression ')'
//     ;

// constant
//     :   HEX_LITERAL
//     |   OCTAL_LITERAL
//     |   DECIMAL_LITERAL
//     |   CHARACTER_LITERAL
//     |   STRING_LITERAL
//     |   FLOATING_POINT_LITERAL
//     ;

// /////

// expression
//     :   assignment_expression (',' assignment_expression)*
//     ;

// constant_expression
//     :   conditional_expression
//     ;

// assignment_expression
//     :   lvalue assignment_operator assignment_expression
//     |   conditional_expression
//     ;

// lvalue
//     :   unary_expression
//     ;

// assignment_operator
//     :   '='
//     |   '*='
//     |   '/='
//     |   '%='
//     |   '+='
//     |   '-='
//     |   '<<='
//     |   '>>='
//     |   '&='
//     |   '^='
//     |   '|='
//     ;

// conditional_expression
//     :   logical_or_expression ('?' expression ':' conditional_expression)?
//     ;

// logical_or_expression
//     :   logical_and_expression ('||' logical_and_expression)*
//     ;

// logical_and_expression
//     :   inclusive_or_expression ('&&' inclusive_or_expression)*
//     ;

// inclusive_or_expression
//     :   exclusive_or_expression ('|' exclusive_or_expression)*
//     ;

// exclusive_or_expression
//     :   and_expression ('^' and_expression)*
//     ;

// and_expression
//     :   equality_expression ('&' equality_expression)*
//     ;
// equality_expression
//     :   relational_expression (('=='|'!=') relational_expression)*
//     ;

// relational_expression
//     :   shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
//     ;

// shift_expression
//     :   additive_expression (('<<'|'>>') additive_expression)*
//     ;

// // S t a t e m e n t s

// statement
//     :   labeled_statement
//     |   compound_statement
//     |   expression_statement
//     |   selection_statement
//     |   iteration_statement
//     |   jump_statement
//     ;

// labeled_statement
//     :   IDENTIFIER ':' statement
//     |   'case' constant_expression ':' statement
//     |   'default' ':' statement
//     ;

// compound_statement
// scope Symbols; // blocks have a scope of symbols
// @init {
//   $Symbols::types = set()
// }
//     :   '{' declaration* statement_list? '}'
//     ;

// statement_list
//     :   statement+
//     ;

// expression_statement
//     :   ';'
//     |   expression ';'
//     ;

// selection_statement
//     :   'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
//     |   'switch' '(' expression ')' statement
//     ;

// iteration_statement
//     :   'while' '(' expression ')' statement
//     |   'do' statement 'while' '(' expression ')' ';'
//     |   'for' '(' expression_statement expression_statement expression? ')' statement
//     ;

// jump_statement
//     :   'goto' IDENTIFIER ';'
//     |   'continue' ';'
//     |   'break' ';'
//     |   'return' ';'
//     |   'return' expression ';'
//     ;

// ---------------------------------------------------------------------
// Lexer rules
// ---------------------------------------------------------------------

IDENTIFIER
    :   LETTER (LETTER|'0'..'9')*
    ;

fragment
LETTER
    :   '$'
    |   'A'..'Z'
    |   'a'..'z'
    |   '_'
    ;

CHARACTER_LITERAL
    :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    ;

STRING_LITERAL
    :   '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    ;

HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;

DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;

OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;

fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;

fragment
IntegerTypeSuffix
    :   ('u'|'U')? ('l'|'L')
    |   ('u'|'U')  ('l'|'L')?
    ;

FLOATING_POINT_LITERAL
    :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    |   ('0'..'9')+ Exponent FloatTypeSuffix?
    |   ('0'..'9')+ Exponent? FloatTypeSuffix
    ;

fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;

fragment
EscapeSequence
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   OctalEscape
    ;

fragment
OctalEscape
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;

// Defined but not referenced by EscapeSequence or any other rule in this grammar.
fragment
UnicodeEscape
    :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    ;

// Each whitespace character becomes its own token on the hidden channel.
WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
    ;

COMMENT
    :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    ;

// Runs up to, but not including, the line terminator, so a comment on the
// last line of input without a trailing newline still lexes.  The newline,
// when present, is consumed by WS, which is hidden as well.
LINE_COMMENT
    :   '//' ~('\n'|'\r')* {$channel=HIDDEN;}
    ;

// ignore #line info for now
// Same end-of-line handling as LINE_COMMENT.
LINE_COMMAND
    :   '#' ~('\n'|'\r')* {$channel=HIDDEN;}
    ;