Home | History | Annotate | Download | only in tests
      1 grammar t033backtracking; // combined parser+lexer grammar for a cut-down subset of C declarations
      2 options {
      3     language=Python; // generate a Python recognizer; the embedded actions below are Python code
      4     backtrack=true; // let ANTLR fall back to backtracking where LL(*) analysis cannot decide
      5     memoize=true; // cache rule results so backtracking does not re-parse the same input
      6     k=2; // default fixed-lookahead depth; external_declaration overrides it with k=1
      7 }
      8 
      9 scope Symbols {
     10 	types;
     11 } // types: set of identifiers known as type names; rules that declare "scope Symbols;" assign it a fresh set()
     12 
     13 @header {
     14 # compatibility stuff: provide set, frozenset and reversed when the builtins are missing
     15 try:
     16     set = set  # raises NameError when the builtin does not exist
     17     frozenset = frozenset
     18 except NameError:
     19     from sets import Set as set, ImmutableSet as frozenset
     20 
     21 
     22 try:
     23     reversed = reversed  # same probe, for the reversed builtin
     24 except NameError:
     25     def reversed(l):
     26         l = l[:]  # copy first so the argument is not reversed in place
     27         l.reverse()
     28         return l  # a reversed list copy, which is enough for the for-loop in isTypeName
     29 
     30 }
     31 
     32 @members {
     33     def isTypeName(self, name):  # True when name is a registered type in any open Symbols scope
     34         known = False
     35         for frame in reversed(self.Symbols_stack):
     36             if name in frame.types:
     37                 known = True
     38                 break
     39         return known
     40 }
     41 
     42 translation_unit // top-level rule; no other rule in this grammar references it
     43 scope Symbols; // entire file is a scope
     44 @init {
     45   $Symbols::types = set()  # the file-level scope starts with no known type names
     46 }
     47 	: external_declaration+ // at least one declaration or function definition
     48 	;
     49 
     50 /** Either a function definition or any other kind of C decl/def.
     51  *  The LL(*) analysis algorithm fails to deal with this due to
     52  *  recursion in the declarator rules.  I'm putting in a
     53  *  manual predicate here so that we don't backtrack over
     54  *  the entire function.  Further, you get a better error
     55  *  as errors within the function itself don't make it fail
     56  *  to predict that it's a function.  Weird errors previously.
     57  *  Remember: the goal is to avoid backtrack like the plague
     58  *  because it makes debugging, actions, and errors harder.
     59  *  (The "( ... '{' )=>" prefix on the first alternative is that manual predicate.)
     60  *  Note that k=1 results in a much smaller predictor for the
     61  *  fixed lookahead; k=2 made a few extra thousand lines. ;)
     62  *  I'll have to optimize that in the future.
     63  */
     64 external_declaration
     65 options {k=1;} // rule-level override of the grammar-wide k=2
     66 	: ( declaration_specifiers? declarator declaration* '{' )=> function_definition
     67 	| declaration // taken whenever the predicate above does not match
     68 	;
     69 
     70 function_definition // function header only; the body alternatives below are commented out
     71 scope Symbols; // put parameters and locals into same scope for now
     72 @init {
     73   $Symbols::types = set()  # fresh, empty type-name set for this function scope
     74 }
     75 	:	declaration_specifiers? declarator
     76 // 		(	declaration+ compound_statement	// K&R style
     77 // 		|	compound_statement				// ANSI style
     78 // 		)
     79 	; // NOTE(review): no rule here consumes the '{' that external_declaration's predicate requires - confirm this is intended for the test
     80 
     81 declaration // a typedef or an ordinary declaration, terminated by ';'
     82 scope {
     83   isTypedef;
     84 } // isTypedef: rule-scoped flag, True while a typedef is being parsed; read by direct_declarator
     85 @init {
     86   $declaration::isTypedef = False  # default; only the typedef alternative sets it to True
     87 }
     88 	: 'typedef' declaration_specifiers? {$declaration::isTypedef = True}
     89 	  init_declarator_list ';' // special case, looking for typedef
     90 	| declaration_specifiers init_declarator_list? ';' // ordinary declaration; the declarator list is optional
     91 	;
     92 
     93 declaration_specifiers // one or more storage classes, type specifiers and qualifiers, in any order
     94 	:   (   storage_class_specifier
     95 		|   type_specifier
     96         |   type_qualifier
     97         )+
     98 	;
     99 
    100 init_declarator_list // comma-separated list of one or more init_declarators
    101 	: init_declarator (',' init_declarator)*
    102 	;
    103 
    104 init_declarator // just a declarator; the optional initializer is commented out in this grammar
    105 	: declarator //('=' initializer)?
    106 	;
    107 
    108 storage_class_specifier // one of the storage-class keywords
    109 	: 'extern'
    110 	| 'static'
    111 	| 'auto'
    112 	| 'register'
    113 	;
    114 
    115 type_specifier // a built-in type keyword, or an identifier accepted by type_id
    116 	: 'void'
    117 	| 'char'
    118 	| 'short'
    119 	| 'int'
    120 	| 'long'
    121 	| 'float'
    122 	| 'double'
    123 	| 'signed'
    124 	| 'unsigned'
    125 // 	| struct_or_union_specifier
    126 // 	| enum_specifier
    127 	| type_id // user-defined type name, gated by a semantic predicate
    128 	;
    129 
    130 type_id // IDENTIFIER, but only when isTypeName() accepts the text of the next token
    131     :   {self.isTypeName(self.input.LT(1).getText())}? IDENTIFIER
    132 //    	{print $IDENTIFIER.text+" is a type"}   (disabled debug action)
    133     ;
    134 
    135 // struct_or_union_specifier
    136 // options {k=3;}
    137 // scope Symbols; // structs are scopes
    138 // @init {
    139 //   $Symbols::types = set()
    140 // }
    141 // 	: struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
    142 // 	| struct_or_union IDENTIFIER
    143 // 	;
    144 
    145 // struct_or_union
    146 // 	: 'struct'
    147 // 	| 'union'
    148 // 	;
    149 
    150 // struct_declaration_list
    151 // 	: struct_declaration+
    152 // 	;
    153 
    154 // struct_declaration
    155 // 	: specifier_qualifier_list struct_declarator_list ';'
    156 // 	;
    157 
    158 // specifier_qualifier_list
    159 // 	: ( type_qualifier | type_specifier )+
    160 // 	;
    161 
    162 // struct_declarator_list
    163 // 	: struct_declarator (',' struct_declarator)*
    164 // 	;
    165 
    166 // struct_declarator
    167 // 	: declarator (':' constant_expression)?
    168 // 	| ':' constant_expression
    169 // 	;
    170 
    171 // enum_specifier
    172 // options {k=3;}
    173 // 	: 'enum' '{' enumerator_list '}'
    174 // 	| 'enum' IDENTIFIER '{' enumerator_list '}'
    175 // 	| 'enum' IDENTIFIER
    176 // 	;
    177 
    178 // enumerator_list
    179 // 	: enumerator (',' enumerator)*
    180 // 	;
    181 
    182 // enumerator
    183 // 	: IDENTIFIER ('=' constant_expression)?
    184 // 	;
    185 
    186 type_qualifier // one of the type-qualifier keywords
    187 	: 'const'
    188 	| 'volatile'
    189 	;
    190 
    191 declarator // optionally pointer-prefixed direct_declarator, or a pointer on its own
    192 	: pointer? direct_declarator
    193 	| pointer // pointer with no direct_declarator after it
    194 	;
    195 
    196 direct_declarator // an identifier or a parenthesized declarator, followed by any number of suffixes
    197 	:   (	IDENTIFIER
    198 			{
    199 			if len($declaration)>0 and $declaration::isTypedef:  # only inside a declaration rule that matched typedef
    200 				$Symbols::types.add($IDENTIFIER.text)
    201 				print "define type "+$IDENTIFIER.text
    202 			}
    203 		|	'(' declarator ')'
    204 		)
    205         declarator_suffix*
    206 	;
    207 
    208 declarator_suffix // only the empty forms '[' ']' and '(' ')' are active; the others are commented out
    209 	:   /*'[' constant_expression ']'
    210     |*/   '[' ']'
    211 //     |   '(' parameter_type_list ')'
    212 //     |   '(' identifier_list ')'
    213     |   '(' ')'
    214 	;
    215 
    216 pointer // one or more '*', each optionally followed by type qualifiers
    217 	: '*' type_qualifier+ pointer? // qualified pointer, optionally followed by further pointers
    218 	| '*' pointer // unqualified pointer followed by further pointers
    219 	| '*' // a single unqualified pointer
    220 	;
    221 
    222 // parameter_type_list
    223 // 	: parameter_list (',' '...')?
    224 // 	;
    225 
    226 // parameter_list
    227 // 	: parameter_declaration (',' parameter_declaration)*
    228 // 	;
    229 
    230 // parameter_declaration
    231 // 	: declaration_specifiers (declarator|abstract_declarator)*
    232 // 	;
    233 
    234 // identifier_list
    235 // 	: IDENTIFIER (',' IDENTIFIER)*
    236 // 	;
    237 
    238 // type_name
    239 // 	: specifier_qualifier_list abstract_declarator?
    240 // 	;
    241 
    242 // abstract_declarator
    243 // 	: pointer direct_abstract_declarator?
    244 // 	| direct_abstract_declarator
    245 // 	;
    246 
    247 // direct_abstract_declarator
    248 // 	:	( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
    249 // 	;
    250 
    251 // abstract_declarator_suffix
    252 // 	:	'[' ']'
    253 // 	|	'[' constant_expression ']'
    254 // 	|	'(' ')'
    255 // 	|	'(' parameter_type_list ')'
    256 // 	;
    257 	
    258 // initializer
    259 // 	: assignment_expression
    260 // 	| '{' initializer_list ','? '}'
    261 // 	;
    262 
    263 // initializer_list
    264 // 	: initializer (',' initializer)*
    265 // 	;
    266 
    267 // // E x p r e s s i o n s
    268 
    269 // argument_expression_list
    270 // 	:   assignment_expression (',' assignment_expression)*
    271 // 	;
    272 
    273 // additive_expression
    274 // 	: (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
    275 // 	;
    276 
    277 // multiplicative_expression
    278 // 	: (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
    279 // 	;
    280 
    281 // cast_expression
    282 // 	: '(' type_name ')' cast_expression
    283 // 	| unary_expression
    284 // 	;
    285 
    286 // unary_expression
    287 // 	: postfix_expression
    288 // 	| '++' unary_expression
    289 // 	| '--' unary_expression
    290 // 	| unary_operator cast_expression
    291 // 	| 'sizeof' unary_expression
    292 // 	| 'sizeof' '(' type_name ')'
    293 // 	;
    294 
    295 // postfix_expression
    296 // 	:   primary_expression
    297 //         (   '[' expression ']'
    298 //         |   '(' ')'
    299 //         |   '(' argument_expression_list ')'
    300 //         |   '.' IDENTIFIER
    301 //         |   '*' IDENTIFIER
    302 //         |   '->' IDENTIFIER
    303 //         |   '++'
    304 //         |   '--'
    305 //         )*
    306 // 	;
    307 
    308 // unary_operator
    309 // 	: '&'
    310 // 	| '*'
    311 // 	| '+'
    312 // 	| '-'
    313 // 	| '~'
    314 // 	| '!'
    315 // 	;
    316 
    317 // primary_expression
    318 // 	: IDENTIFIER
    319 // 	| constant
    320 // 	| '(' expression ')'
    321 // 	;
    322 
    323 // constant
    324 //     :   HEX_LITERAL
    325 //     |   OCTAL_LITERAL
    326 //     |   DECIMAL_LITERAL
    327 //     |	CHARACTER_LITERAL
    328 // 	|	STRING_LITERAL
    329 //     |   FLOATING_POINT_LITERAL
    330 //     ;
    331 
    332 // /////
    333 
    334 // expression
    335 // 	: assignment_expression (',' assignment_expression)*
    336 // 	;
    337 
    338 // constant_expression
    339 // 	: conditional_expression
    340 // 	;
    341 
    342 // assignment_expression
    343 // 	: lvalue assignment_operator assignment_expression
    344 // 	| conditional_expression
    345 // 	;
    346 	
    347 // lvalue
    348 // 	:	unary_expression
    349 // 	;
    350 
    351 // assignment_operator
    352 // 	: '='
    353 // 	| '*='
    354 // 	| '/='
    355 // 	| '%='
    356 // 	| '+='
    357 // 	| '-='
    358 // 	| '<<='
    359 // 	| '>>='
    360 // 	| '&='
    361 // 	| '^='
    362 // 	| '|='
    363 // 	;
    364 
    365 // conditional_expression
    366 // 	: logical_or_expression ('?' expression ':' conditional_expression)?
    367 // 	;
    368 
    369 // logical_or_expression
    370 // 	: logical_and_expression ('||' logical_and_expression)*
    371 // 	;
    372 
    373 // logical_and_expression
    374 // 	: inclusive_or_expression ('&&' inclusive_or_expression)*
    375 // 	;
    376 
    377 // inclusive_or_expression
    378 // 	: exclusive_or_expression ('|' exclusive_or_expression)*
    379 // 	;
    380 
    381 // exclusive_or_expression
    382 // 	: and_expression ('^' and_expression)*
    383 // 	;
    384 
    385 // and_expression
    386 // 	: equality_expression ('&' equality_expression)*
    387 // 	;
    388 // equality_expression
    389 // 	: relational_expression (('=='|'!=') relational_expression)*
    390 // 	;
    391 
    392 // relational_expression
    393 // 	: shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
    394 // 	;
    395 
    396 // shift_expression
    397 // 	: additive_expression (('<<'|'>>') additive_expression)*
    398 // 	;
    399 
    400 // // S t a t e m e n t s
    401 
    402 // statement
    403 // 	: labeled_statement
    404 // 	| compound_statement
    405 // 	| expression_statement
    406 // 	| selection_statement
    407 // 	| iteration_statement
    408 // 	| jump_statement
    409 // 	;
    410 
    411 // labeled_statement
    412 // 	: IDENTIFIER ':' statement
    413 // 	| 'case' constant_expression ':' statement
    414 // 	| 'default' ':' statement
    415 // 	;
    416 
    417 // compound_statement
    418 // scope Symbols; // blocks have a scope of symbols
    419 // @init {
    420 //   $Symbols::types = set()
    421 // }
    422 // 	: '{' declaration* statement_list? '}'
    423 // 	;
    424 
    425 // statement_list
    426 // 	: statement+
    427 // 	;
    428 
    429 // expression_statement
    430 // 	: ';'
    431 // 	| expression ';'
    432 // 	;
    433 
    434 // selection_statement
    435 // 	: 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
    436 // 	| 'switch' '(' expression ')' statement
    437 // 	;
    438 
    439 // iteration_statement
    440 // 	: 'while' '(' expression ')' statement
    441 // 	| 'do' statement 'while' '(' expression ')' ';'
    442 // 	| 'for' '(' expression_statement expression_statement expression? ')' statement
    443 // 	;
    444 
    445 // jump_statement
    446 // 	: 'goto' IDENTIFIER ';'
    447 // 	| 'continue' ';'
    448 // 	| 'break' ';'
    449 // 	| 'return' ';'
    450 // 	| 'return' expression ';'
    451 // 	;
    452 
    453 IDENTIFIER // a LETTER followed by any number of LETTERs or decimal digits
    454 	:	LETTER (LETTER|'0'..'9')*
    455 	;
    456 	
    457 fragment
    458 LETTER // helper for IDENTIFIER: '$', '_' or an ASCII letter
    459 	:	'$'
    460 	|	'A'..'Z'
    461 	|	'a'..'z'
    462 	|	'_'
    463 	;
    464 
    465 CHARACTER_LITERAL // single-quoted: exactly one escape sequence or one character other than ' and \
    466     :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    467     ;
    468 
    469 STRING_LITERAL // double-quoted: any number of escape sequences or characters other than \ and "
    470     :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    471     ;
    472 
    473 HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; // 0x or 0X, one or more hex digits, optional u/l suffix
    474 
    475 DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ; // a lone 0, or digits not starting with 0; optional u/l suffix
    476 
    477 OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ; // leading 0, one or more octal digits, optional u/l suffix
    478 
    479 fragment
    480 HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; // one hexadecimal digit, either case
    481 
    482 fragment
    483 IntegerTypeSuffix // u/U and/or l/L, in that order, with at least one of them present
    484 	:	('u'|'U')? ('l'|'L')
    485 	|	('u'|'U')  ('l'|'L')?
    486 	;
    487 
    488 FLOATING_POINT_LITERAL // four accepted shapes, one per alternative
    489     :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? // digits, a dot, optional fraction digits
    490     |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix? // leading dot followed by digits
    491     |   ('0'..'9')+ Exponent FloatTypeSuffix? // no dot, mandatory exponent
    492     |   ('0'..'9')+ Exponent? FloatTypeSuffix // no dot, mandatory type suffix
    493 	;
    494 
    495 fragment
    496 Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; // e or E, optional sign, one or more digits
    497 
    498 fragment
    499 FloatTypeSuffix : ('f'|'F'|'d'|'D') ; // single-letter suffix: f, F, d or D
    500 
    501 fragment
    502 EscapeSequence // backslash escapes accepted inside character and string literals
    503     :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    504     |   OctalEscape
    505     ;
    506 
    507 fragment
    508 OctalEscape // backslash followed by one to three octal digits
    509     :   '\\' ('0'..'3') ('0'..'7') ('0'..'7') // three digits; the first is limited to 0..3
    510     |   '\\' ('0'..'7') ('0'..'7')
    511     |   '\\' ('0'..'7')
    512     ;
    513 
    514 fragment
    515 UnicodeEscape // backslash, 'u', four hex digits. NOTE(review): no rule in the visible grammar references this fragment
    516     :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    517     ;
    518 
    519 WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} // one whitespace character per token, sent to the hidden channel
    520     ;
    521 
    522 COMMENT // block comment, sent to the hidden channel
    523     :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} // non-greedy loop, so the token ends at the first '*/'
    524     ;
    525 
    526 LINE_COMMENT // '//' through the end of the line, sent to the hidden channel
    527     : '//' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;} // terminator is optional so a comment ending at EOF (no trailing newline) still lexes
    528     ;
    529 
    530 // ignore #line info for now: any line starting with '#' goes to the hidden channel
    531 LINE_COMMAND 
    532     : '#' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;} // terminator is optional so a '#' line ending at EOF still lexes
    533     ;
    534 
    535