Home | History | Annotate | Download | only in functional
      1 grammar t033backtracking;
      2 options {
      3     language=JavaScript;
      4     backtrack=true;
      5     memoize=true;
      6     k=2;
      7 }
      8 
      9 scope Symbols {
     10 	types;
     11 }
     12 
     13 @members {
     14 this.isTypeName = function(name) {
     15     var i, scope;
     16     for (i=this.Symbols_stack.length-1; i>=0; i--) {
     17         scope = this.Symbols_stack[i];
     18         if (!org.antlr.lang.isUndefined(scope.types[name])) {
     19             return true;
     20         }
     21     }
     22     return false;
     23 };
     24 }
     25 
     26 translation_unit
     27 scope Symbols; // entire file is a scope
     28 @init {
     29   $Symbols::types = {};
     30 }
     31 	: external_declaration+
     32 	;
     33 
     34 /** Either a function definition or any other kind of C decl/def.
     35  *  The LL(*) analysis algorithm fails to deal with this due to
     36  *  recursion in the declarator rules.  I'm putting in a
     37  *  manual predicate here so that we don't backtrack over
     38  *  the entire function.  Further, you get a better error
     39  *  as errors within the function itself don't make it fail
     40  *  to predict that it's a function.  Weird errors previously.
     41  *  Remember: the goal is to avoid backtrack like the plague
     42  *  because it makes debugging, actions, and errors harder.
     43  *
     44  *  Note that k=1 results in a much smaller predictor for the 
     45  *  fixed lookahead; k=2 made a few extra thousand lines. ;)
     46  *  I'll have to optimize that in the future.
     47  */
     48 external_declaration
     49 options {k=1;}
     50 	: ( declaration_specifiers? declarator declaration* '{' )=> function_definition
     51 	| declaration
     52 	;
     53 
     54 function_definition
     55 scope Symbols; // put parameters and locals into same scope for now
     56 @init {
     57   $Symbols::types = set()
     58 }
     59 	:	declaration_specifiers? declarator
     60 // 		(	declaration+ compound_statement	// K&R style
     61 // 		|	compound_statement				// ANSI style
     62 // 		)
     63 	;
     64 
     65 declaration
     66 scope {
     67   isTypedef;
     68 }
     69 @init {
     70   $declaration::isTypedef = false;
     71 }
     72 	: 'typedef' declaration_specifiers? {$declaration::isTypedef = true;}
     73 	  init_declarator_list ';' // special case, looking for typedef	
     74 	| declaration_specifiers init_declarator_list? ';'
     75 	;
     76 
     77 declaration_specifiers
     78 	:   (   storage_class_specifier
     79 		|   type_specifier
     80         |   type_qualifier
     81         )+
     82 	;
     83 
     84 init_declarator_list
     85 	: init_declarator (',' init_declarator)*
     86 	;
     87 
     88 init_declarator
     89 	: declarator //('=' initializer)?
     90 	;
     91 
     92 storage_class_specifier
     93 	: 'extern'
     94 	| 'static'
     95 	| 'auto'
     96 	| 'register'
     97 	;
     98 
     99 type_specifier
    100 	: 'void'
    101 	| 'char'
    102 	| 'short'
    103 	| 'int'
    104 	| 'long'
    105 	| 'float'
    106 	| 'double'
    107 	| 'signed'
    108 	| 'unsigned'
    109 // 	| struct_or_union_specifier
    110 // 	| enum_specifier
    111 	| type_id
    112 	;
    113 
    114 type_id
    115     :   {this.isTypeName(this.input.LT(1).getText())}? IDENTIFIER
    116 //    	{System.out.println($IDENTIFIER.text+" is a type");}
    117     ;
    118 
    119 // struct_or_union_specifier
    120 // options {k=3;}
    121 // scope Symbols; // structs are scopes
    122 // @init {
    123 //   $Symbols::types = set()
    124 // }
    125 // 	: struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
    126 // 	| struct_or_union IDENTIFIER
    127 // 	;
    128 
    129 // struct_or_union
    130 // 	: 'struct'
    131 // 	| 'union'
    132 // 	;
    133 
    134 // struct_declaration_list
    135 // 	: struct_declaration+
    136 // 	;
    137 
    138 // struct_declaration
    139 // 	: specifier_qualifier_list struct_declarator_list ';'
    140 // 	;
    141 
    142 // specifier_qualifier_list
    143 // 	: ( type_qualifier | type_specifier )+
    144 // 	;
    145 
    146 // struct_declarator_list
    147 // 	: struct_declarator (',' struct_declarator)*
    148 // 	;
    149 
    150 // struct_declarator
    151 // 	: declarator (':' constant_expression)?
    152 // 	| ':' constant_expression
    153 // 	;
    154 
    155 // enum_specifier
    156 // options {k=3;}
    157 // 	: 'enum' '{' enumerator_list '}'
    158 // 	| 'enum' IDENTIFIER '{' enumerator_list '}'
    159 // 	| 'enum' IDENTIFIER
    160 // 	;
    161 
    162 // enumerator_list
    163 // 	: enumerator (',' enumerator)*
    164 // 	;
    165 
    166 // enumerator
    167 // 	: IDENTIFIER ('=' constant_expression)?
    168 // 	;
    169 
    170 type_qualifier
    171 	: 'const'
    172 	| 'volatile'
    173 	;
    174 
    175 declarator
    176 	: pointer? direct_declarator
    177 	| pointer
    178 	;
    179 
    180 direct_declarator
    181 	:   (	IDENTIFIER
    182 			{
    183 			if ($declaration.length >0 && $declaration::isTypedef) {
    184 				$Symbols::types[$IDENTIFIER.text] = true;
    185 				alert ("define type "+$IDENTIFIER.text);
    186             }
    187 			}
    188 		|	'(' declarator ')'
    189 		)
    190         declarator_suffix*
    191 	;
    192 
    193 declarator_suffix
    194 	:   /*'[' constant_expression ']'
    195     |*/   '[' ']'
    196 //     |   '(' parameter_type_list ')'
    197 //     |   '(' identifier_list ')'
    198     |   '(' ')'
    199 	;
    200 
    201 pointer
    202 	: '*' type_qualifier+ pointer?
    203 	| '*' pointer
    204 	| '*'
    205 	;
    206 
    207 // parameter_type_list
    208 // 	: parameter_list (',' '...')?
    209 // 	;
    210 
    211 // parameter_list
    212 // 	: parameter_declaration (',' parameter_declaration)*
    213 // 	;
    214 
    215 // parameter_declaration
    216 // 	: declaration_specifiers (declarator|abstract_declarator)*
    217 // 	;
    218 
    219 // identifier_list
    220 // 	: IDENTIFIER (',' IDENTIFIER)*
    221 // 	;
    222 
    223 // type_name
    224 // 	: specifier_qualifier_list abstract_declarator?
    225 // 	;
    226 
    227 // abstract_declarator
    228 // 	: pointer direct_abstract_declarator?
    229 // 	| direct_abstract_declarator
    230 // 	;
    231 
    232 // direct_abstract_declarator
    233 // 	:	( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
    234 // 	;
    235 
    236 // abstract_declarator_suffix
    237 // 	:	'[' ']'
    238 // 	|	'[' constant_expression ']'
    239 // 	|	'(' ')'
    240 // 	|	'(' parameter_type_list ')'
    241 // 	;
    242 	
    243 // initializer
    244 // 	: assignment_expression
    245 // 	| '{' initializer_list ','? '}'
    246 // 	;
    247 
    248 // initializer_list
    249 // 	: initializer (',' initializer)*
    250 // 	;
    251 
    252 // // E x p r e s s i o n s
    253 
    254 // argument_expression_list
    255 // 	:   assignment_expression (',' assignment_expression)*
    256 // 	;
    257 
    258 // additive_expression
    259 // 	: (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
    260 // 	;
    261 
    262 // multiplicative_expression
    263 // 	: (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
    264 // 	;
    265 
    266 // cast_expression
    267 // 	: '(' type_name ')' cast_expression
    268 // 	| unary_expression
    269 // 	;
    270 
    271 // unary_expression
    272 // 	: postfix_expression
    273 // 	| '++' unary_expression
    274 // 	| '--' unary_expression
    275 // 	| unary_operator cast_expression
    276 // 	| 'sizeof' unary_expression
    277 // 	| 'sizeof' '(' type_name ')'
    278 // 	;
    279 
    280 // postfix_expression
    281 // 	:   primary_expression
    282 //         (   '[' expression ']'
    283 //         |   '(' ')'
    284 //         |   '(' argument_expression_list ')'
    285 //         |   '.' IDENTIFIER
    286 //         |   '*' IDENTIFIER
    287 //         |   '->' IDENTIFIER
    288 //         |   '++'
    289 //         |   '--'
    290 //         )*
    291 // 	;
    292 
    293 // unary_operator
    294 // 	: '&'
    295 // 	| '*'
    296 // 	| '+'
    297 // 	| '-'
    298 // 	| '~'
    299 // 	| '!'
    300 // 	;
    301 
    302 // primary_expression
    303 // 	: IDENTIFIER
    304 // 	| constant
    305 // 	| '(' expression ')'
    306 // 	;
    307 
    308 // constant
    309 //     :   HEX_LITERAL
    310 //     |   OCTAL_LITERAL
    311 //     |   DECIMAL_LITERAL
    312 //     |	CHARACTER_LITERAL
    313 // 	|	STRING_LITERAL
    314 //     |   FLOATING_POINT_LITERAL
    315 //     ;
    316 
    317 // /////
    318 
    319 // expression
    320 // 	: assignment_expression (',' assignment_expression)*
    321 // 	;
    322 
    323 // constant_expression
    324 // 	: conditional_expression
    325 // 	;
    326 
    327 // assignment_expression
    328 // 	: lvalue assignment_operator assignment_expression
    329 // 	| conditional_expression
    330 // 	;
    331 	
    332 // lvalue
    333 // 	:	unary_expression
    334 // 	;
    335 
    336 // assignment_operator
    337 // 	: '='
    338 // 	| '*='
    339 // 	| '/='
    340 // 	| '%='
    341 // 	| '+='
    342 // 	| '-='
    343 // 	| '<<='
    344 // 	| '>>='
    345 // 	| '&='
    346 // 	| '^='
    347 // 	| '|='
    348 // 	;
    349 
    350 // conditional_expression
    351 // 	: logical_or_expression ('?' expression ':' conditional_expression)?
    352 // 	;
    353 
    354 // logical_or_expression
    355 // 	: logical_and_expression ('||' logical_and_expression)*
    356 // 	;
    357 
    358 // logical_and_expression
    359 // 	: inclusive_or_expression ('&&' inclusive_or_expression)*
    360 // 	;
    361 
    362 // inclusive_or_expression
    363 // 	: exclusive_or_expression ('|' exclusive_or_expression)*
    364 // 	;
    365 
    366 // exclusive_or_expression
    367 // 	: and_expression ('^' and_expression)*
    368 // 	;
    369 
    370 // and_expression
    371 // 	: equality_expression ('&' equality_expression)*
    372 // 	;
    373 // equality_expression
    374 // 	: relational_expression (('=='|'!=') relational_expression)*
    375 // 	;
    376 
    377 // relational_expression
    378 // 	: shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
    379 // 	;
    380 
    381 // shift_expression
    382 // 	: additive_expression (('<<'|'>>') additive_expression)*
    383 // 	;
    384 
    385 // // S t a t e m e n t s
    386 
    387 // statement
    388 // 	: labeled_statement
    389 // 	| compound_statement
    390 // 	| expression_statement
    391 // 	| selection_statement
    392 // 	| iteration_statement
    393 // 	| jump_statement
    394 // 	;
    395 
    396 // labeled_statement
    397 // 	: IDENTIFIER ':' statement
    398 // 	| 'case' constant_expression ':' statement
    399 // 	| 'default' ':' statement
    400 // 	;
    401 
    402 // compound_statement
    403 // scope Symbols; // blocks have a scope of symbols
    404 // @init {
    405 //   $Symbols::types = {}
    406 // }
    407 // 	: '{' declaration* statement_list? '}'
    408 // 	;
    409 
    410 // statement_list
    411 // 	: statement+
    412 // 	;
    413 
    414 // expression_statement
    415 // 	: ';'
    416 // 	| expression ';'
    417 // 	;
    418 
    419 // selection_statement
    420 // 	: 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
    421 // 	| 'switch' '(' expression ')' statement
    422 // 	;
    423 
    424 // iteration_statement
    425 // 	: 'while' '(' expression ')' statement
    426 // 	| 'do' statement 'while' '(' expression ')' ';'
    427 // 	| 'for' '(' expression_statement expression_statement expression? ')' statement
    428 // 	;
    429 
    430 // jump_statement
    431 // 	: 'goto' IDENTIFIER ';'
    432 // 	| 'continue' ';'
    433 // 	| 'break' ';'
    434 // 	| 'return' ';'
    435 // 	| 'return' expression ';'
    436 // 	;
    437 
    438 IDENTIFIER
    439 	:	LETTER (LETTER|'0'..'9')*
    440 	;
    441 	
    442 fragment
    443 LETTER
    444 	:	'$'
    445 	|	'A'..'Z'
    446 	|	'a'..'z'
    447 	|	'_'
    448 	;
    449 
    450 CHARACTER_LITERAL
    451     :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    452     ;
    453 
    454 STRING_LITERAL
    455     :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    456     ;
    457 
    458 HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
    459 
    460 DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
    461 
    462 OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
    463 
    464 fragment
    465 HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
    466 
    467 fragment
    468 IntegerTypeSuffix
    469 	:	('u'|'U')? ('l'|'L')
    470 	|	('u'|'U')  ('l'|'L')?
    471 	;
    472 
    473 FLOATING_POINT_LITERAL
    474     :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    475     |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    476     |   ('0'..'9')+ Exponent FloatTypeSuffix?
    477     |   ('0'..'9')+ Exponent? FloatTypeSuffix
    478 	;
    479 
    480 fragment
    481 Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
    482 
    483 fragment
    484 FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
    485 
    486 fragment
    487 EscapeSequence
    488     :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    489     |   OctalEscape
    490     ;
    491 
    492 fragment
    493 OctalEscape
    494     :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    495     |   '\\' ('0'..'7') ('0'..'7')
    496     |   '\\' ('0'..'7')
    497     ;
    498 
    499 fragment
    500 UnicodeEscape
    501     :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    502     ;
    503 
    504 WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    505     ;
    506 
    507 COMMENT
    508     :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    509     ;
    510 
    511 LINE_COMMENT
    512     : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    513     ;
    514 
    515 // ignore #line info for now
    516 LINE_COMMAND 
    517     : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    518     ;
    519 
    520