Home | History | Annotate | Download | only in parser
      1 #!/usr/bin/ruby
      2 # encoding: utf-8
      3 
      4 require 'antlr3/test/functional'
      5 
      6 class TestBacktracking < ANTLR3::Test::Functional
      7 
      8   inline_grammar( <<-'END' )
      9     grammar Backtrack;
     10     options {
     11 			language = Ruby;
     12 			backtrack=true;
     13 			memoize=true;
     14 			k=2;
     15     }
     16     
     17     scope Symbols {
     18     	types;
     19     }
     20     
     21     @members {
     22       def is_type_name?(name)
     23         @Symbols_stack.reverse_each do |scope|
     24           scope.types.include?(name) and return true
     25         end
     26         return false
     27       end
     28       
     29       def report_error(e)
     30         # do nothing
     31       end
     32       
     33     }
     34     
     35     translation_unit
     36     scope Symbols; // entire file is a scope
     37     @init {
     38       $Symbols::types = Set.new
     39     }
     40     	: external_declaration+
     41     	;
     42     
     43     /** Either a function definition or any other kind of C decl/def.
     44      *  The LL(*) analysis algorithm fails to deal with this due to
     45      *  recursion in the declarator rules.  I'm putting in a
     46      *  manual predicate here so that we don't backtrack over
     47      *  the entire function.  Further, you get a better error
     48      *  as errors within the function itself don't make it fail
     49      *  to predict that it's a function.  Weird errors previously.
     50      *  Remember: the goal is to avoid backtrack like the plague
     51      *  because it makes debugging, actions, and errors harder.
     52      *
     53      *  Note that k=1 results in a much smaller predictor for the 
     54      *  fixed look; k=2 made a few extra thousand lines. ;)
     55      *  I'll have to optimize that in the future.
     56      */
     57     external_declaration
     58     options {k=1;}
     59     	: ( declaration_specifiers? declarator declaration* '{' )=> function_definition
     60     	| declaration
     61     	;
     62     
     63     function_definition
     64     scope Symbols; // put parameters and locals into same scope for now
     65     @init {
     66       $Symbols::types = set()
     67     }
     68     	:	declaration_specifiers? declarator
     69     	;
     70     
     71     declaration
     72     scope {
     73       is_type_def;
     74     }
     75     @init {
     76       $declaration::is_type_def = false
     77     }
     78     	: 'typedef' declaration_specifiers? {$declaration::is_type_def = true}
     79     	  init_declarator_list ';' // special case, looking for typedef	
     80     	| declaration_specifiers init_declarator_list? ';'
     81     	;
     82     
     83     declaration_specifiers
     84     	:   (   storage_class_specifier
     85     		|   type_specifier
     86             |   type_qualifier
     87             )+
     88     	;
     89     
     90     init_declarator_list
     91     	: init_declarator (',' init_declarator)*
     92     	;
     93     
     94     init_declarator
     95     	: declarator //('=' initializer)?
     96     	;
     97     
     98     storage_class_specifier
     99     	: 'extern'
    100     	| 'static'
    101     	| 'auto'
    102     	| 'register'
    103     	;
    104     
    105     type_specifier
    106     	: 'void'
    107     	| 'char'
    108     	| 'short'
    109     	| 'int'
    110     	| 'long'
    111     	| 'float'
    112     	| 'double'
    113     	| 'signed'
    114     	| 'unsigned'
    115     	| type_id
    116     	;
    117     
    118     type_id
    119         :   { is_type_name?(@input.look(1).text)}? IDENTIFIER
    120         ;
    121     
    122     type_qualifier
    123     	: 'const'
    124     	| 'volatile'
    125     	;
    126     
    127     declarator
    128     	: pointer? direct_declarator
    129     	| pointer
    130     	;
    131     
    132     direct_declarator
    133     	:   (	IDENTIFIER
    134     			{
    135     			if $declaration.length > 0 && $declaration::is_type_def
    136 						$Symbols::types.add($IDENTIFIER.text)
    137 					end
    138     			}
    139     		|	'(' declarator ')'
    140     		)
    141             declarator_suffix*
    142     	;
    143     
    144     declarator_suffix
    145     	:   /*'[' constant_expression ']'
    146         |*/   '[' ']'
    147         |   '(' ')'
    148     	;
    149     
    150     pointer
    151     	: '*' type_qualifier+ pointer?
    152     	| '*' pointer
    153     	| '*'
    154     	;
    155     
    156     IDENTIFIER
    157     	:	LETTER (LETTER|'0'..'9')*
    158     	;
    159     	
    160     fragment
    161     LETTER
    162     	:	'$'
    163     	|	'A'..'Z'
    164     	|	'a'..'z'
    165     	|	'_'
    166     	;
    167     
    168     CHARACTER_LITERAL
    169         :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    170         ;
    171     
    172     STRING_LITERAL
    173         :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    174         ;
    175     
    176     HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
    177     
    178     DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
    179     
    180     OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
    181     
    182     fragment
    183     HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
    184     
    185     fragment
    186     IntegerTypeSuffix
    187     	:	('u'|'U')? ('l'|'L')
    188     	|	('u'|'U')  ('l'|'L')?
    189     	;
    190     
    191     FLOATING_POINT_LITERAL
    192         :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    193         |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    194         |   ('0'..'9')+ Exponent FloatTypeSuffix?
    195         |   ('0'..'9')+ Exponent? FloatTypeSuffix
    196     	;
    197     
    198     fragment
    199     Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
    200     
    201     fragment
    202     FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
    203     
    204     fragment
    205     EscapeSequence
    206         :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    207         |   OctalEscape
    208         ;
    209     
    210     fragment
    211     OctalEscape
    212         :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    213         |   '\\' ('0'..'7') ('0'..'7')
    214         |   '\\' ('0'..'7')
    215         ;
    216     
    217     fragment
    218     UnicodeEscape
    219         :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    220         ;
    221     
    222     WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
    223         ;
    224     
    225     COMMENT
    226         :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    227         ;
    228     
    229     LINE_COMMENT
    230         : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
    231         ;
    232     LINE_COMMAND 
    233         : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
    234         ;
    235   END
    236 
    237   example "grammar with backtracking and memoization" do
    238     lexer = Backtrack::Lexer.new( 'int a;' )
    239     parser = Backtrack::Parser.new lexer
    240     events = parser.translation_unit
    241   end
    242 
    243 end
    244