Home | History | Annotate | Download | only in lexer
      1 #!/usr/bin/ruby
      2 # encoding: utf-8
      3 
      4 require 'antlr3/test/functional'
      5 
      6 class TestFilterMode < ANTLR3::Test::Functional
          # Functional check for a lexer generated from a grammar that sets
          # `filter=true`: a Java-like snippet is run through Filter::Lexer and
          # every token it yields is collected. Nothing is asserted about the
          # tokens themselves.
      7 
          # The grammar is passed as a single-quoted heredoc (<<-'END'), so Ruby
          # performs no interpolation or backslash processing on it; sequences
          # such as '\\' and '\'' reach the grammar text unchanged.
          # NOTE(review): inline_grammar is defined outside this file; presumably
          # it builds the grammar and defines Filter::Lexer, which the example
          # below instantiates -- confirm against antlr3/test/functional.
      8   inline_grammar( <<-'END' )
      9     lexer grammar Filter;
     10     options {
     11         language = Ruby;
     12         filter=true;
     13     }
     14     
     15     IMPORT
     16       :  'import' WS QIDStar WS? ';'
     17       ;
     18       
     19     RETURN
     20       :  'return' .* ';'
     21       ;
     22     
     23     CLASS
     24       :  'class' WS ID WS? ('extends' WS QID WS?)?
     25         ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
     26       ;
     27       
     28     COMMENT
     29         :   '/*' .* '*/'
     30         ;
     31     
     32     STRING
     33         :  '"' (options {greedy=false;}: ESC | .)* '"'
     34       ;
     35     
     36     CHAR
     37       :  '\'' (options {greedy=false;}: ESC | .)* '\''
     38       ;
     39     
     40     WS  :   (' '|'\t'|'\n')+
     41         ;
     42     
     43     fragment
     44     QID :  ID ('.' ID)*
     45       ;
     46       
     47     /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     48      *  ever match since k=1 look in the QID loop of '.' will make it loop.
     49      *  I made this rule to compensate.
     50      */
     51     fragment
     52     QIDStar
     53       :  ID ('.' ID)* '.*'?
     54       ;
     55     
     56     fragment
     57     TYPE:   QID '[]'?
     58         ;
     59         
     60     fragment
     61     ARG :   TYPE WS ID
     62         ;
     63     
     64     fragment
     65     ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
     66         ;
     67     
     68     fragment
     69     ESC  :  '\\' ('"'|'\''|'\\')
     70       ;
     71   END
     72 
          # Lexes a small Java-like source with the filter-mode lexer.
          # NOTE(review): with ANTLR's filter option the lexer is expected to skip
          # input that matches none of the rules above -- see the ANTLR 3 docs.
     73   example "skipping tokens that aren't important with filter mode" do
            # NOTE(review): fixed_indent( 0 ) is defined outside this file;
            # presumably it re-indents the heredoc text to column 0 -- confirm.
     74     input = <<-END.fixed_indent( 0 )
     75       import org.antlr.runtime.*;
     76       
     77       public class Main {
     78         public static void main(String[] args) throws Exception {
     79             for (int i=0; i<args.length; i++) {
     80           CharStream input = new ANTLRFileStream(args[i]);
     81           FuzzyJava lex = new FuzzyJava(input);
     82           TokenStream tokens = new CommonTokenStream(lex);
     83           tokens.toString();
     84           //System.out.println(tokens);
     85             }
     86         }
     87       }
     88     END
     89     
            # Collect every token the lexer yields. `tokens` is never inspected
            # afterwards, so this example states no expectation about what was
            # matched; it only exercises a full pass over the input.
     90     lexer = Filter::Lexer.new( input )
     91     tokens = lexer.map { |tk| tk }
     92   end
     93   
     94 
     95 end
     96 
     97 
     98 class TestFuzzy < ANTLR3::Test::Functional
          # Functional check for "fuzzy" lexing: a grammar with `filter=true` whose
          # rule actions call `say` for the Java-like constructs they match. The
          # example compares the lexer's captured output with an expected
          # transcript.
     99 
          # The grammar is passed as a single-quoted heredoc (<<-'END'), so Ruby
          # performs no interpolation or backslash processing on it; `$name.text`
          # and '\\' reach the grammar text unchanged.
          # The @members section includes ANTLR3::Test::CaptureOutput.
          # NOTE(review): `say` (used in the actions) and `output` (read by the
          # example) are not defined in this file; presumably both come from
          # CaptureOutput -- confirm.
    100   inline_grammar( <<-'END' )
    101     lexer grammar Fuzzy;
    102     options {
    103         language = Ruby;
    104         filter=true;
    105     }
    106     
    107     @members {
    108       include ANTLR3::Test::CaptureOutput
    109     }
    110     
    111     IMPORT
    112       :  'import' WS name=QIDStar WS? ';'
    113       ;
    114       
    115     /** Avoids having "return foo;" match as a field */
    116     RETURN
    117       :  'return' (options {greedy=false;}:.)* ';'
    118       ;
    119     
    120     CLASS
    121       :  'class' WS name=ID WS? ('extends' WS QID WS?)?
    122         ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
    123         {  
    124           say("found class " << $name.text)  
    125         }
    126       ;
    127       
    128     METHOD
    129         :   TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS? 
    130            ('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
    131             {
    132               say("found method " << $name.text)
    133             }
    134         ;
    135     
    136     FIELD
    137         :   TYPE WS name=ID '[]'? WS? (';'|'=')
    138             {
    139               say("found var " << $name.text)
    140             }
    141         ;
    142     
    143     STAT:  ('if'|'while'|'switch'|'for') WS? '(' ;
    144       
    145     CALL
    146         :   name=QID WS? '('
    147             {
    148               say("found call " << $name.text)
    149             }
    150         ;
    151     
    152     COMMENT
    153         :   '/*' (options {greedy=false;} : . )* '*/'
    154             {
    155               say("found comment " << self.text)
    156             }
    157         ;
    158     
    159     SL_COMMENT
    160         :   '//' (options {greedy=false;} : . )* '\n'
    161             {
    162               say("found // comment " << self.text)
    163             }
    164         ;
    165       
    166     STRING
    167       :  '"' (options {greedy=false;}: ESC | .)* '"'
    168       ;
    169     
    170     CHAR
    171       :  '\'' (options {greedy=false;}: ESC | .)* '\''
    172       ;
    173     
    174     WS  :   (' '|'\t'|'\n')+
    175         ;
    176     
    177     fragment
    178     QID :  ID ('.' ID)*
    179       ;
    180       
    181     /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
    182      *  ever match since k=1 look in the QID loop of '.' will make it loop.
    183      *  I made this rule to compensate.
    184      */
    185     fragment
    186     QIDStar
    187       :  ID ('.' ID)* '.*'?
    188       ;
    189     
    190     fragment
    191     TYPE:   QID '[]'?
    192         ;
    193         
    194     fragment
    195     ARG :   TYPE WS ID
    196         ;
    197     
    198     fragment
    199     ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    200         ;
    201     
    202     fragment
    203     ESC  :  '\\' ('"'|'\''|'\\')
    204       ;
    205   END
    206   
          # Runs the Fuzzy lexer over a Java-like source and checks what the rule
          # actions reported, rather than the tokens themselves.
    207   example "fuzzy lexing with the filter mode option" do
            # NOTE(review): fixed_indent( 0 ) is defined outside this file;
            # presumably it re-indents the heredoc text to column 0 -- confirm.
    208     input = <<-END.fixed_indent( 0 )
    209       import org.antlr.runtime.*;
    210       
    211       public class Main {
    212         public static void main(String[] args) throws Exception {
    213             for (int i=0; i<args.length; i++) {
    214           CharStream input = new ANTLRFileStream(args[i]);
    215           FuzzyJava lex = new FuzzyJava(input);
    216           TokenStream tokens = new CommonTokenStream(lex);
    217           tokens.toString();
    218           //System.out.println(tokens);
    219             }
    220         }
    221       }
    222     END
    223     
            # Expected transcript: the messages passed to `say` by the grammar
            # actions, listed in the order the constructs appear in the input.
            # NOTE(review): assumes each `say` call contributes one line to
            # lexer.output -- confirm against CaptureOutput.
    224     expected_output = <<-END.fixed_indent( 0 )
    225       found class Main
    226       found method main
    227       found var i
    228       found var input
    229       found call ANTLRFileStream
    230       found var lex
    231       found call FuzzyJava
    232       found var tokens
    233       found call CommonTokenStream
    234       found call tokens.toString
    235       found // comment //System.out.println(tokens);
    236     END
    237     
            # Iterate over the whole input; the yielded tokens are discarded, and
            # only the output captured while lexing is compared.
    238     lexer = Fuzzy::Lexer.new( input )
    239     lexer.each { |tk| tk }
    240     lexer.output.should == expected_output
    241   end
    242 
    243 
    244 end
    245