Home | History | Annotate | Download | only in lexer
      1 #!/usr/bin/ruby
      2 # encoding: utf-8
      3 require 'antlr3/test/functional'
      4 
      5 class LexerTest001 < ANTLR3::Test::Functional
      6   inline_grammar( <<-'END' )
      7     lexer grammar Zero;
      8     options {
      9       language = Ruby;
     10     }
     11     
     12     @members { include ANTLR3::Test::RaiseErrors }
     13     
     14     ZERO: '0';
     15   END
     16   
     17   example %(lexing '0') do
     18     lexer = Zero::Lexer.new( '0' )
     19     
     20     token = lexer.next_token
     21     token.name.should == 'ZERO'
     22     
     23     token = lexer.next_token
     24     token.name.should == '<EOF>'
     25   end
     26   
     27   example %(iterating over tokens) do
     28     lexer = Zero::Lexer.new( '0' )
     29     
     30     token_types = lexer.map { |token| token.name }
     31     token_types.should == %w(ZERO)
     32   end
     33 
     34   example "mismatched token" do
     35     lexer = Zero::Lexer.new( '1' )
     36     
     37     proc { 
     38       token = lexer.next_token
     39     }.should raise_error( ANTLR3::Error::MismatchedToken ) do |e|
     40       e.expecting.should == '0'
     41       e.unexpected_type.should == '1'
     42     end
     43   end
     44 end
     45 
     46 class LexerTest002 < ANTLR3::Test::Functional
     47   inline_grammar( <<-'END' )
     48     lexer grammar Binary;
     49     options {
     50       language = Ruby;
     51     }
     52     
     53     @members { include ANTLR3::Test::RaiseErrors }
     54     
     55     ZERO: '0';
     56     ONE: '1';
     57   END
     58   
     59   example "lexing '01'" do
     60     lexer = Binary::Lexer.new( '01' )
     61     
     62     token = lexer.next_token
     63     token.name.should == 'ZERO'
     64     
     65     token = lexer.next_token
     66     token.name.should == 'ONE'
     67     
     68     token = lexer.next_token
     69     token.name.should == '<EOF>'
     70   end
     71   
     72   example "no matching token rule" do
     73     lexer = Binary::Lexer.new( '2' )
     74     
     75     b = lambda { token = lexer.next_token }
     76     b.should raise_error( ANTLR3::Error::NoViableAlternative ) do |exc|
     77       exc.unexpected_type.should == '2'
     78     end
     79   end
     80   
     81 end
     82 
     83 class LexerTest003 < ANTLR3::Test::Functional
     84   inline_grammar( <<-'END' )
     85     lexer grammar BinaryFooze;
     86     options {
     87       language = Ruby;
     88     }
     89     
     90     @members { include ANTLR3::Test::RaiseErrors }
     91     
     92     ZERO: '0';
     93     ONE:  '1';
     94     FOOZE: 'fooze';
     95   END
     96   
     97   example "lexing '0fooze1'" do
     98     lexer = BinaryFooze::Lexer.new( '0fooze1' )
     99     
    100     token = lexer.next_token
    101     token.name.should == 'ZERO'
    102     
    103     token = lexer.next_token
    104     token.name.should == 'FOOZE'
    105     
    106     token = lexer.next_token
    107     token.name.should == 'ONE'
    108     
    109     token = lexer.next_token
    110     token.name.should == '<EOF>'
    111   end
    112   
    113   example "no token match" do
    114     lexer = BinaryFooze::Lexer.new( '2' )
    115     
    116     proc { lexer.next_token }.
    117     should raise_error( ANTLR3::Error::NoViableAlternative ) do |exc|
    118       exc.unexpected_type.should == '2'
    119     end
    120   end
    121 end
    122 
    123 
    124 class LexerTest004 < ANTLR3::Test::Functional
    125   inline_grammar( <<-'END' )
    126     lexer grammar FooStar;
    127     options {
    128       language = Ruby;
    129     }
    130     
    131     @members { include ANTLR3::Test::RaiseErrors }
    132     
    133     FOO: 'f' 'o'*;
    134   END
    135 
    136   example "lexing 'ffofoofooo'" do
    137     lexer = FooStar::Lexer.new( 'ffofoofooo' )
    138     
    139     token = lexer.next_token
    140     token.name.should == 'FOO'
    141     token.start.should == 0
    142     token.stop.should == 0
    143     token.text.should == 'f'
    144     
    145     token = lexer.next_token
    146     token.name.should == 'FOO'
    147     token.text.should == 'fo'
    148     token.start.should == 1
    149     token.stop.should == 2
    150     
    151     token = lexer.next_token
    152     token.name.should == 'FOO'
    153     token.start.should == 3
    154     token.stop.should == 5
    155     token.text.should == 'foo'
    156     
    157     token = lexer.next_token
    158     token.name.should == 'FOO'
    159     token.start.should == 6
    160     token.stop.should == 9
    161     token.text.should == 'fooo'
    162     
    163     token = lexer.next_token
    164     token.name.should == '<EOF>'
    165   end
    166   
    167   example "mismatched token" do
    168     lexer = FooStar::Lexer.new( '2' )
    169     
    170     proc { lexer.next_token }.
    171     should raise_error( ANTLR3::Error::MismatchedToken ) do |exc|
    172       exc.expecting.should == 'f'
    173       exc.unexpected_type.should == '2'
    174     end
    175   end
    176 end
    177 
    178 class LexerTest005 < ANTLR3::Test::Functional
    179   inline_grammar( <<-'END' )
    180     lexer grammar FooPlus;
    181     options {
    182       language = Ruby;
    183     }
    184     
    185     @members { include ANTLR3::Test::RaiseErrors }
    186     
    187     FOO: 'f' 'o'+;
    188   END
    189   
    190   example "lexing 'fofoofooo'" do
    191     lexer = FooPlus::Lexer.new( 'fofoofooo' )
    192     
    193     token = lexer.next_token
    194     token.name.should == 'FOO'
    195     token.start.should == 0
    196     token.stop.should == 1
    197     token.text.should == 'fo'
    198     
    199     token = lexer.next_token
    200     token.name.should == 'FOO'
    201     token.text.should == 'foo'
    202     token.start.should == 2
    203     token.stop.should == 4
    204     
    205     token = lexer.next_token
    206     token.name.should == 'FOO'
    207     token.start.should == 5
    208     token.stop.should == 8
    209     token.text.should == 'fooo'
    210     
    211     token = lexer.next_token
    212     token.name.should == '<EOF>'
    213   end
    214   
    215   example "mismatched token" do
    216     lexer = FooPlus::Lexer.new( '2' )
    217     
    218     proc { lexer.next_token }.
    219     should raise_error( ANTLR3::Error::MismatchedToken ) do |exc|
    220       exc.expecting.should == 'f'
    221       exc.unexpected_type.should == '2'
    222     end
    223   end
    224   
    225   example "early exit" do
    226     lexer = FooPlus::Lexer.new( 'f' )
    227     
    228     proc { token = lexer.next_token }.
    229     should raise_error( ANTLR3::Error::EarlyExit ) { |exc|
    230       exc.unexpected_type.should == ANTLR3::Constants::EOF
    231     }
    232   end
    233   
    234 end
    235 
    236 class LexerTest006 < ANTLR3::Test::Functional
    237   inline_grammar( <<-'END' )
    238     lexer grammar FoaStar;
    239     options {
    240       language = Ruby;
    241     }
    242     
    243     @members { include ANTLR3::Test::RaiseErrors }
    244     
    245     FOO: 'f' ('o' | 'a')*;
    246   END
    247   
    248   example "lexing 'fofaaooa'" do
    249     lexer = FoaStar::Lexer.new( 'fofaaooa' )
    250     
    251     token = lexer.next_token
    252     token.name.should == 'FOO'
    253     token.start.should == 0
    254     token.stop.should == 1
    255     token.text.should == 'fo'
    256     
    257     token = lexer.next_token
    258     token.name.should == 'FOO'
    259     token.text.should == 'faaooa'
    260     token.start.should == 2
    261     token.stop.should == 7
    262     
    263     token = lexer.next_token
    264     token.name.should == '<EOF>'
    265   end
    266   
    267   example "mismatched token" do
    268     lexer = FoaStar::Lexer.new( 'fofoaooaoa2' )
    269     
    270     lexer.next_token
    271     lexer.next_token
    272     proc { lexer.next_token }.
    273     should raise_error( ANTLR3::Error::MismatchedToken ) do |exc|
    274       exc.expecting.should == 'f'
    275       exc.unexpected_type.should == '2'
    276       exc.column.should == 10
    277       exc.line.should == 1
    278     end
    279   end
    280 end
    281 
    282 class LexerTest007 < ANTLR3::Test::Functional
    283   inline_grammar( <<-'END' )
    284     lexer grammar Foab;
    285     options {
    286       language = Ruby;
    287     }
    288     
    289     @members { include ANTLR3::Test::RaiseErrors }
    290     
    291     FOO: 'f' ('o' | 'a' 'b'+)*;
    292   END
    293   
    294   example "lexing 'fofababbooabb'" do
    295     lexer = Foab::Lexer.new( 'fofababbooabb' )
    296     
    297     token = lexer.next_token
    298     token.name.should == 'FOO'
    299     token.start.should == 0
    300     token.stop.should == 1
    301     token.text.should == 'fo'
    302     
    303     token = lexer.next_token
    304     token.name.should == 'FOO'
    305     token.start.should == 2
    306     token.stop.should == 12
    307     token.text.should == 'fababbooabb'
    308     
    309     token = lexer.next_token
    310     token.name.should == '<EOF>'
    311   end
    312   
    313   example "early exit" do
    314     lexer = Foab::Lexer.new( 'foaboao' )
    315     
    316     proc { lexer.next_token }.
    317     should raise_error( ANTLR3::Error::EarlyExit ) do |exc|
    318       exc.unexpected_type.should == 'o'
    319       exc.column.should == 6
    320       exc.line.should == 1
    321     end
    322   end
    323 end
    324 
    325 class LexerTest008 < ANTLR3::Test::Functional
    326   inline_grammar( <<-'END' )
    327     lexer grammar Fa;
    328     options {
    329       language = Ruby;
    330     }
    331     
    332     @members { include ANTLR3::Test::RaiseErrors }
    333     
    334     FOO: 'f' 'a'?;
    335   END
    336   
    337   example "lexing 'ffaf'" do
    338     lexer = Fa::Lexer.new( 'ffaf' )
    339     
    340     token = lexer.next_token
    341     token.name.should == 'FOO'
    342     token.start.should == 0
    343     token.stop.should == 0
    344     token.text.should == 'f'
    345     
    346     token = lexer.next_token
    347     token.name.should == 'FOO'
    348     token.start.should == 1
    349     token.stop.should == 2
    350     token.text.should == 'fa'
    351     
    352     token = lexer.next_token
    353     token.name.should == 'FOO'
    354     token.start.should == 3
    355     token.stop.should == 3
    356     token.text.should == 'f'
    357     
    358     token = lexer.next_token
    359     token.name.should == '<EOF>'
    360   end
    361   
    362   example "mismatched token" do
    363     lexer = Fa::Lexer.new( 'fafb' )
    364     
    365     lexer.next_token
    366     lexer.next_token
    367     proc { lexer.next_token }.
    368     should raise_error( ANTLR3::Error::MismatchedToken ) do |exc|
    369       exc.unexpected_type.should == 'b'
    370       exc.column.should == 3
    371       exc.line.should == 1
    372     end
    373   end
    374 end
    375 
    376 
    377 class LexerTest009 < ANTLR3::Test::Functional
    378   inline_grammar( <<-'END' )
    379     lexer grammar Digit;
    380     options {
    381       language = Ruby;
    382     }
    383     
    384     @members { include ANTLR3::Test::RaiseErrors }
    385     
    386     DIGIT: '0' .. '9';
    387   END
    388   
    389   example "lexing '085'" do
    390     lexer = Digit::Lexer.new( '085' )
    391     
    392     token = lexer.next_token
    393     token.name.should == 'DIGIT'
    394     token.start.should == 0
    395     token.stop.should == 0
    396     token.text.should == '0'
    397     
    398     token = lexer.next_token
    399     token.name.should == 'DIGIT'
    400     token.start.should == 1
    401     token.stop.should == 1
    402     token.text.should == '8'
    403     
    404     token = lexer.next_token
    405     token.name.should == 'DIGIT'
    406     token.start.should == 2
    407     token.stop.should == 2
    408     token.text.should == '5'
    409     
    410     token = lexer.next_token
    411     token.name.should == '<EOF>'
    412   end
    413   
    414   example "mismatched range" do
    415     lexer = Digit::Lexer.new( '2a' )
    416     
    417     lexer.next_token
    418     proc { lexer.next_token }.
    419     should raise_error( ANTLR3::Error::MismatchedRange ) do |exc|
    420       exc.min.should == '0'
    421       exc.max.should == '9'
    422       exc.unexpected_type.should == 'a'
    423       exc.column.should == 1
    424       exc.line.should == 1
    425     end
    426   end
    427 end
    428 
    429 class LexerTest010 < ANTLR3::Test::Functional
    430   inline_grammar( <<-'END' )
    431     lexer grammar IDsAndSpaces;
    432     options {
    433       language = Ruby;
    434     }
    435         
    436     @members { include ANTLR3::Test::RaiseErrors }
    437     
    438     IDENTIFIER: ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*;
    439     WS: (' ' | '\n')+;
    440   END
    441 
    442   example "lexing 'foobar _Ab98 \n A12sdf'" do
    443     lexer = IDsAndSpaces::Lexer.new( "foobar _Ab98 \n A12sdf" )
    444     
    445     token = lexer.next_token
    446     token.name.should == 'IDENTIFIER'
    447     token.start.should == 0
    448     token.stop.should == 5
    449     token.text.should == 'foobar'
    450     
    451     token = lexer.next_token
    452     token.name.should == 'WS'
    453     token.start.should == 6
    454     token.stop.should == 6
    455     token.text.should == ' '
    456     
    457     token = lexer.next_token
    458     token.name.should == 'IDENTIFIER'
    459     token.start.should == 7
    460     token.stop.should == 11
    461     token.text.should == '_Ab98'
    462     
    463     token = lexer.next_token
    464     token.name.should == 'WS'
    465     token.start.should == 12
    466     token.stop.should == 14
    467     token.text.should == " \n "
    468     
    469     token = lexer.next_token
    470     token.name.should == 'IDENTIFIER'
    471     token.start.should == 15
    472     token.stop.should == 20
    473     token.text.should == 'A12sdf'
    474     
    475     token = lexer.next_token
    476     token.name.should == '<EOF>'
    477   end
    478   
    479   example "contains characters without a matching token rule" do
    480     lexer = IDsAndSpaces::Lexer.new( 'a-b' )
    481     
    482     lexer.next_token
    483     proc { lexer.next_token }.
    484     should raise_error( ANTLR3::Error::NoViableAlternative ) do |exc|
    485       exc.unexpected_type.should == '-'
    486       exc.column.should == 1
    487       exc.line.should == 1
    488     end
    489   end
    490 end
    491 
    492 class LexerTest011 < ANTLR3::Test::Functional
    493   inline_grammar( <<-'END' )
    494     lexer grammar IDsWithAction;
    495     options {language = Ruby;}
    496         
    497     @members { include ANTLR3::Test::RaiseErrors }
    498     
    499     IDENTIFIER: 
    500             ('a'..'z'|'A'..'Z'|'_') 
    501             ('a'..'z'
    502             |'A'..'Z'
    503             |'0'..'9'
    504             |'_' { \$action_var = '_' }
    505             )*
    506         ;
    507     
    508     WS: (' ' | '\n')+;
    509   END
    510   
    511   example "lexing 'foobar _Ab98 \n A12sdf'" do
    512     lexer = IDsWithAction::Lexer.new( "foobar _Ab98 \n A12sdf" )
    513     
    514     token = lexer.next_token
    515     token.name.should == 'IDENTIFIER'
    516     token.start.should == 0
    517     token.stop.should == 5
    518     token.text.should == 'foobar'
    519     
    520     token = lexer.next_token
    521     token.name.should == 'WS'
    522     token.start.should == 6
    523     token.stop.should == 6
    524     token.text.should == ' '
    525     
    526     token = lexer.next_token
    527     token.name.should == 'IDENTIFIER'
    528     token.start.should == 7
    529     token.stop.should == 11
    530     token.text.should == '_Ab98'
    531     
    532     token = lexer.next_token
    533     token.name.should == 'WS'
    534     token.start.should == 12
    535     token.stop.should == 14
    536     token.text.should == " \n "
    537     
    538     token = lexer.next_token
    539     token.name.should == 'IDENTIFIER'
    540     token.start.should == 15
    541     token.stop.should == 20
    542     token.text.should == 'A12sdf'
    543     
    544     token = lexer.next_token
    545     token.name.should == '<EOF>'
    546   end
    547   
    548   example "contains characters without a matching token" do
    549     lexer = IDsWithAction::Lexer.new( 'a-b' )
    550     
    551     lexer.next_token
    552     proc { lexer.next_token }.
    553     should raise_error( ANTLR3::Error::NoViableAlternative ) do |exc|
    554       exc.unexpected_type.should == '-'
    555       exc.column.should == 1
    556       exc.line.should == 1
    557     end
    558   end
    559 end
    560