/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
     16 
     17 package com.google.turbine.parse;
     18 
     19 import static com.google.common.truth.Truth.assertThat;
     20 
     21 import com.google.common.escape.SourceCodeEscapers;
     22 import com.google.turbine.diag.SourceFile;
     23 import java.util.ArrayList;
     24 import java.util.List;
     25 import org.junit.Test;
     26 import org.junit.runner.RunWith;
     27 import org.junit.runners.JUnit4;
     28 
     29 @RunWith(JUnit4.class)
     30 public class LexerTest {
     31 
     32   @Test
     33   public void testSimple() {
     34     assertThat(lex("\nasd dsa\n")).containsExactly("IDENT(asd)", "IDENT(dsa)", "EOF");
     35   }
     36 
     37   @Test
     38   public void testOperator() {
     39     assertThat(lex("\nasd++asd\n")).containsExactly("IDENT(asd)", "INCR", "IDENT(asd)", "EOF");
     40   }
     41 
     42   @Test
     43   public void unterminated() {
     44     assertThat(lex("/* foo")).containsExactly("EOF");
     45     assertThat(lex("\" foo")).containsExactly("EOF");
     46   }
     47 
     48   @Test
     49   public void boolLiteral() {
     50     lexerComparisonTest("0b0101__01010");
     51     assertThat(lex("1 + 0b1000100101"))
     52         .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(0b1000100101)", "EOF");
     53   }
     54 
     55   @Test
     56   public void octalLiteral() {
     57     assertThat(lex("1 + 01234567"))
     58         .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(01234567)", "EOF");
     59   }
     60 
     61   @Test
     62   public void testLiteral() {
     63     assertThat(lex("0L")).containsExactly("LONG_LITERAL(0L)", "EOF");
     64     assertThat(lex("0")).containsExactly("INT_LITERAL(0)", "EOF");
     65     assertThat(lex("0x7fff_ffff")).containsExactly("INT_LITERAL(0x7fff_ffff)", "EOF");
     66     assertThat(lex("0177_7777_7777")).containsExactly("INT_LITERAL(0177_7777_7777)", "EOF");
     67     assertThat(lex("0b0111_1111_1111_1111_1111_1111_1111_1111"))
     68         .containsExactly("INT_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111)", "EOF");
     69     assertThat(lex("0x8000_0000")).containsExactly("INT_LITERAL(0x8000_0000)", "EOF");
     70     assertThat(lex("0200_0000_0000")).containsExactly("INT_LITERAL(0200_0000_0000)", "EOF");
     71     assertThat(lex("0b1000_0000_0000_0000_0000_0000_0000_0000"))
     72         .containsExactly("INT_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000)", "EOF");
     73     assertThat(lex("0xffff_ffff")).containsExactly("INT_LITERAL(0xffff_ffff)", "EOF");
     74     assertThat(lex("0377_7777_7777")).containsExactly("INT_LITERAL(0377_7777_7777)", "EOF");
     75     assertThat(lex("0b1111_1111_1111_1111_1111_1111_1111_1111"))
     76         .containsExactly("INT_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111)", "EOF");
     77   }
     78 
     79   @Test
     80   public void testLong() {
     81     assertThat(lex("1l")).containsExactly("LONG_LITERAL(1l)", "EOF");
     82     assertThat(lex("9223372036854775807L"))
     83         .containsExactly("LONG_LITERAL(9223372036854775807L)", "EOF");
     84     assertThat(lex("-9223372036854775808L"))
     85         .containsExactly("MINUS", "LONG_LITERAL(9223372036854775808L)", "EOF");
     86     assertThat(lex("0x7fff_ffff_ffff_ffffL"))
     87         .containsExactly("LONG_LITERAL(0x7fff_ffff_ffff_ffffL)", "EOF");
     88     assertThat(lex("07_7777_7777_7777_7777_7777L"))
     89         .containsExactly("LONG_LITERAL(07_7777_7777_7777_7777_7777L)", "EOF");
     90     assertThat(
     91             lex(
     92                 "0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
     93         .containsExactly(
     94             "LONG_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
     95             "EOF");
     96     assertThat(lex("0x8000_0000_0000_0000L"))
     97         .containsExactly("LONG_LITERAL(0x8000_0000_0000_0000L)", "EOF");
     98     assertThat(lex("010_0000_0000_0000_0000_0000L"))
     99         .containsExactly("LONG_LITERAL(010_0000_0000_0000_0000_0000L)", "EOF");
    100     assertThat(
    101             lex(
    102                 "0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L"))
    103         .containsExactly(
    104             "LONG_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L)",
    105             "EOF");
    106     assertThat(lex("0xffff_ffff_ffff_ffffL"))
    107         .containsExactly("LONG_LITERAL(0xffff_ffff_ffff_ffffL)", "EOF");
    108     assertThat(lex("017_7777_7777_7777_7777_7777L"))
    109         .containsExactly("LONG_LITERAL(017_7777_7777_7777_7777_7777L)", "EOF");
    110     assertThat(
    111             lex(
    112                 "0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
    113         .containsExactly(
    114             "LONG_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
    115             "EOF");
    116   }
    117 
    118   @Test
    119   public void testDoubleLiteral() {
    120     assertThat(lex("1D")).containsExactly("DOUBLE_LITERAL(1D)", "EOF");
    121     assertThat(lex("123d")).containsExactly("DOUBLE_LITERAL(123d)", "EOF");
    122     assertThat(lex("1.7976931348623157e308"))
    123         .containsExactly("DOUBLE_LITERAL(1.7976931348623157e308)", "EOF");
    124     assertThat(lex("4.9e-324")).containsExactly("DOUBLE_LITERAL(4.9e-324)", "EOF");
    125   }
    126 
    127   @Test
    128   public void testFloatLiteral() {
    129     assertThat(lex("1F")).containsExactly("FLOAT_LITERAL(1F)", "EOF");
    130     assertThat(lex("123f")).containsExactly("FLOAT_LITERAL(123f)", "EOF");
    131     assertThat(lex("3.4028235e38f")).containsExactly("FLOAT_LITERAL(3.4028235e38f)", "EOF");
    132     assertThat(lex("1.40e-45f")).containsExactly("FLOAT_LITERAL(1.40e-45f)", "EOF");
    133   }
    134 
    135   @Test
    136   public void testComment() {
    137     assertThat(lex("a//comment\nb //comment")).containsExactly("IDENT(a)", "IDENT(b)", "EOF");
    138     assertThat(lex("a/*comment*/\nb /*comment**/c/*asd*/"))
    139         .containsExactly("IDENT(a)", "IDENT(b)", "IDENT(c)", "EOF");
    140   }
    141 
    142   @Test
    143   public void testStringLiteral() {
    144     assertThat(lex("\"asd\" \"\\n\""))
    145         .containsExactly("STRING_LITERAL(asd)", "STRING_LITERAL(\\n)", "EOF");
    146   }
    147 
    148   @Test
    149   public void charLiteral() {
    150     assertThat(lex("'a' '\\t' '\\r'"))
    151         .containsExactly("CHAR_LITERAL(a)", "CHAR_LITERAL(\\t)", "CHAR_LITERAL(\\r)", "EOF");
    152   }
    153 
    154   @Test
    155   public void negativeInt() {
    156     assertThat(lex("(int)-1"))
    157         .containsExactly("LPAREN", "INT", "RPAREN", "MINUS", "INT_LITERAL(1)", "EOF");
    158   }
    159 
    160   @Test
    161   public void importStmt() {
    162     assertThat(lex("import com.google.Foo;"))
    163         .containsExactly(
    164             "IMPORT", "IDENT(com)", "DOT", "IDENT(google)", "DOT", "IDENT(Foo)", "SEMI", "EOF");
    165   }
    166 
    167   @Test
    168   public void annotation() {
    169     assertThat(lex("@GwtCompatible(serializable = true, emulated = true)"))
    170         .containsExactly(
    171             "AT",
    172             "IDENT(GwtCompatible)",
    173             "LPAREN",
    174             "IDENT(serializable)",
    175             "ASSIGN",
    176             "TRUE",
    177             "COMMA",
    178             "IDENT(emulated)",
    179             "ASSIGN",
    180             "TRUE",
    181             "RPAREN",
    182             "EOF");
    183   }
    184 
    185   @Test
    186   public void operators() {
    187     assertThat(
    188             lex(
    189                 "=   >   <   !   ~   ?   :   ->\n"
    190                     + "==  >=  <=  !=  &&  ||  ++  --\n"
    191                     + "+   -   *   /   &   |   ^   %   <<   >>   >>>\n"
    192                     + "+=  -=  *=  /=  &=  |=  ^=  %=  <<=  >>=  >>>="))
    193         .containsExactly(
    194             "ASSIGN", "GT", "LT", "NOT", "TILDE", "COND", "COLON", "ARROW", "EQ", "GTE", "LTE",
    195             "NOTEQ", "ANDAND", "OROR", "INCR", "DECR", "PLUS", "MINUS", "MULT", "DIV", "AND", "OR",
    196             "XOR", "MOD", "LTLT", "GTGT", "GTGTGT", "PLUSEQ", "MINUSEQ", "MULTEQ", "DIVEQ", "ANDEQ",
    197             "OREQ", "XOREQ", "MODEQ", "LTLTE", "GTGTE", "GTGTGTE", "EOF");
    198   }
    199 
    200   @Test
    201   public void keywords() {
    202     assertThat(
    203             lex(
    204                 "    abstract   continue   for          new         switch\n"
    205                     + "    assert     default    if           package     synchronized\n"
    206                     + "    boolean    do         goto         private     this\n"
    207                     + "    break      double     implements   protected   throw\n"
    208                     + "    byte       else       import       public      throws\n"
    209                     + "    case       enum       instanceof   return      transient\n"
    210                     + "    catch      extends    int          short       try\n"
    211                     + "    char       final      interface    static      void\n"
    212                     + "    class      finally    long         strictfp    volatile\n"
    213                     + "    const      float      native       super       while\n"
    214                     + "=   >   <   !   ~   ?   :   ->\n"))
    215         .containsExactly(
    216             "ABSTRACT",
    217             "CONTINUE",
    218             "FOR",
    219             "NEW",
    220             "SWITCH",
    221             "ASSERT",
    222             "DEFAULT",
    223             "IF",
    224             "PACKAGE",
    225             "SYNCHRONIZED",
    226             "BOOLEAN",
    227             "DO",
    228             "GOTO",
    229             "PRIVATE",
    230             "THIS",
    231             "BREAK",
    232             "DOUBLE",
    233             "IMPLEMENTS",
    234             "PROTECTED",
    235             "THROW",
    236             "BYTE",
    237             "ELSE",
    238             "IMPORT",
    239             "PUBLIC",
    240             "THROWS",
    241             "CASE",
    242             "ENUM",
    243             "INSTANCEOF",
    244             "RETURN",
    245             "TRANSIENT",
    246             "CATCH",
    247             "EXTENDS",
    248             "INT",
    249             "SHORT",
    250             "TRY",
    251             "CHAR",
    252             "FINAL",
    253             "INTERFACE",
    254             "STATIC",
    255             "VOID",
    256             "CLASS",
    257             "FINALLY",
    258             "LONG",
    259             "STRICTFP",
    260             "VOLATILE",
    261             "CONST",
    262             "FLOAT",
    263             "NATIVE",
    264             "SUPER",
    265             "WHILE",
    266             "ASSIGN",
    267             "GT",
    268             "LT",
    269             "NOT",
    270             "TILDE",
    271             "COND",
    272             "COLON",
    273             "ARROW",
    274             "EOF");
    275   }
    276 
    277   @Test
    278   public void hexFloat() {
    279     lexerComparisonTest("0x1.0p31");
    280     lexerComparisonTest("0x1p31");
    281   }
    282 
    283   @Test
    284   public void zeroFloat() {
    285     lexerComparisonTest("0f");
    286   }
    287 
    288   @Test
    289   public void escape() {
    290     lexerComparisonTest("'\\b'");
    291     lexerComparisonTest("'\\0'");
    292     lexerComparisonTest("'\\01'");
    293     lexerComparisonTest("'\\001'");
    294   }
    295 
    296   @Test
    297   public void floatLiteral() {
    298     lexerComparisonTest(".123321f");
    299     lexerComparisonTest(".123321F");
    300     lexerComparisonTest(".123321d");
    301     lexerComparisonTest(".123321D");
    302     lexerComparisonTest("0.0e+1f");
    303     lexerComparisonTest("0.0e-1f");
    304     lexerComparisonTest(".123321");
    305   }
    306 
    307   @Test
    308   public void digitsUnderscore() {
    309     lexerComparisonTest("123__123______3");
    310   }
    311 
    312   @Test
    313   public void moreOperators() {
    314     lexerComparisonTest("* / %");
    315   }
    316 
    317   @Test
    318   public void unusualKeywords() {
    319     lexerComparisonTest("const goto assert");
    320   }
    321 
    322   @Test
    323   public void specialCharLiteral() {
    324     lexerComparisonTest("'\\013'");
    325   }
    326 
    327   @Test
    328   public void stringEscape() {
    329     lexerComparisonTest("\"asd\\\"dsa\"");
    330   }
    331 
    332   @Test
    333   public void blockCommentEndingSlash() {
    334     lexerComparisonTest("foo /*/*/ bar");
    335   }
    336 
    337   private void lexerComparisonTest(String s) {
    338     assertThat(lex(s)).containsExactlyElementsIn(JavacLexer.javacLex(s));
    339   }
    340 
    341   public static List<String> lex(String input) {
    342     Lexer lexer = new StreamLexer(new UnicodeEscapePreprocessor(new SourceFile(null, input)));
    343     List<String> tokens = new ArrayList<>();
    344     Token token;
    345     do {
    346       token = lexer.next();
    347       String tokenString;
    348       switch (token) {
    349         case IDENT:
    350         case INT_LITERAL:
    351         case LONG_LITERAL:
    352         case FLOAT_LITERAL:
    353         case DOUBLE_LITERAL:
    354           tokenString = String.format("%s(%s)", token.name(), lexer.stringValue());
    355           break;
    356         case CHAR_LITERAL:
    357         case STRING_LITERAL:
    358           tokenString =
    359               String.format(
    360                   "%s(%s)",
    361                   token.name(), SourceCodeEscapers.javaCharEscaper().escape(lexer.stringValue()));
    362           break;
    363         default:
    364           tokenString = token.name();
    365           break;
    366       }
    367       tokens.add(tokenString);
    368     } while (token != Token.EOF);
    369     return tokens;
    370   }
    371 }
    372