/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.turbine.parse;

import static com.google.common.truth.Truth.assertThat;

import com.google.common.escape.SourceCodeEscapers;
import com.google.turbine.diag.SourceFile;
import java.util.ArrayList;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests for the lexer.
 *
 * <p>Each test lexes a snippet with {@link #lex} and checks the resulting token list, either
 * against a hand-written expectation or against the list returned by {@code
 * JavacLexer.javacLex} for the same input.
 *
 * <p>Token order is part of a lexer's contract, so every multi-token assertion is chained with
 * {@code inOrder()}; without it Truth only checks that the same elements are present.
 */
@RunWith(JUnit4.class)
public class LexerTest {

  @Test
  public void testSimple() {
    assertThat(lex("\nasd dsa\n")).containsExactly("IDENT(asd)", "IDENT(dsa)", "EOF").inOrder();
  }

  @Test
  public void testOperator() {
    assertThat(lex("\nasd++asd\n"))
        .containsExactly("IDENT(asd)", "INCR", "IDENT(asd)", "EOF")
        .inOrder();
  }

  /** An unterminated block comment or string literal is expected to yield only {@code EOF}. */
  @Test
  public void unterminated() {
    assertThat(lex("/* foo")).containsExactly("EOF");
    assertThat(lex("\" foo")).containsExactly("EOF");
  }

  /** Exercises binary ({@code 0b...}) integer literals, despite the method name. */
  @Test
  public void boolLiteral() {
    lexerComparisonTest("0b0101__01010");
    assertThat(lex("1 + 0b1000100101"))
        .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(0b1000100101)", "EOF")
        .inOrder();
  }

  @Test
  public void octalLiteral() {
    assertThat(lex("1 + 01234567"))
        .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(01234567)", "EOF")
        .inOrder();
  }

  @Test
  public void testLiteral() {
    assertThat(lex("0L")).containsExactly("LONG_LITERAL(0L)", "EOF").inOrder();
    assertThat(lex("0")).containsExactly("INT_LITERAL(0)", "EOF").inOrder();
    assertThat(lex("0x7fff_ffff")).containsExactly("INT_LITERAL(0x7fff_ffff)", "EOF").inOrder();
    assertThat(lex("0177_7777_7777"))
        .containsExactly("INT_LITERAL(0177_7777_7777)", "EOF")
        .inOrder();
    assertThat(lex("0b0111_1111_1111_1111_1111_1111_1111_1111"))
        .containsExactly("INT_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111)", "EOF")
        .inOrder();
    assertThat(lex("0x8000_0000")).containsExactly("INT_LITERAL(0x8000_0000)", "EOF").inOrder();
    assertThat(lex("0200_0000_0000"))
        .containsExactly("INT_LITERAL(0200_0000_0000)", "EOF")
        .inOrder();
    assertThat(lex("0b1000_0000_0000_0000_0000_0000_0000_0000"))
        .containsExactly("INT_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000)", "EOF")
        .inOrder();
    assertThat(lex("0xffff_ffff")).containsExactly("INT_LITERAL(0xffff_ffff)", "EOF").inOrder();
    assertThat(lex("0377_7777_7777"))
        .containsExactly("INT_LITERAL(0377_7777_7777)", "EOF")
        .inOrder();
    assertThat(lex("0b1111_1111_1111_1111_1111_1111_1111_1111"))
        .containsExactly("INT_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111)", "EOF")
        .inOrder();
  }

  @Test
  public void testLong() {
    assertThat(lex("1l")).containsExactly("LONG_LITERAL(1l)", "EOF").inOrder();
    assertThat(lex("9223372036854775807L"))
        .containsExactly("LONG_LITERAL(9223372036854775807L)", "EOF")
        .inOrder();
    // The sign is lexed as a separate MINUS token, not as part of the literal.
    assertThat(lex("-9223372036854775808L"))
        .containsExactly("MINUS", "LONG_LITERAL(9223372036854775808L)", "EOF")
        .inOrder();
    assertThat(lex("0x7fff_ffff_ffff_ffffL"))
        .containsExactly("LONG_LITERAL(0x7fff_ffff_ffff_ffffL)", "EOF")
        .inOrder();
    assertThat(lex("07_7777_7777_7777_7777_7777L"))
        .containsExactly("LONG_LITERAL(07_7777_7777_7777_7777_7777L)", "EOF")
        .inOrder();
    assertThat(
            lex(
                "0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
        .containsExactly(
            "LONG_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
            "EOF")
        .inOrder();
    assertThat(lex("0x8000_0000_0000_0000L"))
        .containsExactly("LONG_LITERAL(0x8000_0000_0000_0000L)", "EOF")
        .inOrder();
    assertThat(lex("010_0000_0000_0000_0000_0000L"))
        .containsExactly("LONG_LITERAL(010_0000_0000_0000_0000_0000L)", "EOF")
        .inOrder();
    assertThat(
            lex(
                "0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L"))
        .containsExactly(
            "LONG_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L)",
            "EOF")
        .inOrder();
    assertThat(lex("0xffff_ffff_ffff_ffffL"))
        .containsExactly("LONG_LITERAL(0xffff_ffff_ffff_ffffL)", "EOF")
        .inOrder();
    assertThat(lex("017_7777_7777_7777_7777_7777L"))
        .containsExactly("LONG_LITERAL(017_7777_7777_7777_7777_7777L)", "EOF")
        .inOrder();
    assertThat(
            lex(
                "0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
        .containsExactly(
            "LONG_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
            "EOF")
        .inOrder();
  }

  @Test
  public void testDoubleLiteral() {
    assertThat(lex("1D")).containsExactly("DOUBLE_LITERAL(1D)", "EOF").inOrder();
    assertThat(lex("123d")).containsExactly("DOUBLE_LITERAL(123d)", "EOF").inOrder();
    assertThat(lex("1.7976931348623157e308"))
        .containsExactly("DOUBLE_LITERAL(1.7976931348623157e308)", "EOF")
        .inOrder();
    assertThat(lex("4.9e-324")).containsExactly("DOUBLE_LITERAL(4.9e-324)", "EOF").inOrder();
  }

  @Test
  public void testFloatLiteral() {
    assertThat(lex("1F")).containsExactly("FLOAT_LITERAL(1F)", "EOF").inOrder();
    assertThat(lex("123f")).containsExactly("FLOAT_LITERAL(123f)", "EOF").inOrder();
    assertThat(lex("3.4028235e38f"))
        .containsExactly("FLOAT_LITERAL(3.4028235e38f)", "EOF")
        .inOrder();
    assertThat(lex("1.40e-45f")).containsExactly("FLOAT_LITERAL(1.40e-45f)", "EOF").inOrder();
  }

  /** Line and block comments are expected to produce no tokens. */
  @Test
  public void testComment() {
    assertThat(lex("a//comment\nb //comment"))
        .containsExactly("IDENT(a)", "IDENT(b)", "EOF")
        .inOrder();
    assertThat(lex("a/*comment*/\nb /*comment**/c/*asd*/"))
        .containsExactly("IDENT(a)", "IDENT(b)", "IDENT(c)", "EOF")
        .inOrder();
  }

  @Test
  public void testStringLiteral() {
    assertThat(lex("\"asd\" \"\\n\""))
        .containsExactly("STRING_LITERAL(asd)", "STRING_LITERAL(\\n)", "EOF")
        .inOrder();
  }

  @Test
  public void charLiteral() {
    assertThat(lex("'a' '\\t' '\\r'"))
        .containsExactly("CHAR_LITERAL(a)", "CHAR_LITERAL(\\t)", "CHAR_LITERAL(\\r)", "EOF")
        .inOrder();
  }

  @Test
  public void negativeInt() {
    assertThat(lex("(int)-1"))
        .containsExactly("LPAREN", "INT", "RPAREN", "MINUS", "INT_LITERAL(1)", "EOF")
        .inOrder();
  }

  @Test
  public void importStmt() {
    assertThat(lex("import com.google.Foo;"))
        .containsExactly(
            "IMPORT", "IDENT(com)", "DOT", "IDENT(google)", "DOT", "IDENT(Foo)", "SEMI", "EOF")
        .inOrder();
  }

  @Test
  public void annotation() {
    assertThat(lex("@GwtCompatible(serializable = true, emulated = true)"))
        .containsExactly(
            "AT",
            "IDENT(GwtCompatible)",
            "LPAREN",
            "IDENT(serializable)",
            "ASSIGN",
            "TRUE",
            "COMMA",
            "IDENT(emulated)",
            "ASSIGN",
            "TRUE",
            "RPAREN",
            "EOF")
        .inOrder();
  }

  @Test
  public void operators() {
    assertThat(
            lex(
                "= > < ! ~ ? : ->\n"
                    + "== >= <= != && || ++ --\n"
                    + "+ - * / & | ^ % << >> >>>\n"
                    + "+= -= *= /= &= |= ^= %= <<= >>= >>>="))
        .containsExactly(
            "ASSIGN", "GT", "LT", "NOT", "TILDE", "COND", "COLON", "ARROW", "EQ", "GTE", "LTE",
            "NOTEQ", "ANDAND", "OROR", "INCR", "DECR", "PLUS", "MINUS", "MULT", "DIV", "AND", "OR",
            "XOR", "MOD", "LTLT", "GTGT", "GTGTGT", "PLUSEQ", "MINUSEQ", "MULTEQ", "DIVEQ", "ANDEQ",
            "OREQ", "XOREQ", "MODEQ", "LTLTE", "GTGTE", "GTGTGTE", "EOF")
        .inOrder();
  }

  @Test
  public void keywords() {
    assertThat(
            lex(
                " abstract continue for new switch\n"
                    + " assert default if package synchronized\n"
                    + " boolean do goto private this\n"
                    + " break double implements protected throw\n"
                    + " byte else import public throws\n"
                    + " case enum instanceof return transient\n"
                    + " catch extends int short try\n"
                    + " char final interface static void\n"
                    + " class finally long strictfp volatile\n"
                    + " const float native super while\n"
                    + "= > < ! ~ ? : ->\n"))
        .containsExactly(
            "ABSTRACT",
            "CONTINUE",
            "FOR",
            "NEW",
            "SWITCH",
            "ASSERT",
            "DEFAULT",
            "IF",
            "PACKAGE",
            "SYNCHRONIZED",
            "BOOLEAN",
            "DO",
            "GOTO",
            "PRIVATE",
            "THIS",
            "BREAK",
            "DOUBLE",
            "IMPLEMENTS",
            "PROTECTED",
            "THROW",
            "BYTE",
            "ELSE",
            "IMPORT",
            "PUBLIC",
            "THROWS",
            "CASE",
            "ENUM",
            "INSTANCEOF",
            "RETURN",
            "TRANSIENT",
            "CATCH",
            "EXTENDS",
            "INT",
            "SHORT",
            "TRY",
            "CHAR",
            "FINAL",
            "INTERFACE",
            "STATIC",
            "VOID",
            "CLASS",
            "FINALLY",
            "LONG",
            "STRICTFP",
            "VOLATILE",
            "CONST",
            "FLOAT",
            "NATIVE",
            "SUPER",
            "WHILE",
            "ASSIGN",
            "GT",
            "LT",
            "NOT",
            "TILDE",
            "COND",
            "COLON",
            "ARROW",
            "EOF")
        .inOrder();
  }

  @Test
  public void hexFloat() {
    lexerComparisonTest("0x1.0p31");
    lexerComparisonTest("0x1p31");
  }

  @Test
  public void zeroFloat() {
    lexerComparisonTest("0f");
  }

  @Test
  public void escape() {
    lexerComparisonTest("'\\b'");
    lexerComparisonTest("'\\0'");
    lexerComparisonTest("'\\01'");
    lexerComparisonTest("'\\001'");
  }

  @Test
  public void floatLiteral() {
    lexerComparisonTest(".123321f");
    lexerComparisonTest(".123321F");
    lexerComparisonTest(".123321d");
    lexerComparisonTest(".123321D");
    lexerComparisonTest("0.0e+1f");
    lexerComparisonTest("0.0e-1f");
    lexerComparisonTest(".123321");
  }

  @Test
  public void digitsUnderscore() {
    lexerComparisonTest("123__123______3");
  }

  @Test
  public void moreOperators() {
    lexerComparisonTest("* / %");
  }

  @Test
  public void unusualKeywords() {
    lexerComparisonTest("const goto assert");
  }

  @Test
  public void specialCharLiteral() {
    lexerComparisonTest("'\\013'");
  }

  @Test
  public void stringEscape() {
    lexerComparisonTest("\"asd\\\"dsa\"");
  }

  @Test
  public void blockCommentEndingSlash() {
    lexerComparisonTest("foo /*/*/ bar");
  }

  /**
   * Asserts that {@link #lex} produces, in the same order, exactly the tokens that {@code
   * JavacLexer.javacLex} returns for the same input.
   *
   * @param s the source text to lex with both lexers
   */
  private void lexerComparisonTest(String s) {
    assertThat(lex(s)).containsExactlyElementsIn(JavacLexer.javacLex(s)).inOrder();
  }

  /**
   * Lexes {@code input} and renders every token, up to and including {@code EOF}, as a string.
   *
   * <p>Identifiers and numeric literals are rendered as {@code NAME(value)} using the lexer's
   * string value verbatim. Char and string literals use the same shape, but their value is passed
   * through {@link SourceCodeEscapers#javaCharEscaper()} first. Every other token is rendered as
   * its bare enum name.
   *
   * @param input the source text to lex
   * @return the rendered tokens in the order the lexer produced them; the last element is always
   *     {@code "EOF"}
   */
  public static List<String> lex(String input) {
    Lexer lexer = new StreamLexer(new UnicodeEscapePreprocessor(new SourceFile(null, input)));
    List<String> tokens = new ArrayList<>();
    Token token;
    // do/while so that the terminating EOF token is itself recorded before the loop exits.
    do {
      token = lexer.next();
      String tokenString;
      switch (token) {
        case IDENT:
        case INT_LITERAL:
        case LONG_LITERAL:
        case FLOAT_LITERAL:
        case DOUBLE_LITERAL:
          tokenString = String.format("%s(%s)", token.name(), lexer.stringValue());
          break;
        case CHAR_LITERAL:
        case STRING_LITERAL:
          // Escape the value so control characters show up as readable Java escapes.
          tokenString =
              String.format(
                  "%s(%s)",
                  token.name(), SourceCodeEscapers.javaCharEscaper().escape(lexer.stringValue()));
          break;
        default:
          tokenString = token.name();
          break;
      }
      tokens.add(tokenString);
    } while (token != Token.EOF);
    return tokens;
  }
}