1 /* 2 * Copyright 2016 Google Inc. All Rights Reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.turbine.parse; 18 19 import com.google.common.collect.ImmutableList; 20 import java.util.ArrayDeque; 21 import java.util.ArrayList; 22 import java.util.List; 23 24 /** 25 * Pre-process variable initializer expressions to handle multi-variable declarations. 26 * 27 * <p>Turbine needs to be able to parse compile-time constant expressions in constant variable 28 * intializers and annotations. Parsing JLS 15.28 constant expressions is much easier than parsing 29 * the full expression language, so we pre-process variable initializers to extract the expression 30 * and then parse it with an simple constant expression parser that fails if it sees an expression 31 * it doesn't understand. 32 * 33 * <p>To extract the (possibly constant) expression, we can usually just scan ahead to the 34 * semi-colon at the end of the variable. To avoid matching on semi-colons inside lambdas or 35 * anonymous class declarations, the preprocessor also matches braces. 36 * 37 * <p>That handles everything except multi-variable declarations (int x = 1, y = 2;), which in 38 * hindsight were probably a mistake. Multi-variable declarations contain a list of name and 39 * initializer pairs separated by commas. The initializer expressions may also contain commas, so 40 * it's non-trivial to split on initializer boundaries. For example, consider `int x = a < b, c = 41 * d;`. We can't tell looking at the prefix `a < b, c` whether that's a less-than expression 42 * followed by another initializer, or the start of a generic type: `a<b, c>.foo()`. Distinguishing 43 * between these cases requires arbitrary lookahead. 44 * 45 * <p>The preprocessor seems to be operationally correct. It's possible there are edge cases that it 46 * doesn't handle, but it's extremely rare for compile-time constant multi-variable declarations to 47 * contain complex generics. Multi-variable declarations are also disallowed by the Style guide. 48 */ 49 public class VariableInitializerParser { 50 51 enum FieldInitState { 52 /** The beginning of an initializer expression. */ 53 START, 54 /** The state after `<identifier> <`. */ 55 TYPE, 56 } 57 58 /** Indices into {@code LT} tokens used for backtracking. */ 59 final ArrayDeque<Integer> ltIndices = new ArrayDeque<>(); 60 61 /** Indices into {@code commas} used for backtracking. */ 62 final ArrayDeque<Integer> commaIndices = new ArrayDeque<>(); 63 64 /** The saved tokens. */ 65 List<SavedToken> tokens = new ArrayList<>(); 66 67 /** 68 * Indices of boundaries between variable initializers in {@code tokens} (which are indicated by 69 * commas in the input). 70 */ 71 List<Integer> commas = new ArrayList<>(); 72 73 public Token token; 74 FieldInitState state = FieldInitState.START; 75 int depth = 0; 76 77 final Lexer lexer; 78 79 public VariableInitializerParser(Token token, Lexer lexer) { 80 this.token = token; 81 this.lexer = lexer; 82 } 83 84 private void next() { 85 token = lexer.next(); 86 } 87 88 /** Returns lists of tokens for individual initializers in a (mutli-)variable initializer. */ 89 public List<List<SavedToken>> parseInitializers() { 90 OUTER: 91 while (true) { 92 switch (token) { 93 case IDENT: 94 save(); 95 next(); 96 if (state == FieldInitState.START) { 97 if (token == Token.LT) { 98 state = FieldInitState.TYPE; 99 depth = 1; 100 ltIndices.clear(); 101 commaIndices.clear(); 102 ltIndices.addLast(tokens.size()); 103 commaIndices.addLast(commas.size()); 104 save(); 105 next(); 106 break; 107 } 108 } 109 break; 110 case LT: 111 if (state == FieldInitState.TYPE) { 112 depth++; 113 ltIndices.addLast(tokens.size()); 114 commaIndices.addLast(commas.size()); 115 } 116 save(); 117 next(); 118 break; 119 case GTGTGT: 120 save(); 121 next(); 122 dropBracks(3); 123 break; 124 case GTGT: 125 save(); 126 next(); 127 dropBracks(2); 128 break; 129 case GT: 130 save(); 131 next(); 132 dropBracks(1); 133 break; 134 case LPAREN: 135 save(); 136 next(); 137 dropParens(); 138 break; 139 case LBRACE: 140 save(); 141 next(); 142 dropBraces(); 143 break; 144 case SEMI: 145 switch (state) { 146 case START: 147 case TYPE: 148 break OUTER; 149 default: 150 break; 151 } 152 save(); 153 next(); 154 break; 155 case COMMA: 156 save(); 157 next(); 158 switch (state) { 159 case START: 160 case TYPE: 161 commas.add(tokens.size()); 162 break; 163 default: 164 break; 165 } 166 break; 167 case DOT: 168 save(); 169 next(); 170 dropTypeArguments(); 171 break; 172 case NEW: 173 save(); 174 next(); 175 dropTypeArguments(); 176 while (token == Token.IDENT) { 177 save(); 178 next(); 179 dropTypeArguments(); 180 if (token == Token.DOT) { 181 next(); 182 } else { 183 break; 184 } 185 } 186 break; 187 case COLONCOLON: 188 save(); 189 next(); 190 dropTypeArguments(); 191 if (token == Token.NEW) { 192 next(); 193 } 194 break; 195 case EOF: 196 break OUTER; 197 default: 198 save(); 199 next(); 200 break; 201 } 202 } 203 List<List<SavedToken>> result = new ArrayList<>(); 204 int start = 0; 205 for (int idx : commas) { 206 result.add( 207 ImmutableList.<SavedToken>builder() 208 .addAll(tokens.subList(start, idx - 1)) 209 .add(new SavedToken(Token.EOF, null, -1)) 210 .build()); 211 start = idx; 212 } 213 result.add( 214 ImmutableList.<SavedToken>builder() 215 .addAll(tokens.subList(start, tokens.size())) 216 .add(new SavedToken(Token.EOF, null, -1)) 217 .build()); 218 return result; 219 } 220 221 private void dropParens() { 222 int depth = 1; 223 while (depth > 0) { 224 switch (token) { 225 case LPAREN: 226 save(); 227 next(); 228 depth++; 229 break; 230 case RPAREN: 231 save(); 232 next(); 233 depth--; 234 break; 235 default: 236 save(); 237 next(); 238 break; 239 } 240 } 241 } 242 243 private void dropBraces() { 244 int depth = 1; 245 while (depth > 0) { 246 switch (token) { 247 case LBRACE: 248 save(); 249 next(); 250 depth++; 251 break; 252 case RBRACE: 253 save(); 254 next(); 255 depth--; 256 break; 257 default: 258 save(); 259 next(); 260 break; 261 } 262 } 263 } 264 265 private void save() { 266 tokens.add(new SavedToken(token, lexer.stringValue(), lexer.position())); 267 } 268 269 private void dropBracks(int many) { 270 if (state != FieldInitState.TYPE) { 271 return; 272 } 273 if (depth <= many) { 274 state = FieldInitState.START; 275 } 276 depth -= many; 277 int lastType = -1; 278 int lastComma = -1; 279 for (int i = 0; i < many; i++) { 280 lastType = ltIndices.removeLast(); 281 lastComma = commaIndices.removeLast(); 282 } 283 // The only known type argument locations that require look-ahead to classify are method 284 // references with parametric receivers, and qualified nested type names: 285 switch (token) { 286 case COLONCOLON: 287 case DOT: 288 this.tokens = tokens.subList(0, lastType); 289 this.commas = commas.subList(0, lastComma); 290 break; 291 default: 292 break; 293 } 294 } 295 296 /** 297 * Drops pairs of `<` `>` from the input. Should only be called in contexts where the braces are 298 * unambiguously type argument lists, not less-than. 299 * 300 * <p>Since the lexer munches multiple close braces as a single token, there's handling of right 301 * shifts for cases like the `>>` in `List<SavedToken<String, Integer>>`. 302 */ 303 private void dropTypeArguments() { 304 if (token != Token.LT) { 305 return; 306 } 307 next(); 308 int depth = 1; 309 while (depth > 0) { 310 switch (token) { 311 case LT: 312 depth++; 313 next(); 314 break; 315 case GTGTGT: 316 depth -= 3; 317 next(); 318 break; 319 case GTGT: 320 depth -= 2; 321 next(); 322 break; 323 case GT: 324 depth--; 325 next(); 326 break; 327 default: 328 next(); 329 break; 330 } 331 } 332 } 333 } 334