Home | History | Annotate | Download | only in parse
      1 /*
      2  * Copyright 2016 Google Inc. All Rights Reserved.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.turbine.parse;
     18 
     19 import com.google.common.collect.ImmutableList;
     20 import java.util.ArrayDeque;
     21 import java.util.ArrayList;
     22 import java.util.List;
     23 
     24 /**
     25  * Pre-process variable initializer expressions to handle multi-variable declarations.
     26  *
     27  * <p>Turbine needs to be able to parse compile-time constant expressions in constant variable
     28  * intializers and annotations. Parsing JLS 15.28 constant expressions is much easier than parsing
     29  * the full expression language, so we pre-process variable initializers to extract the expression
     30  * and then parse it with an simple constant expression parser that fails if it sees an expression
     31  * it doesn't understand.
     32  *
     33  * <p>To extract the (possibly constant) expression, we can usually just scan ahead to the
     34  * semi-colon at the end of the variable. To avoid matching on semi-colons inside lambdas or
     35  * anonymous class declarations, the preprocessor also matches braces.
     36  *
     37  * <p>That handles everything except multi-variable declarations (int x = 1, y = 2;), which in
     38  * hindsight were probably a mistake. Multi-variable declarations contain a list of name and
     39  * initializer pairs separated by commas. The initializer expressions may also contain commas, so
     40  * it's non-trivial to split on initializer boundaries. For example, consider `int x = a < b, c =
     41  * d;`. We can't tell looking at the prefix `a < b, c` whether that's a less-than expression
     42  * followed by another initializer, or the start of a generic type: `a<b, c>.foo()`. Distinguishing
     43  * between these cases requires arbitrary lookahead.
     44  *
     45  * <p>The preprocessor seems to be operationally correct. It's possible there are edge cases that it
     46  * doesn't handle, but it's extremely rare for compile-time constant multi-variable declarations to
     47  * contain complex generics. Multi-variable declarations are also disallowed by the Style guide.
     48  */
     49 public class VariableInitializerParser {
     50 
     51   enum FieldInitState {
     52     /** The beginning of an initializer expression. */
     53     START,
     54     /** The state after `<identifier> <`. */
     55     TYPE,
     56   }
     57 
     58   /** Indices into {@code LT} tokens used for backtracking. */
     59   final ArrayDeque<Integer> ltIndices = new ArrayDeque<>();
     60 
     61   /** Indices into {@code commas} used for backtracking. */
     62   final ArrayDeque<Integer> commaIndices = new ArrayDeque<>();
     63 
     64   /** The saved tokens. */
     65   List<SavedToken> tokens = new ArrayList<>();
     66 
     67   /**
     68    * Indices of boundaries between variable initializers in {@code tokens} (which are indicated by
     69    * commas in the input).
     70    */
     71   List<Integer> commas = new ArrayList<>();
     72 
     73   public Token token;
     74   FieldInitState state = FieldInitState.START;
     75   int depth = 0;
     76 
     77   final Lexer lexer;
     78 
     79   public VariableInitializerParser(Token token, Lexer lexer) {
     80     this.token = token;
     81     this.lexer = lexer;
     82   }
     83 
     84   private void next() {
     85     token = lexer.next();
     86   }
     87 
     88   /** Returns lists of tokens for individual initializers in a (mutli-)variable initializer. */
     89   public List<List<SavedToken>> parseInitializers() {
     90     OUTER:
     91     while (true) {
     92       switch (token) {
     93         case IDENT:
     94           save();
     95           next();
     96           if (state == FieldInitState.START) {
     97             if (token == Token.LT) {
     98               state = FieldInitState.TYPE;
     99               depth = 1;
    100               ltIndices.clear();
    101               commaIndices.clear();
    102               ltIndices.addLast(tokens.size());
    103               commaIndices.addLast(commas.size());
    104               save();
    105               next();
    106               break;
    107             }
    108           }
    109           break;
    110         case LT:
    111           if (state == FieldInitState.TYPE) {
    112             depth++;
    113             ltIndices.addLast(tokens.size());
    114             commaIndices.addLast(commas.size());
    115           }
    116           save();
    117           next();
    118           break;
    119         case GTGTGT:
    120           save();
    121           next();
    122           dropBracks(3);
    123           break;
    124         case GTGT:
    125           save();
    126           next();
    127           dropBracks(2);
    128           break;
    129         case GT:
    130           save();
    131           next();
    132           dropBracks(1);
    133           break;
    134         case LPAREN:
    135           save();
    136           next();
    137           dropParens();
    138           break;
    139         case LBRACE:
    140           save();
    141           next();
    142           dropBraces();
    143           break;
    144         case SEMI:
    145           switch (state) {
    146             case START:
    147             case TYPE:
    148               break OUTER;
    149             default:
    150               break;
    151           }
    152           save();
    153           next();
    154           break;
    155         case COMMA:
    156           save();
    157           next();
    158           switch (state) {
    159             case START:
    160             case TYPE:
    161               commas.add(tokens.size());
    162               break;
    163             default:
    164               break;
    165           }
    166           break;
    167         case DOT:
    168           save();
    169           next();
    170           dropTypeArguments();
    171           break;
    172         case NEW:
    173           save();
    174           next();
    175           dropTypeArguments();
    176           while (token == Token.IDENT) {
    177             save();
    178             next();
    179             dropTypeArguments();
    180             if (token == Token.DOT) {
    181               next();
    182             } else {
    183               break;
    184             }
    185           }
    186           break;
    187         case COLONCOLON:
    188           save();
    189           next();
    190           dropTypeArguments();
    191           if (token == Token.NEW) {
    192             next();
    193           }
    194           break;
    195         case EOF:
    196           break OUTER;
    197         default:
    198           save();
    199           next();
    200           break;
    201       }
    202     }
    203     List<List<SavedToken>> result = new ArrayList<>();
    204     int start = 0;
    205     for (int idx : commas) {
    206       result.add(
    207           ImmutableList.<SavedToken>builder()
    208               .addAll(tokens.subList(start, idx - 1))
    209               .add(new SavedToken(Token.EOF, null, -1))
    210               .build());
    211       start = idx;
    212     }
    213     result.add(
    214         ImmutableList.<SavedToken>builder()
    215             .addAll(tokens.subList(start, tokens.size()))
    216             .add(new SavedToken(Token.EOF, null, -1))
    217             .build());
    218     return result;
    219   }
    220 
    221   private void dropParens() {
    222     int depth = 1;
    223     while (depth > 0) {
    224       switch (token) {
    225         case LPAREN:
    226           save();
    227           next();
    228           depth++;
    229           break;
    230         case RPAREN:
    231           save();
    232           next();
    233           depth--;
    234           break;
    235         default:
    236           save();
    237           next();
    238           break;
    239       }
    240     }
    241   }
    242 
    243   private void dropBraces() {
    244     int depth = 1;
    245     while (depth > 0) {
    246       switch (token) {
    247         case LBRACE:
    248           save();
    249           next();
    250           depth++;
    251           break;
    252         case RBRACE:
    253           save();
    254           next();
    255           depth--;
    256           break;
    257         default:
    258           save();
    259           next();
    260           break;
    261       }
    262     }
    263   }
    264 
    265   private void save() {
    266     tokens.add(new SavedToken(token, lexer.stringValue(), lexer.position()));
    267   }
    268 
    269   private void dropBracks(int many) {
    270     if (state != FieldInitState.TYPE) {
    271       return;
    272     }
    273     if (depth <= many) {
    274       state = FieldInitState.START;
    275     }
    276     depth -= many;
    277     int lastType = -1;
    278     int lastComma = -1;
    279     for (int i = 0; i < many; i++) {
    280       lastType = ltIndices.removeLast();
    281       lastComma = commaIndices.removeLast();
    282     }
    283     // The only known type argument locations that require look-ahead to classify are method
    284     // references with parametric receivers, and qualified nested type names:
    285     switch (token) {
    286       case COLONCOLON:
    287       case DOT:
    288         this.tokens = tokens.subList(0, lastType);
    289         this.commas = commas.subList(0, lastComma);
    290         break;
    291       default:
    292         break;
    293     }
    294   }
    295 
    296   /**
    297    * Drops pairs of `<` `>` from the input. Should only be called in contexts where the braces are
    298    * unambiguously type argument lists, not less-than.
    299    *
    300    * <p>Since the lexer munches multiple close braces as a single token, there's handling of right
    301    * shifts for cases like the `>>` in `List<SavedToken<String, Integer>>`.
    302    */
    303   private void dropTypeArguments() {
    304     if (token != Token.LT) {
    305       return;
    306     }
    307     next();
    308     int depth = 1;
    309     while (depth > 0) {
    310       switch (token) {
    311         case LT:
    312           depth++;
    313           next();
    314           break;
    315         case GTGTGT:
    316           depth -= 3;
    317           next();
    318           break;
    319         case GTGT:
    320           depth -= 2;
    321           next();
    322           break;
    323         case GT:
    324           depth--;
    325           next();
    326           break;
    327         default:
    328           next();
    329           break;
    330       }
    331     }
    332   }
    333 }
    334