Home | History | Annotate | Download | only in compiler
      1 /*
      2  * Javassist, a Java-bytecode translator toolkit.
      3  * Copyright (C) 1999-2007 Shigeru Chiba. All Rights Reserved.
      4  *
      5  * The contents of this file are subject to the Mozilla Public License Version
      6  * 1.1 (the "License"); you may not use this file except in compliance with
      7  * the License.  Alternatively, the contents of this file may be used under
      8  * the terms of the GNU Lesser General Public License Version 2.1 or later.
      9  *
     10  * Software distributed under the License is distributed on an "AS IS" basis,
     11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     12  * for the specific language governing rights and limitations under the
     13  * License.
     14  */
     15 
     16 package javassist.compiler;
     17 
     18 class Token {
     19     public Token next = null;
     20     public int tokenId;
     21 
     22     public long longValue;
     23     public double doubleValue;
     24     public String textValue;
     25 }
     26 
     27 public class Lex implements TokenId {
     28     private int lastChar;
     29     private StringBuffer textBuffer;
     30     private Token currentToken;
     31     private Token lookAheadTokens;
     32 
     33     private String input;
     34     private int position, maxlen, lineNumber;
     35 
     36     /**
     37      * Constructs a lexical analyzer.
     38      */
     39     public Lex(String s) {
     40         lastChar = -1;
     41         textBuffer = new StringBuffer();
     42         currentToken = new Token();
     43         lookAheadTokens = null;
     44 
     45         input = s;
     46         position = 0;
     47         maxlen = s.length();
     48         lineNumber = 0;
     49     }
     50 
     51     public int get() {
     52         if (lookAheadTokens == null)
     53             return get(currentToken);
     54         else {
     55             Token t;
     56             currentToken = t = lookAheadTokens;
     57             lookAheadTokens = lookAheadTokens.next;
     58             return t.tokenId;
     59         }
     60     }
     61 
     62     /**
     63      * Looks at the next token.
     64      */
     65     public int lookAhead() {
     66         return lookAhead(0);
     67     }
     68 
     69     public int lookAhead(int i) {
     70         Token tk = lookAheadTokens;
     71         if (tk == null) {
     72             lookAheadTokens = tk = currentToken;  // reuse an object!
     73             tk.next = null;
     74             get(tk);
     75         }
     76 
     77         for (; i-- > 0; tk = tk.next)
     78             if (tk.next == null) {
     79                 Token tk2;
     80                 tk.next = tk2 = new Token();
     81                 get(tk2);
     82             }
     83 
     84         currentToken = tk;
     85         return tk.tokenId;
     86     }
     87 
     88     public String getString() {
     89         return currentToken.textValue;
     90     }
     91 
     92     public long getLong() {
     93         return currentToken.longValue;
     94     }
     95 
     96     public double getDouble() {
     97         return currentToken.doubleValue;
     98     }
     99 
    100     private int get(Token token) {
    101         int t;
    102         do {
    103             t = readLine(token);
    104         } while (t == '\n');
    105         token.tokenId = t;
    106         return t;
    107     }
    108 
    109     private int readLine(Token token) {
    110         int c = getNextNonWhiteChar();
    111         if(c < 0)
    112             return c;
    113         else if(c == '\n') {
    114             ++lineNumber;
    115             return '\n';
    116         }
    117         else if (c == '\'')
    118             return readCharConst(token);
    119         else if (c == '"')
    120             return readStringL(token);
    121         else if ('0' <= c && c <= '9')
    122             return readNumber(c, token);
    123         else if(c == '.'){
    124             c = getc();
    125             if ('0' <= c && c <= '9') {
    126                 StringBuffer tbuf = textBuffer;
    127                 tbuf.setLength(0);
    128                 tbuf.append('.');
    129                 return readDouble(tbuf, c, token);
    130             }
    131             else{
    132                 ungetc(c);
    133                 return readSeparator('.');
    134             }
    135         }
    136         else if (Character.isJavaIdentifierStart((char)c))
    137             return readIdentifier(c, token);
    138         else
    139             return readSeparator(c);
    140     }
    141 
    142     private int getNextNonWhiteChar() {
    143         int c;
    144         do {
    145             c = getc();
    146             if (c == '/') {
    147                 c = getc();
    148                 if (c == '/')
    149                     do {
    150                         c = getc();
    151                     } while (c != '\n' && c != '\r' && c != -1);
    152                 else if (c == '*')
    153                     while (true) {
    154                         c = getc();
    155                         if (c == -1)
    156                             break;
    157                         else if (c == '*')
    158                             if ((c = getc()) == '/') {
    159                                 c = ' ';
    160                                 break;
    161                             }
    162                             else
    163                                 ungetc(c);
    164                     }
    165                 else {
    166                     ungetc(c);
    167                     c = '/';
    168                 }
    169             }
    170         } while(isBlank(c));
    171         return c;
    172     }
    173 
    174     private int readCharConst(Token token) {
    175         int c;
    176         int value = 0;
    177         while ((c = getc()) != '\'')
    178             if (c == '\\')
    179                 value = readEscapeChar();
    180             else if (c < 0x20) {
    181                 if (c == '\n')
    182                     ++lineNumber;
    183 
    184                 return BadToken;
    185             }
    186             else
    187                 value = c;
    188 
    189         token.longValue = value;
    190         return CharConstant;
    191     }
    192 
    193     private int readEscapeChar() {
    194         int c = getc();
    195         if (c == 'n')
    196             c = '\n';
    197         else if (c == 't')
    198             c = '\t';
    199         else if (c == 'r')
    200             c = '\r';
    201         else if (c == 'f')
    202             c = '\f';
    203         else if (c == '\n')
    204             ++lineNumber;
    205 
    206         return c;
    207     }
    208 
    209     private int readStringL(Token token) {
    210         int c;
    211         StringBuffer tbuf = textBuffer;
    212         tbuf.setLength(0);
    213         for (;;) {
    214             while ((c = getc()) != '"') {
    215                 if (c == '\\')
    216                     c = readEscapeChar();
    217                 else if (c == '\n' || c < 0) {
    218                     ++lineNumber;
    219                     return BadToken;
    220                 }
    221 
    222                 tbuf.append((char)c);
    223             }
    224 
    225             for (;;) {
    226                 c = getc();
    227                 if (c == '\n')
    228                     ++lineNumber;
    229                 else if (!isBlank(c))
    230                     break;
    231             }
    232 
    233             if (c != '"') {
    234                 ungetc(c);
    235                 break;
    236             }
    237         }
    238 
    239         token.textValue = tbuf.toString();
    240         return StringL;
    241     }
    242 
    243     private int readNumber(int c, Token token) {
    244         long value = 0;
    245         int c2 = getc();
    246         if (c == '0')
    247             if (c2 == 'X' || c2 == 'x')
    248                 for (;;) {
    249                     c = getc();
    250                     if ('0' <= c && c <= '9')
    251                         value = value * 16 + (long)(c - '0');
    252                     else if ('A' <= c && c <= 'F')
    253                         value = value * 16 + (long)(c - 'A' + 10);
    254                     else if ('a' <= c && c <= 'f')
    255                         value = value * 16 + (long)(c - 'a' + 10);
    256                     else {
    257                         token.longValue = value;
    258                         if (c == 'L' || c == 'l')
    259                             return LongConstant;
    260                         else {
    261                             ungetc(c);
    262                             return IntConstant;
    263                         }
    264                     }
    265                 }
    266             else if ('0' <= c2 && c2 <= '7') {
    267                 value = c2 - '0';
    268                 for (;;) {
    269                     c = getc();
    270                     if ('0' <= c && c <= '7')
    271                         value = value * 8 + (long)(c - '0');
    272                     else {
    273                         token.longValue = value;
    274                         if (c == 'L' || c == 'l')
    275                             return LongConstant;
    276                         else {
    277                             ungetc(c);
    278                             return IntConstant;
    279                         }
    280                     }
    281                 }
    282             }
    283 
    284         value = c - '0';
    285         while ('0' <= c2 && c2 <= '9') {
    286             value = value * 10 + c2 - '0';
    287             c2 = getc();
    288         }
    289 
    290         token.longValue = value;
    291         if (c2 == 'F' || c2 == 'f') {
    292             token.doubleValue = (double)value;
    293             return FloatConstant;
    294         }
    295         else if (c2 == 'E' || c2 == 'e'
    296                  || c2 == 'D' || c2 == 'd' || c2 == '.') {
    297             StringBuffer tbuf = textBuffer;
    298             tbuf.setLength(0);
    299             tbuf.append(value);
    300             return readDouble(tbuf, c2, token);
    301         }
    302         else if (c2 == 'L' || c2 == 'l')
    303             return LongConstant;
    304         else {
    305             ungetc(c2);
    306             return IntConstant;
    307         }
    308     }
    309 
    310     private int readDouble(StringBuffer sbuf, int c, Token token) {
    311         if (c != 'E' && c != 'e' && c != 'D' && c != 'd') {
    312             sbuf.append((char)c);
    313             for (;;) {
    314                 c = getc();
    315                 if ('0' <= c && c <= '9')
    316                     sbuf.append((char)c);
    317                 else
    318                     break;
    319             }
    320         }
    321 
    322         if (c == 'E' || c == 'e') {
    323             sbuf.append((char)c);
    324             c = getc();
    325             if (c == '+' || c == '-') {
    326                 sbuf.append((char)c);
    327                 c = getc();
    328             }
    329 
    330             while ('0' <= c && c <= '9') {
    331                 sbuf.append((char)c);
    332                 c = getc();
    333             }
    334         }
    335 
    336         try {
    337             token.doubleValue = Double.parseDouble(sbuf.toString());
    338         }
    339         catch (NumberFormatException e) {
    340             return BadToken;
    341         }
    342 
    343         if (c == 'F' || c == 'f')
    344             return FloatConstant;
    345         else {
    346             if (c != 'D' && c != 'd')
    347                 ungetc(c);
    348 
    349             return DoubleConstant;
    350         }
    351     }
    352 
    353     // !"#$%&'(    )*+,-./0    12345678    9:;<=>?
    354     private static final int[] equalOps
    355         =  { NEQ, 0, 0, 0, MOD_E, AND_E, 0, 0,
    356              0, MUL_E, PLUS_E, 0, MINUS_E, 0, DIV_E, 0,
    357              0, 0, 0, 0, 0, 0, 0, 0,
    358              0, 0, 0, LE, EQ, GE, 0 };
    359 
    360     private int readSeparator(int c) {
    361         int c2, c3;
    362         if ('!' <= c && c <= '?') {
    363             int t = equalOps[c - '!'];
    364             if (t == 0)
    365                 return c;
    366             else {
    367                 c2 = getc();
    368                 if (c == c2)
    369                     switch (c) {
    370                     case '=' :
    371                         return EQ;
    372                     case '+' :
    373                         return PLUSPLUS;
    374                     case '-' :
    375                         return MINUSMINUS;
    376                     case '&' :
    377                         return ANDAND;
    378                     case '<' :
    379                         c3 = getc();
    380                         if (c3 == '=')
    381                             return LSHIFT_E;
    382                         else {
    383                             ungetc(c3);
    384                             return LSHIFT;
    385                         }
    386                     case '>' :
    387                         c3 = getc();
    388                         if (c3 == '=')
    389                             return RSHIFT_E;
    390                         else if (c3 == '>') {
    391                             c3 = getc();
    392                             if (c3 == '=')
    393                                 return ARSHIFT_E;
    394                             else {
    395                                 ungetc(c3);
    396                                 return ARSHIFT;
    397                             }
    398                         }
    399                         else {
    400                             ungetc(c3);
    401                             return RSHIFT;
    402                         }
    403                     default :
    404                         break;
    405                     }
    406                 else if (c2 == '=')
    407                     return t;
    408             }
    409         }
    410         else if (c == '^') {
    411             c2 = getc();
    412             if (c2 == '=')
    413                 return EXOR_E;
    414         }
    415         else if (c == '|') {
    416             c2 = getc();
    417             if (c2 == '=')
    418                 return OR_E;
    419             else if (c2 == '|')
    420                 return OROR;
    421         }
    422         else
    423             return c;
    424 
    425         ungetc(c2);
    426         return c;
    427     }
    428 
    429     private int readIdentifier(int c, Token token) {
    430         StringBuffer tbuf = textBuffer;
    431         tbuf.setLength(0);
    432 
    433         do {
    434             tbuf.append((char)c);
    435             c = getc();
    436         } while (Character.isJavaIdentifierPart((char)c));
    437 
    438         ungetc(c);
    439 
    440         String name = tbuf.toString();
    441         int t = ktable.lookup(name);
    442         if (t >= 0)
    443             return t;
    444         else {
    445             /* tbuf.toString() is executed quickly since it does not
    446              * need memory copy.  Using a hand-written extensible
    447              * byte-array class instead of StringBuffer is not a good idea
    448              * for execution speed.  Converting a byte array to a String
    449              * object is very slow.  Using an extensible char array
    450              * might be OK.
    451              */
    452             token.textValue = name;
    453             return Identifier;
    454         }
    455     }
    456 
    /** The table mapping a keyword to its token identifier. */
    private static final KeywordTable ktable = new KeywordTable();

    static {
        /* The two arrays below are parallel: ids[k] is the token
         * identifier registered for words[k].  The rows of both
         * arrays are laid out in the same way.
         */
        final String[] words = {
            "abstract", "boolean", "break", "byte", "case",
            "catch", "char", "class", "const", "continue",
            "default", "do", "double", "else", "extends",
            "false", "final", "finally", "float", "for",
            "goto", "if", "implements", "import", "instanceof",
            "int", "interface", "long", "native", "new",
            "null", "package", "private", "protected", "public",
            "return", "short", "static", "strictfp", "super",
            "switch", "synchronized", "this", "throw", "throws",
            "transient", "true", "try", "void", "volatile",
            "while"
        };

        final int[] ids = {
            ABSTRACT, BOOLEAN, BREAK, BYTE, CASE,
            CATCH, CHAR, CLASS, CONST, CONTINUE,
            DEFAULT, DO, DOUBLE, ELSE, EXTENDS,
            FALSE, FINAL, FINALLY, FLOAT, FOR,
            GOTO, IF, IMPLEMENTS, IMPORT, INSTANCEOF,
            INT, INTERFACE, LONG, NATIVE, NEW,
            NULL, PACKAGE, PRIVATE, PROTECTED, PUBLIC,
            RETURN, SHORT, STATIC, STRICT, SUPER,
            SWITCH, SYNCHRONIZED, THIS, THROW, THROWS,
            TRANSIENT, TRUE, TRY, VOID, VOLATILE,
            WHILE
        };

        // registered in the order listed above
        for (int k = 0; k < words.length; k++)
            ktable.append(words[k], ids[k]);
    }
    512 
    513     private static boolean isBlank(int c) {
    514         return c == ' ' || c == '\t' || c == '\f' || c == '\r'
    515             || c == '\n';
    516     }
    517 
    518     private static boolean isDigit(int c) {
    519         return '0' <= c && c <= '9';
    520     }
    521 
    522     private void ungetc(int c) {
    523         lastChar = c;
    524     }
    525 
    526     public String getTextAround() {
    527         int begin = position - 10;
    528         if (begin < 0)
    529             begin = 0;
    530 
    531         int end = position + 10;
    532         if (end > maxlen)
    533             end = maxlen;
    534 
    535         return input.substring(begin, end);
    536     }
    537 
    538     private int getc() {
    539         if (lastChar < 0)
    540             if (position < maxlen)
    541                 return input.charAt(position++);
    542             else
    543                 return -1;
    544         else {
    545             int c = lastChar;
    546             lastChar = -1;
    547             return c;
    548         }
    549     }
    550 }
    551