Home | History | Annotate | Download | only in dasm
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package dasm;
     18 
     19 
     20 import dasm.tokens.number_token;
     21 import dasm.tokens.relative_number_token;
     22 
     23 import java.io.IOException;
     24 import java.io.Reader;
     25 import java.util.Hashtable;
     26 
     27 import java_cup.runtime.int_token;
     28 import java_cup.runtime.str_token;
     29 import java_cup.runtime.token;
     30 
     31 /**
     32  * Tokenizer
     33  */
     34 
     35 
     36 class Scanner {
     37     /**
     38      * Chars buffer with autoexpanding.
     39      */
     40     class CharBuffer {
     41         private int buffer_size = 256;
     42         private char[] buffer = new char[buffer_size];
     43         private int cur_pos = 0;
     44 
     45         private void expand() {
     46             char tmp[] = new char[buffer_size * 2];
     47             System.arraycopy(buffer, 0, tmp, 0, buffer_size);
     48             buffer_size *= 2;
     49             buffer = tmp;
     50         }
     51 
     52         void add(char c) {
     53             buffer[cur_pos] = c;
     54             cur_pos++;
     55             if (cur_pos == buffer_size) expand();
     56         }
     57 
     58         int size() {
     59             return cur_pos;
     60         }
     61 
     62         char charAt(int idx) {
     63             return buffer[idx];
     64         }
     65 
     66         public String toString() {
     67             return new String(buffer, 0, cur_pos);
     68         }
     69 
     70         void reset() {
     71             cur_pos = 0;
     72         }
     73     }
     74 
     75 
     76     protected Reader inputReader;
     77 
     78     /**
     79      * next character in input stream
     80      */
     81     protected int nextChar;
     82 
     83     protected CharBuffer charBuf = new CharBuffer();
     84 
     85     /**
     86      * Whitespace characters
     87      */
     88     protected static final String WHITESPACE = " \n\t\r";
     89 
     90     /**
     91      * Separator characters
     92      */
     93     protected static final String SEPARATORS = WHITESPACE + ":=" + ",";
     94 
     95     /**
     96      * error reporting
     97      */
     98     public int line_num, token_line_num, char_num;
     99     public StringBuffer line;
    100 
    101     /**
    102      * Holds new variables defined by .set directive
    103      */
    104     public Hashtable dict = new Hashtable();
    105 
    /**
     * Creates a scanner over the given input and advances to the first
     * character that is not part of leading whitespace, blank lines or
     * comments.
     *
     * @param i reader supplying the source text
     * @throws IOException if reading from {@code i} fails
     * @throws DasmError if the input holds nothing but whitespace and comments
     */
    public Scanner(Reader i) throws IOException, DasmError {
        this.inputReader = i;
        this.line = new StringBuffer();
        this.line_num = 1;
        this.char_num = 0;
        this.nextChar = 0;

        skipEmptyLines();
        if (nextChar == -1) {
            throw new DasmError("empty source file");
        }
    }
    115 
    116     /**
    117      * Checks if a character code is a whitespace character
    118      */
    119     protected static boolean isWhitespace(int c) {
    120         return (WHITESPACE.indexOf(c) != -1);
    121     }
    122 
    123     /**
    124      * Checks if a character code is a separator character
    125      */
    126     protected static boolean isSeparator(int c) {
    127         return (c == -1 || SEPARATORS.indexOf(c) != -1);
    128     }
    129 
    130     /**
    131      * Gets next char from input
    132      */
    133     protected void readNextChar() throws IOException {
    134         nextChar = inputReader.read();
    135         switch (nextChar) {
    136         case -1: // EOF
    137             if (char_num == 0) {
    138                 char_num = -1;
    139                 break;
    140             }
    141             nextChar = '\n';
    142             // fall thru
    143         case '\n':
    144             line_num++;
    145             char_num = 0;
    146             break;
    147         default:
    148             line.append((char) nextChar);
    149             char_num++;
    150             return;
    151         }
    152         line.setLength(0);
    153     }
    154 
    155     /**
    156      * Skips empty lines in input stream
    157      */
    158     private void skipEmptyLines() throws IOException {
    159         for (;;) {
    160             if (nextChar != ';') {
    161                 do {
    162                     readNextChar();
    163                 } while (isWhitespace(nextChar));
    164                 if (nextChar != ';') return;
    165             }
    166             do {
    167                 readNextChar();
    168                 if (nextChar == -1) return;
    169             } while (nextChar != '\n');
    170         }
    171     }
    172 
    173     /**
    174      * Reads unicode char (\\uXXXX)
    175      */
    176     private char readUnicodeChar() throws IOException, DasmError {
    177         int result = 0;
    178         for (int i = 0; i < 4; i++) {
    179             readNextChar();
    180             if (nextChar == -1) return 0;
    181 
    182             int tmp = Character.digit((char) nextChar, 16);
    183             if (tmp == -1)
    184                 throw new DasmError("Invalid '\\u' escape sequence");
    185             result = (result << 4) | tmp;
    186         }
    187         return (char) result;
    188     }
    189 
    190     private char nameEscape() throws IOException, DasmError {
    191         readNextChar();
    192         if (nextChar != 'u')
    193             throw new DasmError("Only '\\u' escape sequence allowed in names");
    194         char chval = readUnicodeChar();
    195         if (nextChar == -1)
    196             throw new DasmError("Left over '\\u' escape sequence");
    197         return chval;
    198     }
    199 
    200     /**
    201      * Read and recognize next token
    202      */
    203     public token next_token() throws IOException, DasmError {
    204         token_line_num = line_num;
    205 
    206         for (;;)
    207             switch (nextChar) {
    208             case ';': // a comment
    209             case '\n':
    210                 // return single SEP token (skip multiple newlines
    211                 // interspersed with whitespace or comments)
    212                 skipEmptyLines();
    213                 token_line_num = line_num;
    214                 return new token(sym.SEP);
    215 
    216             case ' ':
    217             case '\t':
    218             case '\r':
    219             case ',': // whitespace
    220                 readNextChar();
    221                 break;
    222 
    223             case -1: // EOF token
    224                 char_num = -1;
    225                 return new token(sym.EOF);
    226 
    227             case '=': // EQUALS token
    228                 readNextChar();
    229                 return new token(sym.EQ);
    230 
    231             case ':': // COLON token
    232                 readNextChar();
    233                 return new token(sym.COLON);
    234 
    235             case '-':
    236             case '+':
    237             case '0':
    238             case '1':
    239             case '2':
    240             case '3':
    241             case '4':
    242             case '5':
    243             case '6':
    244             case '7':
    245             case '8':
    246             case '9':
    247             case '.': // a number
    248             {
    249                 return readNumber();
    250             }
    251 
    252             case '"': // quoted string
    253             {
    254                 return readQuotedString();
    255             }
    256 
    257             case '{': // list of registers
    258             {
    259                 return readRegList();
    260             }
    261 
    262             case '\'': // quotation for overloading reserved words
    263                 return readQuotedReservedWord();
    264 
    265             default: {
    266                 // read up until a separatorcharacter
    267                 boolean only_name = false;
    268 
    269                 charBuf.reset();
    270                 do {
    271                     char chval = (char) nextChar;
    272                     if (nextChar == '\\') {
    273                         chval = nameEscape();
    274                         only_name = true;
    275                     }
    276                     charBuf.add(chval);
    277                     readNextChar();
    278                 } while (!isSeparator(nextChar));
    279 
    280                 String str = charBuf.toString();
    281 
    282                 if (!only_name) {
    283                     token tok;
    284 
    285                     // keyword or directive?
    286                     if ((tok = ReservedWords.get(str)) != null) return tok;
    287 
    288                     // VM instruction?
    289                     if (DopInfo.contains(str))
    290                         return new str_token(sym.Insn, str);
    291 
    292                     if (str.charAt(0) == '$') {
    293                         String s = str.substring(1);
    294                         Object v;
    295                         int n = 10;
    296                         boolean neg = false;
    297                         switch (s.charAt(0)) {
    298                         default:
    299                             break;
    300 
    301                         case '-':
    302                             neg = true;
    303                         case '+':
    304                             s = s.substring(1);
    305                             if (s.startsWith("0x")) {
    306                                 n = 16;
    307                                 s = s.substring(2);
    308                             }
    309                             try {
    310                                 n = Integer.parseInt(s, n);
    311                             } catch (NumberFormatException e) {
    312                                 throw new DasmError(
    313                                         "Bad relative offset number");
    314                             }
    315                             if (neg) n = -n;
    316                             return new relative_number_token(sym.Relative, n);
    317                         }
    318                         // Do variable substitution
    319                         if ((v = dict.get(s)) != null) return (token) v;
    320                     } // not begin from '$'
    321                 } // !only_name
    322                 // Unrecognized string token (e.g. a classname)
    323                 return new str_token(sym.Word, str);
    324             }
    325             }
    326     }
    327 
    328     /**
    329      * Reads "-quoted string
    330      */
    331     protected token readQuotedString() throws IOException, DasmError {
    332         boolean f = false;
    333         charBuf.reset();
    334         for (;;) {
    335             if (f)
    336                 f = false;
    337             else
    338                 readNextChar();
    339 
    340             if (nextChar == '"') {
    341                 readNextChar(); // skip closing quote
    342                 return new str_token(sym.Str, charBuf.toString());
    343             }
    344 
    345             if (nextChar == -1) throw new DasmError("Unterminated string");
    346 
    347             char chval = (char) nextChar;
    348 
    349             if (chval == '\\') {
    350                 readNextChar();
    351                 switch (nextChar) {
    352                 case -1:
    353                     f = true;
    354                     continue;
    355                 case 'n':
    356                     chval = '\n';
    357                     break;
    358                 case 'r':
    359                     chval = '\r';
    360                     break;
    361                 case 't':
    362                     chval = '\t';
    363                     break;
    364                 case 'f':
    365                     chval = '\f';
    366                     break;
    367                 case 'b':
    368                     chval = '\b';
    369                     break;
    370                 case '"':
    371                     chval = '"';
    372                     break;
    373                 case '\'':
    374                     chval = '\'';
    375                     break;
    376                 case '\\':
    377                     chval = '\\';
    378                     break;
    379 
    380                 case 'u':
    381                     chval = readUnicodeChar();
    382                     if (nextChar == -1) {
    383                         f = true;
    384                         continue;
    385                     }
    386                     break;
    387 
    388                 // octals
    389                 case '0':
    390                 case '1':
    391                 case '2':
    392                 case '3':
    393                 case '4':
    394                 case '5':
    395                 case '6':
    396                 case '7': {
    397                     int res = nextChar & 7;
    398                     readNextChar();
    399                     if (nextChar < '0' || nextChar > '7')
    400                         f = true;
    401                     else {
    402                         res = res * 8 + (nextChar & 7);
    403                         readNextChar();
    404                         if (nextChar < '0' || nextChar > '7')
    405                             f = true;
    406                         else {
    407                             int val = res * 8 + (nextChar & 7);
    408                             if (val >= 0x100)
    409                                 f = true;
    410                             else
    411                                 res = val;
    412                         }
    413                     }
    414                     chval = (char) res;
    415                 }
    416                     break;
    417 
    418                 default:
    419                     throw new DasmError("Incorrect backslash escape sequence");
    420                 }
    421             }
    422             charBuf.add(chval);
    423         }
    424     }
    425 
    426     /**
    427      * Reads list of registers ({v1, v2, v3} or {v1..v3})
    428      */
    429     protected token readRegList() throws IOException, DasmError {
    430         charBuf.reset();
    431         for (;;) {
    432             readNextChar();
    433 
    434             if (nextChar == '}') {
    435                 readNextChar(); // skip closing quote
    436                 return new str_token(sym.Word, charBuf.toString());
    437             }
    438 
    439             if (nextChar == -1)
    440                 throw new DasmError("Unterminated list of registers");
    441 
    442 
    443             charBuf.add((char) nextChar);
    444         }
    445     }
    446 
    447     /**
    448      * Reads number
    449      */
    450     protected token readNumber() throws IOException, DasmError {
    451         charBuf.reset();
    452 
    453         do {
    454             charBuf.add((char) nextChar);
    455             readNextChar();
    456         } while (!isSeparator(nextChar));
    457 
    458         String str = charBuf.toString();
    459         token tok;
    460 
    461         // directive?
    462         if ((tok = ReservedWords.get(str)) != null) return tok;
    463 
    464         Number num;
    465         try {
    466             num = Utils.stringToNumber(str);
    467         } catch (NumberFormatException e) {
    468             if (charBuf.charAt(0) != '.') // directive?
    469                 throw new DasmError("Bad number format");
    470             throw new DasmError("Unknown directive or bad number format");
    471         }
    472 
    473         if (num instanceof Integer) {
    474             return new int_token(sym.Int, num.intValue());
    475         }
    476 
    477         return new number_token(sym.Num, num);
    478     }
    479 
    480     /**
    481      * Reads ''-quoted overloaded reserved words
    482      */
    483     protected token readQuotedReservedWord() throws IOException, DasmError {
    484         charBuf.reset();
    485         for (;;) {
    486             readNextChar();
    487             if (isSeparator(nextChar))
    488                 throw new DasmError("Unterminated ''-enclosed name");
    489             if (nextChar == '\'') {
    490                 if (charBuf.size() == 0)
    491                     throw new DasmError("Empty ''-enclosed name");
    492                 readNextChar(); // skip close quote
    493                 if (!isSeparator(nextChar))
    494                     throw new DasmError(
    495                             "Missed separator after ''-enclosed name");
    496                 return new str_token(sym.Word, charBuf.toString());
    497             }
    498             char chval = (char) nextChar;
    499             if (nextChar == '\\') chval = nameEscape();
    500             charBuf.add(chval);
    501         }
    502     }
    503 };
    504