Home | History | Annotate | Download | only in opcode-gen
      1 # Copyright (C) 2007 The Android Open Source Project
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 
     15 #
     16 # Awk helper script for opcode-gen.
     17 #
     18 
     19 #
     20 # Initialization.
     21 #
     22 
     23 BEGIN {
     24     MAX_OPCODE = 65535;
     25     MAX_PACKED_OPCODE = 511;
     26     MAX_PACKED_OPCODE = 255; # TODO: Not for long!
     27     initIndexTypes();
     28     initFlags();
     29     if (readBytecodes()) exit 1;
     30     deriveOpcodeChains();
     31     createPackedTables();
     32     consumeUntil = "";
     33     emission = "";
     34 }
     35 
     36 #
     37 # General control (must appear above directive handlers).
     38 #
     39 
     40 # Clear out the preexisting output within a directive section.
     41 consumeUntil != "" {
     42     if (index($0, consumeUntil) != 0) {
     43         consumeUntil = "";
     44         print;
     45     }
     46 
     47     next;
     48 }
     49 
     50 # Detect directives.
     51 /BEGIN\([a-z-]*\)/ {
     52     i = match($0, /BEGIN\([a-z-]*\)/);
     53     emission = substr($0, i + 6, RLENGTH - 7);
     54     consumeUntil = "END(" emission ")";
     55     emissionHandled = 0;
     56 }
     57 
     58 # Most lines just get copied from the source as-is, including the start
     59 # comment for directives.
     60 {
     61     print;
     62 }
     63 
     64 #
     65 # Handlers for all of the directives.
     66 #
     67 
     68 emission == "opcodes" {
     69     emissionHandled = 1;
     70 
     71     for (i = 0; i <= MAX_OPCODE; i++) {
     72         if (isUnused(i) || isOptimized(i)) continue;
     73         printf("    public static final int %s = 0x%s;\n",
     74                constName[i], hex[i]);
     75     }
     76 }
     77 
     78 emission == "first-opcodes" {
     79     emissionHandled = 1;
     80 
     81     for (i = 0; i <= MAX_OPCODE; i++) {
     82         if (isUnused(i) || isOptimized(i)) continue;
     83         if (isFirst[i] == "true") {
     84             printf("    //     Opcodes.%s\n", constName[i]);
     85         }
     86     }
     87 }
     88 
     89 emission == "dops" {
     90     emissionHandled = 1;
     91 
     92     for (i = 0; i <= MAX_OPCODE; i++) {
     93         if (isUnused(i) || isOptimized(i)) continue;
     94 
     95         nextOp = nextOpcode[i];
     96         nextOp = (nextOp == -1) ? "NO_NEXT" : constName[nextOp];
     97 
     98         printf("    public static final Dop %s =\n" \
     99                "        new Dop(Opcodes.%s, Opcodes.%s,\n" \
    100                "            Opcodes.%s, Form%s.THE_ONE, %s);\n\n",
    101                constName[i], constName[i], family[i], nextOp, format[i],
    102                hasResult[i]);
    103     }
    104 }
    105 
    106 emission == "opcode-info-defs" {
    107     emissionHandled = 1;
    108 
    109     for (i = 0; i <= MAX_OPCODE; i++) {
    110         if (isUnused(i) || isOptimized(i)) continue;
    111 
    112         itype = toupper(indexType[i]);
    113         gsub(/-/, "_", itype);
    114 
    115         printf("    public static final Info %s =\n" \
    116                "        new Info(Opcodes.%s, \"%s\",\n" \
    117                "            InstructionCodec.FORMAT_%s, IndexType.%s);\n\n", \
    118                constName[i], constName[i], name[i], toupper(format[i]), itype);
    119     }
    120 }
    121 
    122 emission == "dops-init" || emission == "opcode-info-init" {
    123     emissionHandled = 1;
    124 
    125     for (i = 0; i <= MAX_OPCODE; i++) {
    126         if (isUnused(i) || isOptimized(i)) continue;
    127         printf("        set(%s);\n", constName[i]);
    128     }
    129 }
    130 
    131 emission == "libcore-opcodes" {
    132     emissionHandled = 1;
    133 
    134     for (i = 0; i <= MAX_OPCODE; i++) {
    135         if (isUnused(i) || isOptimized(i)) continue;
    136         printf("    int OP_%-28s = 0x%04x;\n", constName[i], i);
    137     }
    138 }
    139 
    140 emission == "libcore-maximum-values" {
    141     emissionHandled = 1;
    142 
    143     printf("        MAXIMUM_VALUE = %d;\n", MAX_OPCODE);
    144     printf("        MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE);
    145 }
    146 
    147 emission == "libdex-maximum-values" {
    148     emissionHandled = 1;
    149 
    150     printf("#define kMaxOpcodeValue 0x%x\n", MAX_OPCODE);
    151     printf("#define kNumPackedOpcodes 0x%x\n", MAX_PACKED_OPCODE + 1);
    152 }
    153 
    154 emission == "libdex-opcode-enum" {
    155     emissionHandled = 1;
    156 
    157     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    158         printf("    OP_%-28s = 0x%02x,\n", packedConstName[i], i);
    159     }
    160 }
    161 
    162 emission == "libdex-goto-table" {
    163     emissionHandled = 1;
    164 
    165     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    166         content = sprintf("        H(OP_%s),", packedConstName[i]);
    167         printf("%-78s\\\n", content);
    168     }
    169 }
    170 
    171 emission == "libdex-opcode-names" {
    172     emissionHandled = 1;
    173 
    174     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    175         printf("    \"%s\",\n", packedName[i]);
    176     }
    177 }
    178 
    179 emission == "libdex-widths" {
    180     emissionHandled = 1;
    181 
    182     col = 1;
    183     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    184         value = sprintf("%d,", packedWidth[i]);
    185         col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 16, 2, "    ");
    186     }
    187 }
    188 
    189 emission == "libdex-flags" {
    190     emissionHandled = 1;
    191 
    192     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    193         value = flagsToC(packedFlags[i]);
    194         printf("    %s,\n", value);
    195     }
    196 }
    197 
    198 emission == "libdex-formats" {
    199     emissionHandled = 1;
    200 
    201     col = 1;
    202     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    203         value = sprintf("kFmt%s,", packedFormat[i]);
    204         col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 7, 9, "    ");
    205     }
    206 }
    207 
    208 emission == "libdex-index-types" {
    209     emissionHandled = 1;
    210 
    211     col = 1;
    212     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    213         value = sprintf("%s,", indexTypeValues[packedIndexType[i]]);
    214         col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 3, 19, "    ");
    215     }
    216 }
    217 
    218 # Handle the end of directive processing (must appear after the directive
    219 # clauses).
    220 emission != "" {
    221     if (!emissionHandled) {
    222         printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
    223         consumeUntil = "";
    224     }
    225 
    226     emission = "";
    227 }
    228 
    229 #
    230 # Helper functions.
    231 #
    232 
    233 # Helper to print out an element in a multi-column fashion. It returns
    234 # the (one-based) column number that the next element will be printed
    235 # in.
    236 function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
    237     isLast = (isLast || (col == numCols));
    238     printf("%s%-*s%s",
    239         (col == 1) ? linePrefix : " ",
    240         isLast ? 1 : colWidth, value,
    241         isLast ? "\n" : "");
    242 
    243     return (col % numCols) + 1;
    244 }
    245 
    246 # Read the bytecode description file.
    247 function readBytecodes(i, parts, line, cmd, status, count) {
    248     # locals: parts, line, cmd, status, count
    249     for (;;) {
    250         # Read a line.
    251         status = getline line <bytecodeFile;
    252         if (status == 0) break;
    253         if (status < 0) {
    254             print "trouble reading bytecode file";
    255             exit 1;
    256         }
    257 
    258         # Clean up the line and extract the command.
    259         gsub(/  */, " ", line);
    260         sub(/ *#.*$/, "", line);
    261         sub(/ $/, "", line);
    262         sub(/^ /, "", line);
    263         count = split(line, parts);
    264         if (count == 0) continue; # Blank or comment line.
    265         cmd = parts[1];
    266         sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.
    267 
    268         if (cmd == "op") {
    269             status = defineOpcode(line);
    270         } else if (cmd == "format") {
    271             status = defineFormat(line);
    272         } else {
    273             status = -1;
    274         }
    275 
    276         if (status != 0) {
    277             printf("syntax error on line: %s\n", line) >"/dev/stderr";
    278             return 1;
    279         }
    280     }
    281 
    282     return 0;
    283 }
    284 
    285 # Define an opcode.
    286 function defineOpcode(line, count, parts, idx) {
    287     # locals: count, parts, idx
    288     count = split(line, parts);
    289     if (count != 6)  return -1;
    290     idx = parseHex(parts[1]);
    291     if (idx < 0) return -1;
    292 
    293     # Extract directly specified values from the line.
    294     hex[idx] = parts[1];
    295     name[idx] = parts[2];
    296     format[idx] = parts[3];
    297     hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    298     indexType[idx] = parts[5];
    299     flags[idx] = parts[6];
    300 
    301     # Calculate derived values.
    302 
    303     constName[idx] = toupper(name[idx]);
    304     gsub("[/-]", "_", constName[idx]);   # Dash and slash become underscore.
    305     gsub("[+^]", "", constName[idx]);    # Plus and caret are removed.
    306     split(name[idx], parts, "/");
    307 
    308     family[idx] = toupper(parts[1]);
    309     gsub("-", "_", family[idx]);         # Dash becomes underscore.
    310     gsub("[+^]", "", family[idx]);       # Plus and caret are removed.
    311 
    312     split(format[idx], parts, "");       # Width is the first format char.
    313     width[idx] = parts[1];
    314 
    315     # This association is used when computing "next" opcodes.
    316     familyFormat[family[idx],format[idx]] = idx;
    317 
    318     # Verify values.
    319 
    320     if (nextFormat[format[idx]] == "") {
    321         printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
    322         return 1;
    323     }
    324 
    325     if (indexTypeValues[indexType[idx]] == "") {
    326         printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
    327         return 1;
    328     }
    329 
    330     if (flagsToC(flags[idx]) == "") {
    331         printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
    332         return 1;
    333     }
    334 
    335     return 0;
    336 }
    337 
    338 # Define a format family.
    339 function defineFormat(line, count, parts, i) {
    340     # locals: count, parts, i
    341     count = split(line, parts);
    342     if (count < 1)  return -1;
    343     formats[parts[1]] = line;
    344 
    345     parts[count + 1] = "none";
    346     for (i = 1; i <= count; i++) {
    347         nextFormat[parts[i]] = parts[i + 1];
    348     }
    349 
    350     return 0;
    351 }
    352 
    353 # Produce the nextOpcode and isFirst arrays. The former indicates, for
    354 # each opcode, which one should be tried next when doing instruction
    355 # fitting. The latter indicates which opcodes are at the head of an
    356 # instruction fitting chain.
    357 function deriveOpcodeChains(i, op) {
    358     # locals: i, op
    359 
    360     for (i = 0; i <= MAX_OPCODE; i++) {
    361         if (isUnused(i)) continue;
    362         isFirst[i] = "true";
    363     }
    364 
    365     for (i = 0; i <= MAX_OPCODE; i++) {
    366         if (isUnused(i)) continue;
    367         op = findNextOpcode(i);
    368         nextOpcode[i] = op;
    369         if (op != -1) {
    370             isFirst[op] = "false";
    371         }
    372     }
    373 }
    374 
    375 # Given an opcode by index, find the next opcode in the same family
    376 # (that is, with the same base name) to try when matching instructions
    377 # to opcodes. This simply walks the nextFormat chain looking for a
    378 # match. This returns the index of the matching opcode or -1 if there
    379 # is none.
    380 function findNextOpcode(idx, fam, fmt, result) {
    381     # locals: fam, fmt, result
    382     fam = family[idx];
    383     fmt = format[idx];
    384 
    385     # Not every opcode has a version with every possible format, so
    386     # we have to iterate down the chain until we find one or run out of
    387     # formats to try.
    388     for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
    389         result = familyFormat[fam,fmt];
    390         if (result != "") {
    391             return result;
    392         }
    393     }
    394 
    395     return -1;
    396 }
    397 
    398 # Construct the tables of info indexed by packed opcode. The packed opcode
    399 # values are in the range 0-0x1ff, whereas the unpacked opcodes sparsely
    400 # span the range 0-0xffff.
    401 function createPackedTables(i, op) {
    402     # locals: i, op
    403     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    404         op = unpackOpcode(i);
    405         if (isUnused(op)) {
    406             packedName[i]      = unusedName(op);
    407             packedConstName[i] = unusedConstName(op);
    408             packedFormat[i]    = "00x";
    409             packedFlags[i]     = 0;
    410             packedWidth[i]     = 0;
    411             packedIndexType[i] = "unknown";
    412         } else {
    413             packedName[i]      = name[op];
    414             packedConstName[i] = constName[op];
    415             packedFormat[i]    = format[op];
    416             packedFlags[i]     = flags[op];
    417             packedWidth[i]     = width[op];
    418             packedIndexType[i] = indexType[op];
    419         }
    420     }
    421 }
    422 
    423 # Given a packed opcode, returns the raw (unpacked) opcode value.
    424 function unpackOpcode(idx) {
    425     # Note: This must be the inverse of the corresponding code in
    426     # libdex/DexOpcodes.h.
    427     if (idx <= 255) {
    428         return idx;
    429     } else {
    430         idx -= 256;
    431         return (idx * 256) + 255;
    432     }
    433 }
    434 
    435 # Returns the "unused" name of the given opcode (by index).
    436 # That is, this is the human-oriented name to use for an opcode
    437 # definition in cases
    438 # where the opcode isn't used.
    439 function unusedName(idx) {
    440     if (idx <= 255) {
    441          return sprintf("unused-%02x", idx);
    442     } else {
    443          return sprintf("unused-%04x", idx);
    444     }
    445 }
    446 
    447 # Returns the "unused" constant name of the given opcode (by index).
    448 # That is, this is the name to use for a constant definition in cases
    449 # where the opcode isn't used.
    450 function unusedConstName(idx) {
    451     if (idx <= 255) {
    452          return toupper(sprintf("UNUSED_%02x", idx));
    453     } else {
    454          return toupper(sprintf("UNUSED_%04x", idx));
    455     }
    456 }
    457 
    458 # Convert a hex value to an int.
    459 function parseHex(hex, result, chars, count, c, i) {
    460     # locals: result, chars, count, c, i
    461     hex = tolower(hex);
    462     count = split(hex, chars, "");
    463     result = 0;
    464     for (i = 1; i <= count; i++) {
    465         c = index("0123456789abcdef", chars[i]);
    466         if (c == 0) {
    467             printf("bogus hex value: %s\n", hex) >"/dev/stderr";
    468             return -1;
    469         }
    470         result = (result * 16) + c - 1;
    471     }
    472     return result;
    473 }
    474 
    475 # Initialize the indexTypes data.
    476 function initIndexTypes() {
    477     indexTypeValues["unknown"]              = "kIndexUnknown";
    478     indexTypeValues["none"]                 = "kIndexNone";
    479     indexTypeValues["varies"]               = "kIndexVaries";
    480     indexTypeValues["type-ref"]             = "kIndexTypeRef";
    481     indexTypeValues["string-ref"]           = "kIndexStringRef";
    482     indexTypeValues["method-ref"]           = "kIndexMethodRef";
    483     indexTypeValues["field-ref"]            = "kIndexFieldRef";
    484     indexTypeValues["inline-method"]        = "kIndexInlineMethod";
    485     indexTypeValues["vtable-offset"]        = "kIndexVtableOffset";
    486     indexTypeValues["field-offset"]         = "kIndexFieldOffset";
    487     indexTypeValues["method-and-proto-ref"] = "kIndexMethodAndProtoRef";
    488     indexTypeValues["call-site-ref"]        = "kCallSiteRef";
    489 }
    490 
    491 # Initialize the flags data.
    492 function initFlags() {
    493     flagValues["branch"]        = "kInstrCanBranch";
    494     flagValues["continue"]      = "kInstrCanContinue";
    495     flagValues["switch"]        = "kInstrCanSwitch";
    496     flagValues["throw"]         = "kInstrCanThrow";
    497     flagValues["return"]        = "kInstrCanReturn";
    498     flagValues["invoke"]        = "kInstrInvoke";
    499     flagValues["optimized"]     = "0"; # Not represented in C output
    500     flagValues["0"]             = "0";
    501 }
    502 
    503 # Translate the given flags into the equivalent C expression. Returns
    504 # "" on error.
    505 function flagsToC(f, parts, result, i) {
    506     # locals: parts, result, i
    507     count = split(f, parts, /\|/); # Split input at pipe characters.
    508     result = "0";
    509 
    510     for (i = 1; i <= count; i++) {
    511         f = flagValues[parts[i]];
    512         if (f == "") {
    513             printf("bogus flag: %s\n", f) >"/dev/stderr";
    514             return ""; # Bogus flag name.
    515         } else if (f == "0") {
    516             # Nothing to append for this case.
    517         } else if (result == "0") {
    518             result = f;
    519         } else {
    520             result = result "|" f;
    521         }
    522     }
    523 
    524     return result;
    525 }
    526 
    527 # Returns true if the given opcode (by index) is an "optimized" opcode.
    528 function isOptimized(idx, parts, f) {
    529     # locals: parts, f
    530     split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    531     for (f in parts) {
    532         if (parts[f] == "optimized") return 1;
    533     }
    534     return 0;
    535 }
    536 
    537 # Returns true if there is no definition for the given opcode (by index).
    538 function isUnused(idx) {
    539     return (name[idx] == "");
    540 }
    541