Home | History | Annotate | Download | only in opcode-gen
      1 # Copyright (C) 2007 The Android Open Source Project
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 
     15 #
     16 # Awk helper script for opcode-gen.
     17 #
     18 
     19 #
     20 # Initialization.
     21 #
     22 
     23 BEGIN {
     24     MAX_OPCODE = 65535;
     25     MAX_PACKED_OPCODE = 511;
     26     initIndexTypes();
     27     initFlags();
     28     if (readBytecodes()) exit 1;
     29     deriveOpcodeChains();
     30     createPackedTables();
     31     consumeUntil = "";
     32     emission = "";
     33 }
     34 
     35 #
     36 # General control (must appear above directive handlers).
     37 #
     38 
     39 # Clear out the preexisting output within a directive section.
     40 consumeUntil != "" {
     41     if (index($0, consumeUntil) != 0) {
     42         consumeUntil = "";
     43         print;
     44     }
     45 
     46     next;
     47 }
     48 
     49 # Detect directives.
     50 /BEGIN\([a-z-]*\)/ {
     51     i = match($0, /BEGIN\([a-z-]*\)/);
     52     emission = substr($0, i + 6, RLENGTH - 7);
     53     consumeUntil = "END(" emission ")";
     54     emissionHandled = 0;
     55 }
     56 
     57 # Most lines just get copied from the source as-is, including the start
     58 # comment for directives.
     59 {
     60     print;
     61 }
     62 
     63 #
     64 # Handlers for all of the directives.
     65 #
     66 
     67 emission == "opcodes" {
     68     emissionHandled = 1;
     69 
     70     for (i = 0; i <= MAX_OPCODE; i++) {
     71         if (isUnused(i) || isOptimized(i)) continue;
     72         printf("    public static final int %s = 0x%s;\n",
     73                constName[i], hex[i]);
     74     }
     75 }
     76 
     77 emission == "first-opcodes" {
     78     emissionHandled = 1;
     79 
     80     for (i = 0; i <= MAX_OPCODE; i++) {
     81         if (isUnused(i) || isOptimized(i)) continue;
     82         if (isFirst[i] == "true") {
     83             printf("    //     Opcodes.%s\n", constName[i]);
     84         }
     85     }
     86 }
     87 
     88 emission == "dops" {
     89     emissionHandled = 1;
     90 
     91     for (i = 0; i <= MAX_OPCODE; i++) {
     92         if (isUnused(i) || isOptimized(i)) continue;
     93 
     94         nextOp = nextOpcode[i];
     95         nextOp = (nextOp == -1) ? "NO_NEXT" : constName[nextOp];
     96 
     97         printf("    public static final Dop %s =\n" \
     98                "        new Dop(Opcodes.%s, Opcodes.%s,\n" \
     99                "            Opcodes.%s, Form%s.THE_ONE, %s);\n\n",
    100                constName[i], constName[i], family[i], nextOp, format[i],
    101                hasResult[i]);
    102     }
    103 }
    104 
    105 emission == "opcode-info-defs" {
    106     emissionHandled = 1;
    107 
    108     for (i = 0; i <= MAX_OPCODE; i++) {
    109         if (isUnused(i) || isOptimized(i)) continue;
    110 
    111         itype = toupper(indexType[i]);
    112         gsub(/-/, "_", itype);
    113 
    114         printf("    public static final Info %s =\n" \
    115                "        new Info(Opcodes.%s, \"%s\",\n" \
    116                "            InstructionCodec.FORMAT_%s, IndexType.%s);\n\n", \
    117                constName[i], constName[i], name[i], toupper(format[i]), itype);
    118     }
    119 }
    120 
    121 emission == "dops-init" || emission == "opcode-info-init" {
    122     emissionHandled = 1;
    123 
    124     for (i = 0; i <= MAX_OPCODE; i++) {
    125         if (isUnused(i) || isOptimized(i)) continue;
    126         printf("        set(%s);\n", constName[i]);
    127     }
    128 }
    129 
    130 emission == "libcore-opcodes" {
    131     emissionHandled = 1;
    132 
    133     for (i = 0; i <= MAX_OPCODE; i++) {
    134         if (isUnused(i) || isOptimized(i)) continue;
    135         printf("    int OP_%-28s = 0x%04x;\n", constName[i], i);
    136     }
    137 }
    138 
    139 emission == "libcore-maximum-values" {
    140     emissionHandled = 1;
    141 
    142     printf("        MAXIMUM_VALUE = %d;\n", MAX_OPCODE);
    143     printf("        MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE);
    144 }
    145 
    146 emission == "libdex-maximum-values" {
    147     emissionHandled = 1;
    148 
    149     printf("#define kMaxOpcodeValue 0x%x\n", MAX_OPCODE);
    150     printf("#define kNumPackedOpcodes 0x%x\n", MAX_PACKED_OPCODE + 1);
    151 }
    152 
    153 emission == "libdex-opcode-enum" {
    154     emissionHandled = 1;
    155 
    156     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    157         printf("    OP_%-28s = 0x%02x,\n", packedConstName[i], i);
    158     }
    159 }
    160 
    161 emission == "libdex-goto-table" {
    162     emissionHandled = 1;
    163 
    164     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    165         content = sprintf("        H(OP_%s),", packedConstName[i]);
    166         printf("%-78s\\\n", content);
    167     }
    168 }
    169 
    170 emission == "libdex-opcode-names" {
    171     emissionHandled = 1;
    172 
    173     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    174         printf("    \"%s\",\n", packedName[i]);
    175     }
    176 }
    177 
    178 emission == "libdex-widths" {
    179     emissionHandled = 1;
    180 
    181     col = 1;
    182     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    183         value = sprintf("%d,", packedWidth[i]);
    184         col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 16, 2, "    ");
    185     }
    186 }
    187 
    188 emission == "libdex-flags" {
    189     emissionHandled = 1;
    190 
    191     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    192         value = flagsToC(packedFlags[i]);
    193         printf("    %s,\n", value);
    194     }
    195 }
    196 
    197 emission == "libdex-formats" {
    198     emissionHandled = 1;
    199 
    200     col = 1;
    201     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    202         value = sprintf("kFmt%s,", packedFormat[i]);
    203         col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 7, 9, "    ");
    204     }
    205 }
    206 
    207 emission == "libdex-index-types" {
    208     emissionHandled = 1;
    209 
    210     col = 1;
    211     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    212         value = sprintf("%s,", indexTypeValues[packedIndexType[i]]);
    213         col = colPrint(value, (i == MAX_PACKED_OPCODE), col, 3, 19, "    ");
    214     }
    215 }
    216 
    217 # Handle the end of directive processing (must appear after the directive
    218 # clauses).
    219 emission != "" {
    220     if (!emissionHandled) {
    221         printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
    222         consumeUntil = "";
    223     }
    224 
    225     emission = "";
    226 }
    227 
    228 #
    229 # Helper functions.
    230 #
    231 
    232 # Helper to print out an element in a multi-column fashion. It returns
    233 # the (one-based) column number that the next element will be printed
    234 # in.
    235 function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
    236     isLast = (isLast || (col == numCols));
    237     printf("%s%-*s%s",
    238         (col == 1) ? linePrefix : " ",
    239         isLast ? 1 : colWidth, value,
    240         isLast ? "\n" : "");
    241 
    242     return (col % numCols) + 1;
    243 }
    244 
    245 # Read the bytecode description file.
    246 function readBytecodes(i, parts, line, cmd, status, count) {
    247     # locals: parts, line, cmd, status, count
    248     for (;;) {
    249         # Read a line.
    250         status = getline line <bytecodeFile;
    251         if (status == 0) break;
    252         if (status < 0) {
    253             print "trouble reading bytecode file";
    254             exit 1;
    255         }
    256 
    257         # Clean up the line and extract the command.
    258         gsub(/  */, " ", line);
    259         sub(/ *#.*$/, "", line);
    260         sub(/ $/, "", line);
    261         sub(/^ /, "", line);
    262         count = split(line, parts);
    263         if (count == 0) continue; # Blank or comment line.
    264         cmd = parts[1];
    265         sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.
    266 
    267         if (cmd == "op") {
    268             status = defineOpcode(line);
    269         } else if (cmd == "format") {
    270             status = defineFormat(line);
    271         } else {
    272             status = -1;
    273         }
    274 
    275         if (status != 0) {
    276             printf("syntax error on line: %s\n", line) >"/dev/stderr";
    277             return 1;
    278         }
    279     }
    280 
    281     return 0;
    282 }
    283 
    284 # Define an opcode.
    285 function defineOpcode(line, count, parts, idx) {
    286     # locals: count, parts, idx
    287     count = split(line, parts);
    288     if (count != 6)  return -1;
    289     idx = parseHex(parts[1]);
    290     if (idx < 0) return -1;
    291 
    292     # Extract directly specified values from the line.
    293     hex[idx] = parts[1];
    294     name[idx] = parts[2];
    295     format[idx] = parts[3];
    296     hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    297     indexType[idx] = parts[5];
    298     flags[idx] = parts[6];
    299 
    300     # Calculate derived values.
    301 
    302     constName[idx] = toupper(name[idx]);
    303     gsub("[/-]", "_", constName[idx]);   # Dash and slash become underscore.
    304     gsub("[+^]", "", constName[idx]);    # Plus and caret are removed.
    305     split(name[idx], parts, "/");
    306 
    307     family[idx] = toupper(parts[1]);
    308     gsub("-", "_", family[idx]);         # Dash becomes underscore.
    309     gsub("[+^]", "", family[idx]);       # Plus and caret are removed.
    310 
    311     split(format[idx], parts, "");       # Width is the first format char.
    312     width[idx] = parts[1];
    313 
    314     # This association is used when computing "next" opcodes.
    315     familyFormat[family[idx],format[idx]] = idx;
    316 
    317     # Verify values.
    318 
    319     if (nextFormat[format[idx]] == "") {
    320         printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
    321         return 1;
    322     }
    323 
    324     if (indexTypeValues[indexType[idx]] == "") {
    325         printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
    326         return 1;
    327     }
    328 
    329     if (flagsToC(flags[idx]) == "") {
    330         printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
    331         return 1;
    332     }
    333 
    334     return 0;
    335 }
    336 
    337 # Define a format family.
    338 function defineFormat(line, count, parts, i) {
    339     # locals: count, parts, i
    340     count = split(line, parts);
    341     if (count < 1)  return -1;
    342     formats[parts[1]] = line;
    343 
    344     parts[count + 1] = "none";
    345     for (i = 1; i <= count; i++) {
    346         nextFormat[parts[i]] = parts[i + 1];
    347     }
    348 
    349     return 0;
    350 }
    351 
    352 # Produce the nextOpcode and isFirst arrays. The former indicates, for
    353 # each opcode, which one should be tried next when doing instruction
    354 # fitting. The latter indicates which opcodes are at the head of an
    355 # instruction fitting chain.
    356 function deriveOpcodeChains(i, op) {
    357     # locals: i, op
    358 
    359     for (i = 0; i <= MAX_OPCODE; i++) {
    360         if (isUnused(i)) continue;
    361         isFirst[i] = "true";
    362     }
    363 
    364     for (i = 0; i <= MAX_OPCODE; i++) {
    365         if (isUnused(i)) continue;
    366         op = findNextOpcode(i);
    367         nextOpcode[i] = op;
    368         if (op != -1) {
    369             isFirst[op] = "false";
    370         }
    371     }
    372 }
    373 
    374 # Given an opcode by index, find the next opcode in the same family
    375 # (that is, with the same base name) to try when matching instructions
    376 # to opcodes. This simply walks the nextFormat chain looking for a
    377 # match. This returns the index of the matching opcode or -1 if there
    378 # is none.
    379 function findNextOpcode(idx, fam, fmt, result) {
    380     # locals: fam, fmt, result
    381     fam = family[idx];
    382     fmt = format[idx];
    383 
    384     # Not every opcode has a version with every possible format, so
    385     # we have to iterate down the chain until we find one or run out of
    386     # formats to try.
    387     for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
    388         result = familyFormat[fam,fmt];
    389         if (result != "") {
    390             return result;
    391         }
    392     }
    393 
    394     return -1;
    395 }
    396 
    397 # Construct the tables of info indexed by packed opcode. The packed opcode
    398 # values are in the range 0-0x1ff, whereas the unpacked opcodes sparsely
    399 # span the range 0-0xffff.
    400 function createPackedTables(i, op) {
    401     # locals: i, op
    402     for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
    403         op = unpackOpcode(i);
    404         if (i == 255) {
    405             # Special case: This is the low-opcode slot for a would-be
    406             # extended opcode dispatch implementation.
    407             packedName[i]      = "dispatch-ff";
    408             packedConstName[i] = "DISPATCH_FF";
    409             packedFormat[i]    = "00x";
    410             packedFlags[i]     = 0;
    411             packedWidth[i]     = 0;
    412             packedIndexType[i] = "unknown";
    413         } else if (isUnused(op)) {
    414             packedName[i]      = unusedName(op);
    415             packedConstName[i] = unusedConstName(op);
    416             packedFormat[i]    = "00x";
    417             packedFlags[i]     = 0;
    418             packedWidth[i]     = 0;
    419             packedIndexType[i] = "unknown";
    420         } else {
    421             packedName[i]      = name[op];
    422             packedConstName[i] = constName[op];
    423             packedFormat[i]    = format[op];
    424             packedFlags[i]     = flags[op];
    425             packedWidth[i]     = width[op];
    426             packedIndexType[i] = indexType[op];
    427         }
    428     }
    429 }
    430 
    431 # Given a packed opcode, returns the raw (unpacked) opcode value.
    432 function unpackOpcode(idx) {
    433     # Note: This must be the inverse of the corresponding code in
    434     # libdex/DexOpcodes.h.
    435     if (idx <= 255) {
    436         return idx;
    437     } else {
    438         idx -= 256;
    439         return (idx * 256) + 255;
    440     }
    441 }
    442 
    443 # Returns the "unused" name of the given opcode (by index).
    444 # That is, this is the human-oriented name to use for an opcode
    445 # definition in cases
    446 # where the opcode isn't used.
    447 function unusedName(idx) {
    448     if (idx <= 255) {
    449          return sprintf("unused-%02x", idx);
    450     } else {
    451          return sprintf("unused-%04x", idx);
    452     }
    453 }
    454 
    455 # Returns the "unused" constant name of the given opcode (by index).
    456 # That is, this is the name to use for a constant definition in cases
    457 # where the opcode isn't used.
    458 function unusedConstName(idx) {
    459     if (idx <= 255) {
    460          return toupper(sprintf("UNUSED_%02x", idx));
    461     } else {
    462          return toupper(sprintf("UNUSED_%04x", idx));
    463     }
    464 }
    465 
    466 # Convert a hex value to an int.
    467 function parseHex(hex, result, chars, count, c, i) {
    468     # locals: result, chars, count, c, i
    469     hex = tolower(hex);
    470     count = split(hex, chars, "");
    471     result = 0;
    472     for (i = 1; i <= count; i++) {
    473         c = index("0123456789abcdef", chars[i]);
    474         if (c == 0) {
    475             printf("bogus hex value: %s\n", hex) >"/dev/stderr";
    476             return -1;
    477         }
    478         result = (result * 16) + c - 1;
    479     }
    480     return result;
    481 }
    482 
    483 # Initialize the indexTypes data.
    484 function initIndexTypes() {
    485     indexTypeValues["unknown"]       = "kIndexUnknown";
    486     indexTypeValues["none"]          = "kIndexNone";
    487     indexTypeValues["varies"]        = "kIndexVaries";
    488     indexTypeValues["type-ref"]      = "kIndexTypeRef";
    489     indexTypeValues["string-ref"]    = "kIndexStringRef";
    490     indexTypeValues["method-ref"]    = "kIndexMethodRef";
    491     indexTypeValues["field-ref"]     = "kIndexFieldRef";
    492     indexTypeValues["inline-method"] = "kIndexInlineMethod";
    493     indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
    494     indexTypeValues["field-offset"]  = "kIndexFieldOffset";
    495 }
    496 
    497 # Initialize the flags data.
    498 function initFlags() {
    499     flagValues["branch"]        = "kInstrCanBranch";
    500     flagValues["continue"]      = "kInstrCanContinue";
    501     flagValues["switch"]        = "kInstrCanSwitch";
    502     flagValues["throw"]         = "kInstrCanThrow";
    503     flagValues["return"]        = "kInstrCanReturn";
    504     flagValues["invoke"]        = "kInstrInvoke";
    505     flagValues["optimized"]     = "0"; # Not represented in C output
    506     flagValues["0"]             = "0";
    507 }
    508 
    509 # Translate the given flags into the equivalent C expression. Returns
    510 # "" on error.
    511 function flagsToC(f, parts, result, i) {
    512     # locals: parts, result, i
    513     count = split(f, parts, /\|/); # Split input at pipe characters.
    514     result = "0";
    515 
    516     for (i = 1; i <= count; i++) {
    517         f = flagValues[parts[i]];
    518         if (f == "") {
    519             printf("bogus flag: %s\n", f) >"/dev/stderr";
    520             return ""; # Bogus flag name.
    521         } else if (f == "0") {
    522             # Nothing to append for this case.
    523         } else if (result == "0") {
    524             result = f;
    525         } else {
    526             result = result "|" f;
    527         }
    528     }
    529 
    530     return result;
    531 }
    532 
    533 # Returns true if the given opcode (by index) is an "optimized" opcode.
    534 function isOptimized(idx, parts, f) {
    535     # locals: parts, f
    536     split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    537     for (f in parts) {
    538         if (parts[f] == "optimized") return 1;
    539     }
    540     return 0;
    541 }
    542 
    543 # Returns true if there is no definition for the given opcode (by index).
    544 function isUnused(idx) {
    545     return (name[idx] == "");
    546 }
    547