# Copyright (C) 2007 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# Awk helper script for opcode-gen.
#

#
# Initialization.
#

# Set up the limits and lookup tables, load the bytecode description
# file, and derive every table the directive handlers read from. The
# script exits with status 1 if the description file cannot be loaded.
BEGIN {
    # No directive section is active until a BEGIN(...) tag is seen.
    consumeUntil = "";
    emission = "";

    MAX_OPCODE = 65535;
    MAX_PACKED_OPCODE = 511;
    MAX_PACKED_OPCODE = 255; # TODO: Not for long!

    initIndexTypes();
    initFlags();

    if (readBytecodes() != 0) {
        exit 1;
    }

    deriveOpcodeChains();
    createPackedTables();
}

#
# General control (must appear above directive handlers).
#

# While inside a directive section, drop the preexisting output. Only the
# line carrying the matching END(...) marker is passed through, and it
# also ends the skipping.
consumeUntil != "" {
    if (index($0, consumeUntil) == 0) {
        next;
    }

    consumeUntil = "";
    print;
    next;
}

# Detect directives. The tag name is the text between "BEGIN(" and ")".
/BEGIN\([a-z-]*\)/ {
    tagStart = match($0, /BEGIN\([a-z-]*\)/);

    # Skip the six characters of "BEGIN(" and leave off the closing ")".
    emission = substr($0, tagStart + 6, RLENGTH - 7);
    emissionHandled = 0;
    consumeUntil = "END(" emission ")";
}

# Most lines just get copied from the source as-is, including the start
# comment for directives.
{
    print;
}

#
# Handlers for all of the directives.
#

# Each handler below fires on the line that carried a BEGIN(<tag>) marker
# (after that line has been copied through) and prints the generated
# replacement text for the section. Setting emissionHandled tells the
# end-of-directive rule that the tag was recognized.

# Java int constants, one per defined and non-optimized opcode.
emission == "opcodes" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (!(isUnused(op) || isOptimized(op))) {
            printf(" public static final int %s = 0x%s;\n",
                   constName[op], hex[op]);
        }
    }
}

# Comment lines naming every opcode that heads an instruction fitting chain.
emission == "first-opcodes" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (isUnused(op) || isOptimized(op)) continue;
        if (isFirst[op] != "true") continue;
        printf(" // Opcodes.%s\n", constName[op]);
    }
}

# Dop constant definitions, including the link to the next opcode to try.
emission == "dops" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (isUnused(op) || isOptimized(op)) continue;

        if (nextOpcode[op] == -1) {
            successorName = "NO_NEXT";
        } else {
            successorName = constName[nextOpcode[op]];
        }

        printf(" public static final Dop %s =\n", constName[op]);
        printf(" new Dop(Opcodes.%s, Opcodes.%s,\n",
               constName[op], family[op]);
        printf(" Opcodes.%s, Form%s.THE_ONE, %s);\n\n",
               successorName, format[op], hasResult[op]);
    }
}

# Info constant definitions (name, codec format and index type).
emission == "opcode-info-defs" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (isUnused(op) || isOptimized(op)) continue;

        # Index type names become Java enum constants: upper case, with
        # dashes turned into underscores.
        indexTypeConst = toupper(indexType[op]);
        gsub(/-/, "_", indexTypeConst);

        printf(" public static final Info %s =\n", constName[op]);
        printf(" new Info(Opcodes.%s, \"%s\",\n", constName[op], name[op]);
        printf(" InstructionCodec.FORMAT_%s, IndexType.%s);\n\n",
               toupper(format[op]), indexTypeConst);
    }
}

# Registration calls; both tags produce the same text.
emission == "dops-init" || emission == "opcode-info-init" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (!(isUnused(op) || isOptimized(op))) {
            printf(" set(%s);\n", constName[op]);
        }
    }
}

# OP_* int constants, with the value printed as four hex digits.
emission == "libcore-opcodes" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (!(isUnused(op) || isOptimized(op))) {
            printf(" int OP_%-28s = 0x%04x;\n", constName[op], op);
        }
    }
}

# Largest unpacked and packed opcode values, in decimal.
emission == "libcore-maximum-values" {
    emissionHandled = 1;

    printf(" MAXIMUM_VALUE = %d;\n", MAX_OPCODE);
    printf(" MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE);
}

# C #defines for the maximum opcode value and the packed opcode count.
emission == "libdex-maximum-values" {
    emissionHandled = 1;

    printf("#define kMaxOpcodeValue 0x%x\n", MAX_OPCODE);
    printf("#define kNumPackedOpcodes 0x%x\n", MAX_PACKED_OPCODE + 1);
}

# C enum entries, one per packed opcode (unused slots included).
emission == "libdex-opcode-enum" {
    emissionHandled = 1;

    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        printf(" OP_%-28s = 0x%02x,\n", packedConstName[packed], packed);
    }
}

# H(OP_...) entries, each padded to 78 columns and ended with a backslash.
emission == "libdex-goto-table" {
    emissionHandled = 1;

    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        printf("%-78s\\\n", sprintf(" H(OP_%s),", packedConstName[packed]));
    }
}

# Quoted opcode names, one per packed opcode.
emission == "libdex-opcode-names" {
    emissionHandled = 1;

    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        printf(" \"%s\",\n", packedName[packed]);
    }
}

# Instruction widths, laid out 16 to a row.
emission == "libdex-widths" {
    emissionHandled = 1;

    col = 1;
    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        col = colPrint(sprintf("%d,", packedWidth[packed]),
                       (packed == MAX_PACKED_OPCODE), col, 16, 2, " ");
    }
}

# Flag expressions in C syntax, one per packed opcode.
emission == "libdex-flags" {
    emissionHandled = 1;

    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        printf(" %s,\n", flagsToC(packedFlags[packed]));
    }
}

# kFmt* format constants, laid out 7 to a row.
emission == "libdex-formats" {
    emissionHandled = 1;

    col = 1;
    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        col = colPrint(sprintf("kFmt%s,", packedFormat[packed]),
                       (packed == MAX_PACKED_OPCODE), col, 7, 9, " ");
    }
}

# Index type constants, laid out 3 to a row.
emission == "libdex-index-types" {
    emissionHandled = 1;

    col = 1;
    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        col = colPrint(sprintf("%s,", indexTypeValues[packedIndexType[packed]]),
                       (packed == MAX_PACKED_OPCODE), col, 3, 19, " ");
    }
}

# Handle the end of directive processing (must appear after the directive
# clauses).
# If no handler claimed the tag, warn and stop skipping so the section's
# existing text is copied through unchanged. Either way the tag is cleared
# so the handlers do not fire again on later lines.
emission != "" {
    if (!emissionHandled) {
        printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
        consumeUntil = "";
    }

    emission = "";
}

#
# Helper functions.
#

# Helper to print out an element in a multi-column fashion. It returns
# the (one-based) column number that the next element will be printed
# in.
#
#   value      - text of the element to print
#   isLast     - true if this is the final element overall
#   col        - one-based column the element is printed in
#   numCols    - number of columns per row
#   colWidth   - field width used to pad elements that do not end a row
#   linePrefix - text printed before the first element of a row
function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
    # The element that ends a row is not padded and is followed by a newline.
    isLast = (isLast || (col == numCols));
    printf("%s%-*s%s",
           (col == 1) ? linePrefix : " ",
           isLast ? 1 : colWidth, value,
           isLast ? "\n" : "");

    return (col % numCols) + 1;
}

# Read the bytecode description file named by the variable bytecodeFile.
# Each non-blank, non-comment line must start with either the "op" or the
# "format" command. Returns 0 on success and 1 on a malformed line; a
# read failure terminates the script with status 1.
function readBytecodes(i, parts, line, cmd, status, count) {
    # locals: parts, line, cmd, status, count
    for (;;) {
        # Read a line.
        status = getline line <bytecodeFile;
        if (status == 0) break;
        if (status < 0) {
            # Diagnostics must not land in the generated output on stdout.
            print "trouble reading bytecode file" >"/dev/stderr";
            exit 1;
        }

        # Clean up the line and extract the command. Runs of spaces are
        # collapsed with / +/; a pattern that can match the empty string
        # would instead insert a space between every character.
        gsub(/ +/, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1;
        }

        if (status != 0) {
            printf("syntax error on line: %s\n", line) >"/dev/stderr";
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}

# Define an opcode from the six fields of an "op" line (command already
# removed): hex value, name, format, result marker, index type and flags.
# Returns 0 on success and non-zero if the line is malformed or names an
# unknown format, index type or flag.
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6) return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values.

    constName[idx] = toupper(name[idx]);
    gsub("[/-]", "_", constName[idx]); # Dash and slash become underscore.
    gsub("[+^]", "", constName[idx]);  # Plus and caret are removed.

    # The family is the part of the name before the first slash.
    split(name[idx], parts, "/");
    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);       # Dash becomes underscore.
    gsub("[+^]", "", family[idx]);     # Plus and caret are removed.

    # Width is the first format char. substr() is used rather than
    # splitting on an empty separator, which not every awk supports.
    width[idx] = substr(format[idx], 1, 1);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
        return 1;
    }

    return 0;
}

# Define a format family. The line lists formats in the order they should
# be tried; each one is linked to its successor and the last one is linked
# to "none". Returns 0 on success and -1 if the line names no format.
function defineFormat(line, count, parts, i) {
    # locals: count, parts, i
    count = split(line, parts);
    if (count < 1) return -1;
    formats[parts[1]] = line;

    # Sentinel so the final format's successor is "none".
    parts[count + 1] = "none";
    for (i = 1; i <= count; i++) {
        nextFormat[parts[i]] = parts[i + 1];
    }

    return 0;
}

# Produce the nextOpcode and isFirst arrays. The former indicates, for
# each opcode, which one should be tried next when doing instruction
# fitting. The latter indicates which opcodes are at the head of an
# instruction fitting chain.
function deriveOpcodeChains(code, successor) {
    # locals: code, successor

    # Pass 1: assume every defined opcode heads a chain. This has to be
    # complete before pass 2, which may demote an opcode of any index.
    for (code = 0; code <= MAX_OPCODE; code++) {
        if (!isUnused(code)) {
            isFirst[code] = "true";
        }
    }

    # Pass 2: record each opcode's successor; anything that is some
    # opcode's successor cannot be the head of a chain.
    for (code = 0; code <= MAX_OPCODE; code++) {
        if (!isUnused(code)) {
            successor = findNextOpcode(code);
            nextOpcode[code] = successor;
            if (successor != -1) {
                isFirst[successor] = "false";
            }
        }
    }
}

# Given an opcode by index, find the next opcode in the same family
# (that is, with the same base name) to try when matching instructions
# to opcodes. This simply walks the nextFormat chain looking for a
# match. This returns the index of the matching opcode or -1 if there
# is none.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];

    # Not every opcode has a version with every possible format, so keep
    # following the chain from this opcode's own format until a family
    # member turns up or the chain ends at "none".
    fmt = nextFormat[format[idx]];
    while (fmt != "none") {
        result = familyFormat[fam,fmt];
        if (result != "") {
            return result;
        }
        fmt = nextFormat[fmt];
    }

    return -1;
}

# Construct the tables of info indexed by packed opcode. The packed opcode
# values are in the range 0-0x1ff, whereas the unpacked opcodes sparsely
# span the range 0-0xffff.
function createPackedTables(packed, raw) {
    # locals: packed, raw
    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        raw = unpackOpcode(packed);
        if (!isUnused(raw)) {
            # Defined opcode: copy its attributes across.
            packedName[packed]      = name[raw];
            packedConstName[packed] = constName[raw];
            packedFormat[packed]    = format[raw];
            packedFlags[packed]     = flags[raw];
            packedWidth[packed]     = width[raw];
            packedIndexType[packed] = indexType[raw];
        } else {
            # Undefined slot: fill in placeholder attributes.
            packedName[packed]      = unusedName(raw);
            packedConstName[packed] = unusedConstName(raw);
            packedFormat[packed]    = "00x";
            packedFlags[packed]     = 0;
            packedWidth[packed]     = 0;
            packedIndexType[packed] = "unknown";
        }
    }
}

# Given a packed opcode, returns the raw (unpacked) opcode value.
# Packed values up to 255 map to themselves; a larger value P maps to
# ((P - 256) * 256) + 255.
function unpackOpcode(idx) {
    # Note: This must be the inverse of the corresponding code in
    # libdex/DexOpcodes.h.
    if (idx > 255) {
        return ((idx - 256) * 256) + 255;
    }
    return idx;
}

# Returns the "unused" name of the given opcode (by index), that is, the
# human-oriented name to use for an opcode that has no definition. The
# value is rendered as two hex digits up to 255 and four above that.
function unusedName(idx) {
    return sprintf((idx <= 255) ? "unused-%02x" : "unused-%04x", idx);
}

# Returns the "unused" constant name of the given opcode (by index), that
# is, the name to use for a constant definition when the opcode has no
# definition. The whole result, hex digits included, is upper case.
function unusedConstName(idx) {
    return toupper(sprintf((idx <= 255) ? "UNUSED_%02x" : "UNUSED_%04x", idx));
}

# Convert a hex value to an int.
# Accepts upper- or lower-case digits. Returns the value, or -1 (after
# printing a message to stderr) if the text is empty or contains a
# character that is not a hex digit.
function parseHex(hex, result, chars, count, c, i) {
    # locals: result, count, c, i (chars is unused; kept so the
    # parameter list stays as it was)
    hex = tolower(hex);
    count = length(hex);
    if (count == 0) {
        printf("bogus hex value: %s\n", hex) >"/dev/stderr";
        return -1;
    }

    result = 0;
    for (i = 1; i <= count; i++) {
        # index() is one-based, so a digit's value is its position minus
        # one; 0 means the character is not a hex digit at all.
        c = index("0123456789abcdef", substr(hex, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex) >"/dev/stderr";
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}

# Initialize the indexTypes data: maps each index type name used in the
# bytecode description file to the constant emitted for it.
function initIndexTypes() {
    indexTypeValues["unknown"]       = "kIndexUnknown";
    indexTypeValues["none"]          = "kIndexNone";
    indexTypeValues["varies"]        = "kIndexVaries";
    indexTypeValues["type-ref"]      = "kIndexTypeRef";
    indexTypeValues["string-ref"]    = "kIndexStringRef";
    indexTypeValues["method-ref"]    = "kIndexMethodRef";
    indexTypeValues["field-ref"]     = "kIndexFieldRef";
    indexTypeValues["inline-method"] = "kIndexInlineMethod";
    indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
    indexTypeValues["field-offset"]  = "kIndexFieldOffset";
}

# Initialize the flags data: maps each flag name used in the bytecode
# description file to the constant emitted for it. A value of "0" means
# the flag contributes nothing to the emitted expression.
function initFlags() {
    flagValues["branch"]    = "kInstrCanBranch";
    flagValues["continue"]  = "kInstrCanContinue";
    flagValues["switch"]    = "kInstrCanSwitch";
    flagValues["throw"]     = "kInstrCanThrow";
    flagValues["return"]    = "kInstrCanReturn";
    flagValues["invoke"]    = "kInstrInvoke";
    flagValues["optimized"] = "0"; # Not represented in C output
    flagValues["0"]         = "0";
}

# Translate the given flags (names separated by pipe characters) into the
# equivalent C expression. Returns "0" when no flag contributes anything,
# and "" on error, after naming the unrecognized flag on stderr.
function flagsToC(f, parts, result, i, count) {
    # locals: parts, result, i, count
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        if (!(parts[i] in flagValues)) {
            # Report the name from the input; the lookup result would be
            # empty here.
            printf("bogus flag: %s\n", parts[i]) >"/dev/stderr";
            return ""; # Bogus flag name.
        }

        f = flagValues[parts[i]];
        if (f == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = f;
        } else {
            result = result "|" f;
        }
    }

    return result;
}

# Returns true if the given opcode (by index) is an "optimized" opcode,
# that is, if "optimized" is one of its pipe-separated flags.
function isOptimized(idx, parts, f) {
    # locals: parts, f
    split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    for (f in parts) {
        if (parts[f] == "optimized") return 1;
    }
    return 0;
}

# Returns true if there is no definition for the given opcode (by index).
# The membership test comes first so that probing an undefined opcode does
# not create an empty name[] element for it.
function isUnused(idx) {
    return (!(idx in name) || (name[idx] == ""));
}