1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Dalvik classfile verification. This file contains the verifier entry 19 * points and the static constraint checks. 20 */ 21 #include "Dalvik.h" 22 #include "analysis/CodeVerify.h" 23 24 25 /* fwd */ 26 static bool verifyMethod(Method* meth); 27 static bool verifyInstructions(VerifierData* vdata); 28 29 30 /* 31 * Initialize some things we need for verification. 32 */ 33 bool dvmVerificationStartup(void) 34 { 35 gDvm.instrWidth = dexCreateInstrWidthTable(); 36 gDvm.instrFormat = dexCreateInstrFormatTable(); 37 gDvm.instrFlags = dexCreateInstrFlagsTable(); 38 if (gDvm.instrWidth == NULL || gDvm.instrFormat == NULL || 39 gDvm.instrFlags == NULL) 40 { 41 LOGE("Unable to create instruction tables\n"); 42 return false; 43 } 44 45 return true; 46 } 47 48 /* 49 * Free up some things we needed for verification. 50 */ 51 void dvmVerificationShutdown(void) 52 { 53 free(gDvm.instrWidth); 54 free(gDvm.instrFormat); 55 free(gDvm.instrFlags); 56 } 57 58 59 /* 60 * Verify a class. 61 * 62 * By the time we get here, the value of gDvm.classVerifyMode should already 63 * have been factored in. If you want to call into the verifier even 64 * though verification is disabled, that's your business. 65 * 66 * Returns "true" on success. 67 */ 68 bool dvmVerifyClass(ClassObject* clazz) 69 { 70 int i; 71 72 if (dvmIsClassVerified(clazz)) { 73 LOGD("Ignoring duplicate verify attempt on %s\n", clazz->descriptor); 74 return true; 75 } 76 77 for (i = 0; i < clazz->directMethodCount; i++) { 78 if (!verifyMethod(&clazz->directMethods[i])) { 79 LOG_VFY("Verifier rejected class %s\n", clazz->descriptor); 80 return false; 81 } 82 } 83 for (i = 0; i < clazz->virtualMethodCount; i++) { 84 if (!verifyMethod(&clazz->virtualMethods[i])) { 85 LOG_VFY("Verifier rejected class %s\n", clazz->descriptor); 86 return false; 87 } 88 } 89 90 return true; 91 } 92 93 94 /* 95 * Perform verification on a single method. 96 * 97 * We do this in three passes: 98 * (1) Walk through all code units, determining instruction locations, 99 * widths, and other characteristics. 100 * (2) Walk through all code units, performing static checks on 101 * operands. 102 * (3) Iterate through the method, checking type safety and looking 103 * for code flow problems. 104 * 105 * Some checks may be bypassed depending on the verification mode. We can't 106 * turn this stuff off completely if we want to do "exact" GC. 107 * 108 * TODO: cite source? 109 * Confirmed here: 110 * - code array must not be empty 111 * - (N/A) code_length must be less than 65536 112 * Confirmed by dvmComputeCodeWidths(): 113 * - opcode of first instruction begins at index 0 114 * - only documented instructions may appear 115 * - each instruction follows the last 116 * - last byte of last instruction is at (code_length-1) 117 */ 118 static bool verifyMethod(Method* meth) 119 { 120 bool result = false; 121 int newInstanceCount; 122 123 /* 124 * Verifier state blob. Various values will be cached here so we 125 * can avoid expensive lookups and pass fewer arguments around. 126 */ 127 VerifierData vdata; 128 #if 1 // ndef NDEBUG 129 memset(&vdata, 0x99, sizeof(vdata)); 130 #endif 131 132 vdata.method = meth; 133 vdata.insnsSize = dvmGetMethodInsnsSize(meth); 134 vdata.insnRegCount = meth->registersSize; 135 vdata.insnFlags = NULL; 136 vdata.uninitMap = NULL; 137 138 /* 139 * If there aren't any instructions, make sure that's expected, then 140 * exit successfully. Note: for native methods, meth->insns gets set 141 * to a native function pointer on first call, so don't use that as 142 * an indicator. 143 */ 144 if (vdata.insnsSize == 0) { 145 if (!dvmIsNativeMethod(meth) && !dvmIsAbstractMethod(meth)) { 146 LOG_VFY_METH(meth, 147 "VFY: zero-length code in concrete non-native method\n"); 148 goto bail; 149 } 150 151 goto success; 152 } 153 154 /* 155 * Sanity-check the register counts. ins + locals = registers, so make 156 * sure that ins <= registers. 157 */ 158 if (meth->insSize > meth->registersSize) { 159 LOG_VFY_METH(meth, "VFY: bad register counts (ins=%d regs=%d)\n", 160 meth->insSize, meth->registersSize); 161 goto bail; 162 } 163 164 /* 165 * Allocate and populate an array to hold instruction data. 166 * 167 * TODO: Consider keeping a reusable pre-allocated array sitting 168 * around for smaller methods. 169 */ 170 vdata.insnFlags = (InsnFlags*) 171 calloc(dvmGetMethodInsnsSize(meth), sizeof(InsnFlags)); 172 if (vdata.insnFlags == NULL) 173 goto bail; 174 175 /* 176 * Compute the width of each instruction and store the result in insnFlags. 177 * Count up the #of occurrences of new-instance instructions while we're 178 * at it. 179 */ 180 if (!dvmComputeCodeWidths(meth, vdata.insnFlags, &newInstanceCount)) 181 goto bail; 182 183 /* 184 * Allocate a map to hold the classes of uninitialized instances. 185 */ 186 vdata.uninitMap = dvmCreateUninitInstanceMap(meth, vdata.insnFlags, 187 newInstanceCount); 188 if (vdata.uninitMap == NULL) 189 goto bail; 190 191 /* 192 * Set the "in try" flags for all instructions guarded by a "try" block. 193 */ 194 if (!dvmSetTryFlags(meth, vdata.insnFlags)) 195 goto bail; 196 197 /* 198 * Perform static instruction verification. 199 */ 200 if (!verifyInstructions(&vdata)) 201 goto bail; 202 203 /* 204 * Do code-flow analysis. Do this after verifying the branch targets 205 * so we don't need to worry about it here. 206 * 207 * If there are no registers, we don't need to do much in the way of 208 * analysis, but we still need to verify that nothing actually tries 209 * to use a register. 210 */ 211 if (!dvmVerifyCodeFlow(&vdata)) { 212 //LOGD("+++ %s failed code flow\n", meth->name); 213 goto bail; 214 } 215 216 success: 217 result = true; 218 219 bail: 220 dvmFreeUninitInstanceMap(vdata.uninitMap); 221 free(vdata.insnFlags); 222 return result; 223 } 224 225 226 /* 227 * Verify an array data table. "curOffset" is the offset of the fill-array-data 228 * instruction. 229 */ 230 static bool checkArrayData(const Method* meth, int curOffset) 231 { 232 const int insnCount = dvmGetMethodInsnsSize(meth); 233 const u2* insns = meth->insns + curOffset; 234 const u2* arrayData; 235 int valueCount, valueWidth, tableSize; 236 int offsetToArrayData; 237 238 assert(curOffset >= 0 && curOffset < insnCount); 239 240 /* make sure the start of the array data table is in range */ 241 offsetToArrayData = insns[1] | (((s4)insns[2]) << 16); 242 if (curOffset + offsetToArrayData < 0 || 243 curOffset + offsetToArrayData + 2 >= insnCount) 244 { 245 LOG_VFY_METH(meth, 246 "VFY: invalid array data start: at %d, data offset %d, count %d\n", 247 curOffset, offsetToArrayData, insnCount); 248 return false; 249 } 250 251 /* offset to array data table is a relative branch-style offset */ 252 arrayData = insns + offsetToArrayData; 253 254 /* make sure the table is 32-bit aligned */ 255 if ((((u4) arrayData) & 0x03) != 0) { 256 LOG_VFY_METH(meth, 257 "VFY: unaligned array data table: at %d, data offset %d\n", 258 curOffset, offsetToArrayData); 259 return false; 260 } 261 262 valueWidth = arrayData[1]; 263 valueCount = *(u4*)(&arrayData[2]); 264 265 tableSize = 4 + (valueWidth * valueCount + 1) / 2; 266 267 /* make sure the end of the switch is in range */ 268 if (curOffset + offsetToArrayData + tableSize > insnCount) { 269 LOG_VFY_METH(meth, 270 "VFY: invalid array data end: at %d, data offset %d, end %d, " 271 "count %d\n", 272 curOffset, offsetToArrayData, 273 curOffset + offsetToArrayData + tableSize, 274 insnCount); 275 return false; 276 } 277 278 return true; 279 } 280 281 282 /* 283 * Decode the current instruction. 284 */ 285 static void decodeInstruction(const Method* meth, int insnIdx, 286 DecodedInstruction* pDecInsn) 287 { 288 dexDecodeInstruction(gDvm.instrFormat, meth->insns + insnIdx, pDecInsn); 289 } 290 291 292 /* 293 * Perform static checks on a "new-instance" instruction. Specifically, 294 * make sure the class reference isn't for an array class. 295 * 296 * We don't need the actual class, just a pointer to the class name. 297 */ 298 static bool checkNewInstance(const Method* meth, int insnIdx) 299 { 300 DvmDex* pDvmDex = meth->clazz->pDvmDex; 301 DecodedInstruction decInsn; 302 const char* classDescriptor; 303 u4 idx; 304 305 decodeInstruction(meth, insnIdx, &decInsn); 306 idx = decInsn.vB; // 2nd item 307 if (idx >= pDvmDex->pHeader->typeIdsSize) { 308 LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n", 309 idx, pDvmDex->pHeader->typeIdsSize); 310 return false; 311 } 312 313 classDescriptor = dexStringByTypeIdx(pDvmDex->pDexFile, idx); 314 if (classDescriptor[0] != 'L') { 315 LOG_VFY_METH(meth, "VFY: can't call new-instance on type '%s'\n", 316 classDescriptor); 317 return false; 318 } 319 320 return true; 321 } 322 323 /* 324 * Perform static checks on a "new-array" instruction. Specifically, make 325 * sure they aren't creating an array of arrays that causes the number of 326 * dimensions to exceed 255. 327 */ 328 static bool checkNewArray(const Method* meth, int insnIdx) 329 { 330 DvmDex* pDvmDex = meth->clazz->pDvmDex; 331 DecodedInstruction decInsn; 332 const char* classDescriptor; 333 u4 idx; 334 335 decodeInstruction(meth, insnIdx, &decInsn); 336 idx = decInsn.vC; // 3rd item 337 if (idx >= pDvmDex->pHeader->typeIdsSize) { 338 LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n", 339 idx, pDvmDex->pHeader->typeIdsSize); 340 return false; 341 } 342 343 classDescriptor = dexStringByTypeIdx(pDvmDex->pDexFile, idx); 344 345 int bracketCount = 0; 346 const char* cp = classDescriptor; 347 while (*cp++ == '[') 348 bracketCount++; 349 350 if (bracketCount == 0) { 351 /* The given class must be an array type. */ 352 LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (not an array)\n", 353 classDescriptor); 354 return false; 355 } else if (bracketCount > 255) { 356 /* It is illegal to create an array of more than 255 dimensions. */ 357 LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (exceeds limit)\n", 358 classDescriptor); 359 return false; 360 } 361 362 return true; 363 } 364 365 /* 366 * Perform static checks on an instruction that takes a class constant. 367 * Ensure that the class index is in the valid range. 368 */ 369 static bool checkTypeIndex(const Method* meth, int insnIdx, bool useB) 370 { 371 DvmDex* pDvmDex = meth->clazz->pDvmDex; 372 DecodedInstruction decInsn; 373 u4 idx; 374 375 decodeInstruction(meth, insnIdx, &decInsn); 376 if (useB) 377 idx = decInsn.vB; 378 else 379 idx = decInsn.vC; 380 if (idx >= pDvmDex->pHeader->typeIdsSize) { 381 LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n", 382 idx, pDvmDex->pHeader->typeIdsSize); 383 return false; 384 } 385 386 return true; 387 } 388 389 /* 390 * Perform static checks on a field get or set instruction. All we do 391 * here is ensure that the field index is in the valid range. 392 */ 393 static bool checkFieldIndex(const Method* meth, int insnIdx, bool useB) 394 { 395 DvmDex* pDvmDex = meth->clazz->pDvmDex; 396 DecodedInstruction decInsn; 397 u4 idx; 398 399 decodeInstruction(meth, insnIdx, &decInsn); 400 if (useB) 401 idx = decInsn.vB; 402 else 403 idx = decInsn.vC; 404 if (idx >= pDvmDex->pHeader->fieldIdsSize) { 405 LOG_VFY_METH(meth, 406 "VFY: bad field index %d (max %d) at offset 0x%04x\n", 407 idx, pDvmDex->pHeader->fieldIdsSize, insnIdx); 408 return false; 409 } 410 411 return true; 412 } 413 414 /* 415 * Perform static checks on a method invocation instruction. All we do 416 * here is ensure that the method index is in the valid range. 417 */ 418 static bool checkMethodIndex(const Method* meth, int insnIdx) 419 { 420 DvmDex* pDvmDex = meth->clazz->pDvmDex; 421 DecodedInstruction decInsn; 422 423 decodeInstruction(meth, insnIdx, &decInsn); 424 if (decInsn.vB >= pDvmDex->pHeader->methodIdsSize) { 425 LOG_VFY_METH(meth, "VFY: bad method index %d (max %d)\n", 426 decInsn.vB, pDvmDex->pHeader->methodIdsSize); 427 return false; 428 } 429 430 return true; 431 } 432 433 /* 434 * Perform static checks on a string constant instruction. All we do 435 * here is ensure that the string index is in the valid range. 436 */ 437 static bool checkStringIndex(const Method* meth, int insnIdx) 438 { 439 DvmDex* pDvmDex = meth->clazz->pDvmDex; 440 DecodedInstruction decInsn; 441 442 decodeInstruction(meth, insnIdx, &decInsn); 443 if (decInsn.vB >= pDvmDex->pHeader->stringIdsSize) { 444 LOG_VFY_METH(meth, "VFY: bad string index %d (max %d)\n", 445 decInsn.vB, pDvmDex->pHeader->stringIdsSize); 446 return false; 447 } 448 449 return true; 450 } 451 452 /* 453 * Perform static verification on instructions. 454 * 455 * As a side effect, this sets the "branch target" flags in InsnFlags. 456 * 457 * "(CF)" items are handled during code-flow analysis. 458 * 459 * v3 4.10.1 460 * - target of each jump and branch instruction must be valid 461 * - targets of switch statements must be valid 462 * - (CF) operands referencing constant pool entries must be valid 463 * - (CF) operands of getfield, putfield, getstatic, putstatic must be valid 464 * - (new) verify operands of "quick" field ops 465 * - (CF) operands of method invocation instructions must be valid 466 * - (new) verify operands of "quick" method invoke ops 467 * - (CF) only invoke-direct can call a method starting with '<' 468 * - (CF) <clinit> must never be called explicitly 469 * - (CF) operands of instanceof, checkcast, new (and variants) must be valid 470 * - new-array[-type] limited to 255 dimensions 471 * - can't use "new" on an array class 472 * - (?) limit dimensions in multi-array creation 473 * - (CF) local variable load/store register values must be in valid range 474 * 475 * v3 4.11.1.2 476 * - branches must be within the bounds of the code array 477 * - targets of all control-flow instructions are the start of an instruction 478 * - (CF) register accesses fall within range of allocated registers 479 * - (N/A) access to constant pool must be of appropriate type 480 * - (CF) code does not end in the middle of an instruction 481 * - (CF) execution cannot fall off the end of the code 482 * - (earlier) for each exception handler, the "try" area must begin and 483 * end at the start of an instruction (end can be at the end of the code) 484 * - (earlier) for each exception handler, the handler must start at a valid 485 * instruction 486 * 487 * TODO: move some of the "CF" items in here for better performance (the 488 * code-flow analysis sometimes has to process the same instruction several 489 * times). 490 */ 491 static bool verifyInstructions(VerifierData* vdata) 492 { 493 const Method* meth = vdata->method; 494 InsnFlags* insnFlags = vdata->insnFlags; 495 const size_t insnCount = vdata->insnsSize; 496 const u2* insns = meth->insns; 497 int i; 498 499 /* the start of the method is a "branch target" */ 500 dvmInsnSetBranchTarget(insnFlags, 0, true); 501 502 for (i = 0; i < (int) insnCount; /**/) { 503 /* 504 * These types of instructions can be GC points. To support precise 505 * GC, all such instructions must export the PC in the interpreter, 506 * or the GC won't be able to identify the current PC for the thread. 507 */ 508 static const int gcMask = kInstrCanBranch | kInstrCanSwitch | 509 kInstrCanThrow | kInstrCanReturn; 510 511 int width = dvmInsnGetWidth(insnFlags, i); 512 OpCode opcode = *insns & 0xff; 513 InstructionFlags opFlags = dexGetInstrFlags(gDvm.instrFlags, opcode); 514 515 if ((opFlags & gcMask) != 0) { 516 /* 517 * This instruction is probably a GC point. Branch instructions 518 * only qualify if they go backward, so we need to check the 519 * offset. 520 */ 521 int offset = -1; 522 bool unused; 523 if (dvmGetBranchTarget(meth, insnFlags, i, &offset, &unused)) { 524 if (offset <= 0) { 525 dvmInsnSetGcPoint(insnFlags, i, true); 526 } 527 } else { 528 /* not a branch target */ 529 dvmInsnSetGcPoint(insnFlags, i, true); 530 } 531 } 532 533 switch (opcode) { 534 case OP_NOP: 535 /* plain no-op or switch table data; nothing to do here */ 536 break; 537 538 case OP_CONST_STRING: 539 case OP_CONST_STRING_JUMBO: 540 if (!checkStringIndex(meth, i)) 541 return false; 542 break; 543 544 case OP_CONST_CLASS: 545 case OP_CHECK_CAST: 546 if (!checkTypeIndex(meth, i, true)) 547 return false; 548 break; 549 case OP_INSTANCE_OF: 550 if (!checkTypeIndex(meth, i, false)) 551 return false; 552 break; 553 554 case OP_PACKED_SWITCH: 555 case OP_SPARSE_SWITCH: 556 /* verify the associated table */ 557 if (!dvmCheckSwitchTargets(meth, insnFlags, i)) 558 return false; 559 break; 560 561 case OP_FILL_ARRAY_DATA: 562 /* verify the associated table */ 563 if (!checkArrayData(meth, i)) 564 return false; 565 break; 566 567 case OP_GOTO: 568 case OP_GOTO_16: 569 case OP_IF_EQ: 570 case OP_IF_NE: 571 case OP_IF_LT: 572 case OP_IF_GE: 573 case OP_IF_GT: 574 case OP_IF_LE: 575 case OP_IF_EQZ: 576 case OP_IF_NEZ: 577 case OP_IF_LTZ: 578 case OP_IF_GEZ: 579 case OP_IF_GTZ: 580 case OP_IF_LEZ: 581 /* check the destination */ 582 if (!dvmCheckBranchTarget(meth, insnFlags, i, false)) 583 return false; 584 break; 585 case OP_GOTO_32: 586 /* check the destination; self-branch is okay */ 587 if (!dvmCheckBranchTarget(meth, insnFlags, i, true)) 588 return false; 589 break; 590 591 case OP_NEW_INSTANCE: 592 if (!checkNewInstance(meth, i)) 593 return false; 594 break; 595 596 case OP_NEW_ARRAY: 597 if (!checkNewArray(meth, i)) 598 return false; 599 break; 600 601 case OP_FILLED_NEW_ARRAY: 602 if (!checkTypeIndex(meth, i, true)) 603 return false; 604 break; 605 case OP_FILLED_NEW_ARRAY_RANGE: 606 if (!checkTypeIndex(meth, i, true)) 607 return false; 608 break; 609 610 case OP_IGET: 611 case OP_IGET_WIDE: 612 case OP_IGET_OBJECT: 613 case OP_IGET_BOOLEAN: 614 case OP_IGET_BYTE: 615 case OP_IGET_CHAR: 616 case OP_IGET_SHORT: 617 case OP_IPUT: 618 case OP_IPUT_WIDE: 619 case OP_IPUT_OBJECT: 620 case OP_IPUT_BOOLEAN: 621 case OP_IPUT_BYTE: 622 case OP_IPUT_CHAR: 623 case OP_IPUT_SHORT: 624 /* check the field index */ 625 if (!checkFieldIndex(meth, i, false)) 626 return false; 627 break; 628 case OP_SGET: 629 case OP_SGET_WIDE: 630 case OP_SGET_OBJECT: 631 case OP_SGET_BOOLEAN: 632 case OP_SGET_BYTE: 633 case OP_SGET_CHAR: 634 case OP_SGET_SHORT: 635 case OP_SPUT: 636 case OP_SPUT_WIDE: 637 case OP_SPUT_OBJECT: 638 case OP_SPUT_BOOLEAN: 639 case OP_SPUT_BYTE: 640 case OP_SPUT_CHAR: 641 case OP_SPUT_SHORT: 642 /* check the field index */ 643 if (!checkFieldIndex(meth, i, true)) 644 return false; 645 break; 646 647 case OP_INVOKE_VIRTUAL: 648 case OP_INVOKE_SUPER: 649 case OP_INVOKE_DIRECT: 650 case OP_INVOKE_STATIC: 651 case OP_INVOKE_INTERFACE: 652 case OP_INVOKE_VIRTUAL_RANGE: 653 case OP_INVOKE_SUPER_RANGE: 654 case OP_INVOKE_DIRECT_RANGE: 655 case OP_INVOKE_STATIC_RANGE: 656 case OP_INVOKE_INTERFACE_RANGE: 657 /* check the method index */ 658 if (!checkMethodIndex(meth, i)) 659 return false; 660 break; 661 662 case OP_EXECUTE_INLINE: 663 case OP_INVOKE_DIRECT_EMPTY: 664 case OP_IGET_QUICK: 665 case OP_IGET_WIDE_QUICK: 666 case OP_IGET_OBJECT_QUICK: 667 case OP_IPUT_QUICK: 668 case OP_IPUT_WIDE_QUICK: 669 case OP_IPUT_OBJECT_QUICK: 670 case OP_INVOKE_VIRTUAL_QUICK: 671 case OP_INVOKE_VIRTUAL_QUICK_RANGE: 672 case OP_INVOKE_SUPER_QUICK: 673 case OP_INVOKE_SUPER_QUICK_RANGE: 674 LOG_VFY("VFY: not expecting optimized instructions\n"); 675 return false; 676 break; 677 678 default: 679 /* nothing to do */ 680 break; 681 } 682 683 assert(width > 0); 684 i += width; 685 insns += width; 686 } 687 688 /* make sure the last instruction ends at the end of the insn area */ 689 if (i != (int) insnCount) { 690 LOG_VFY_METH(meth, 691 "VFY: code did not end when expected (end at %d, count %d)\n", 692 i, insnCount); 693 return false; 694 } 695 696 return true; 697 } 698