1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package dexfuzz.program; 18 19 import dexfuzz.Log; 20 import dexfuzz.rawdex.CodeItem; 21 import dexfuzz.rawdex.EncodedCatchHandler; 22 import dexfuzz.rawdex.EncodedTypeAddrPair; 23 import dexfuzz.rawdex.Instruction; 24 import dexfuzz.rawdex.Opcode; 25 import dexfuzz.rawdex.TryItem; 26 import dexfuzz.rawdex.formats.ContainsTarget; 27 import dexfuzz.rawdex.formats.RawInsnHelper; 28 29 import java.util.ArrayList; 30 import java.util.Collections; 31 import java.util.Comparator; 32 import java.util.HashMap; 33 import java.util.LinkedList; 34 import java.util.List; 35 import java.util.Map; 36 37 /** 38 * Translates from a CodeItem (the raw list of Instructions) to MutatableCode 39 * (graph of Instructions, using MInsns and subclasses) and vice-versa. 40 */ 41 public class CodeTranslator { 42 43 /** 44 * Given a raw DEX file's CodeItem, produce a MutatableCode object, that CodeMutators 45 * are designed to operate on. 46 * @param codeItemIdx Used to make sure the correct CodeItem is updated later after mutation. 47 * @return A new MutatableCode object, which contains all relevant information 48 * obtained from the CodeItem. 49 */ 50 public MutatableCode codeItemToMutatableCode(Program program, CodeItem codeItem, 51 int codeItemIdx, int mutatableCodeIdx) { 52 Log.debug("Translating CodeItem " + codeItemIdx 53 + " (" + codeItem.meta.methodName + ") to MutatableCode"); 54 55 MutatableCode mutatableCode = new MutatableCode(program); 56 57 codeItem.registerMutatableCode(mutatableCode); 58 59 mutatableCode.name = codeItem.meta.methodName; 60 mutatableCode.shorty = codeItem.meta.shorty; 61 mutatableCode.isStatic = codeItem.meta.isStatic; 62 63 mutatableCode.codeItemIdx = codeItemIdx; 64 65 mutatableCode.mutatableCodeIdx = mutatableCodeIdx; 66 67 mutatableCode.registersSize = codeItem.registersSize; 68 mutatableCode.insSize = codeItem.insSize; 69 mutatableCode.outsSize = codeItem.outsSize; 70 mutatableCode.triesSize = codeItem.triesSize; 71 72 // Temporary map from bytecode offset -> instruction. 73 Map<Integer,MInsn> insnLocationMap = new HashMap<Integer,MInsn>(); 74 75 List<Instruction> inputInsns = codeItem.insns; 76 77 // Create the MInsns. 78 int loc = 0; 79 for (Instruction insn : inputInsns) { 80 MInsn mInsn = null; 81 82 if (isInstructionSwitch(insn)) { 83 mInsn = new MSwitchInsn(); 84 } else if (isInstructionBranch(insn)) { 85 mInsn = new MBranchInsn(); 86 } else if (isInstructionFillArrayData(insn)) { 87 mInsn = new MInsnWithData(); 88 } else { 89 mInsn = new MInsn(); 90 } 91 92 mInsn.insn = insn; 93 94 // Populate the temporary map. 95 insnLocationMap.put(loc, mInsn); 96 97 // Populate the proper list of mutatable instructions. 98 mutatableCode.addInstructionToEnd(mInsn); 99 100 // Calculate the offsets for each instruction. 101 mInsn.location = loc; 102 mInsn.locationUpdated = false; 103 104 loc += mInsn.insn.getSize(); 105 } 106 107 // Now make branch/switch instructions point at the right target instructions. 108 for (MInsn mInsn : mutatableCode.getInstructions()) { 109 if (mInsn instanceof MSwitchInsn) { 110 readSwitchInstruction((MSwitchInsn) mInsn, insnLocationMap); 111 } else if (mInsn instanceof MInsnWithData) { 112 ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format; 113 int targetLoc = mInsn.location + (int) containsTarget.getTarget(mInsn.insn); 114 ((MInsnWithData)mInsn).dataTarget = insnLocationMap.get(targetLoc); 115 if (((MInsnWithData)mInsn).dataTarget == null) { 116 Log.errorAndQuit("Bad offset calculation in data-target insn"); 117 } 118 } else if (mInsn instanceof MBranchInsn) { 119 ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format; 120 int targetLoc = mInsn.location + (int) containsTarget.getTarget(mInsn.insn); 121 ((MBranchInsn)mInsn).target = insnLocationMap.get(targetLoc); 122 if (((MBranchInsn)mInsn).target == null) { 123 Log.errorAndQuit("Bad offset calculation in branch insn"); 124 } 125 } 126 } 127 128 // Now create try blocks. 129 if (mutatableCode.triesSize > 0) { 130 readTryBlocks(codeItem, mutatableCode, insnLocationMap); 131 } 132 133 return mutatableCode; 134 } 135 136 /** 137 * Given a MutatableCode item that may have been mutated, update the original CodeItem 138 * correctly, to allow valid DEX to be written back to the output file. 139 */ 140 public void mutatableCodeToCodeItem(CodeItem codeItem, MutatableCode mutatableCode) { 141 Log.debug("Translating MutatableCode " + mutatableCode.name 142 + " to CodeItem " + mutatableCode.codeItemIdx); 143 144 // We must first align any data instructions at the end of the code 145 // before we recalculate any offsets. 146 // This also updates their sizes... 147 alignDataInstructions(mutatableCode); 148 149 // Validate that the tracked locations for instructions are valid. 150 // Also mark locations as no longer being updated. 151 int loc = 0; 152 for (MInsn mInsn : mutatableCode.getInstructions()) { 153 if (mInsn.insn.justRaw) { 154 // All just_raw instructions need alignment! 155 if ((loc % 2) != 0) { 156 loc++; 157 } 158 } 159 if (mInsn.location != loc) { 160 Log.errorAndQuit(String.format("%s does not have expected location 0x%x", 161 mInsn, loc)); 162 } 163 mInsn.locationUpdated = false; 164 loc += mInsn.insn.getSize(); 165 } 166 167 // This new list will be attached to the CodeItem at the end... 168 List<Instruction> outputInsns = new LinkedList<Instruction>(); 169 170 // Go through our new list of MInsns, adding them to the new 171 // list of instructions that will be attached to the CodeItem. 172 // Also recalculate offsets for branches. 173 for (MInsn mInsn : mutatableCode.getInstructions()) { 174 if (mInsn instanceof MSwitchInsn) { 175 updateSwitchInstruction((MSwitchInsn)mInsn, mutatableCode); 176 } else if (mInsn instanceof MInsnWithData) { 177 MInsn target = ((MInsnWithData) mInsn).dataTarget; 178 int dataOffset = target.location - mInsn.location; 179 ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format; 180 containsTarget.setTarget(mInsn.insn, dataOffset); 181 } else if (mInsn instanceof MBranchInsn) { 182 MInsn target = ((MBranchInsn) mInsn).target; 183 int branchOffset = target.location - mInsn.location; 184 ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format; 185 containsTarget.setTarget(mInsn.insn, branchOffset); 186 } 187 outputInsns.add(mInsn.insn); 188 } 189 190 // Calculate the new insns_size. 191 int newInsnsSize = 0; 192 for (Instruction insn : outputInsns) { 193 newInsnsSize += insn.getSize(); 194 } 195 196 if (mutatableCode.triesSize > 0) { 197 updateTryBlocks(codeItem, mutatableCode); 198 } 199 200 codeItem.insnsSize = newInsnsSize; 201 codeItem.insns = outputInsns; 202 codeItem.registersSize = mutatableCode.registersSize; 203 codeItem.insSize = mutatableCode.insSize; 204 codeItem.outsSize = mutatableCode.outsSize; 205 codeItem.triesSize = mutatableCode.triesSize; 206 } 207 208 /** 209 * The TryItem specifies an offset into the EncodedCatchHandlerList for a given CodeItem, 210 * but we only have an array of the EncodedCatchHandlers that the List contains. 211 * This function produces a map that offers a way to find out the index into our array, 212 * from the try handler's offset. 213 */ 214 private Map<Short,Integer> createTryHandlerOffsetToIndexMap(CodeItem codeItem) { 215 // Create a sorted set of offsets. 216 List<Short> uniqueOffsets = new ArrayList<Short>(); 217 for (TryItem tryItem : codeItem.tries) { 218 int index = 0; 219 while (true) { 220 if ((index == uniqueOffsets.size()) 221 || (uniqueOffsets.get(index) > tryItem.handlerOff)) { 222 // First condition means we're at the end of the set (or we're inserting 223 // into an empty set) 224 // Second condition means that the offset belongs here 225 // ...so insert it here, pushing the element currently in this position to the 226 // right, if it exists 227 uniqueOffsets.add(index, tryItem.handlerOff); 228 break; 229 } else if (uniqueOffsets.get(index) == tryItem.handlerOff) { 230 // We've already seen it, and we're making a set, not a list. 231 break; 232 } else { 233 // Keep searching. 234 index++; 235 } 236 } 237 } 238 // Now we have an (implicit) index -> offset mapping! 239 240 // Now create the reverse mapping. 241 Map<Short,Integer> offsetIndexMap = new HashMap<Short,Integer>(); 242 for (int i = 0; i < uniqueOffsets.size(); i++) { 243 offsetIndexMap.put(uniqueOffsets.get(i), i); 244 } 245 246 return offsetIndexMap; 247 } 248 249 private void readTryBlocks(CodeItem codeItem, MutatableCode mutatableCode, 250 Map<Integer,MInsn> insnLocationMap) { 251 mutatableCode.mutatableTries = new LinkedList<MTryBlock>(); 252 253 Map<Short,Integer> offsetIndexMap = createTryHandlerOffsetToIndexMap(codeItem); 254 255 // Read each TryItem into a MutatableTryBlock. 256 for (TryItem tryItem : codeItem.tries) { 257 MTryBlock mTryBlock = new MTryBlock(); 258 259 // Get the MInsns that form the start and end of the try block. 260 int startLocation = tryItem.startAddr; 261 mTryBlock.startInsn = insnLocationMap.get(startLocation); 262 263 // The instructions vary in size, so we have to find the last instruction in the block in a 264 // few tries. 265 int endLocation = tryItem.startAddr + tryItem.insnCount - 1; 266 mTryBlock.endInsn = insnLocationMap.get(endLocation); 267 while ((mTryBlock.endInsn == null) && (endLocation >= startLocation)) { 268 endLocation--; 269 mTryBlock.endInsn = insnLocationMap.get(endLocation); 270 } 271 272 // Sanity checks. 273 if (mTryBlock.startInsn == null) { 274 Log.errorAndQuit(String.format( 275 "Couldn't find a mutatable insn at start offset 0x%x", 276 startLocation)); 277 } 278 if (mTryBlock.endInsn == null) { 279 Log.errorAndQuit(String.format( 280 "Couldn't find a mutatable insn at end offset 0x%x", 281 endLocation)); 282 } 283 284 // Get the EncodedCatchHandler. 285 int handlerIdx = offsetIndexMap.get(tryItem.handlerOff); 286 EncodedCatchHandler encodedCatchHandler = codeItem.handlers.list[handlerIdx]; 287 288 // Do we have a catch all? If so, associate the MInsn that's there. 289 if (encodedCatchHandler.size <= 0) { 290 mTryBlock.catchAllHandler = 291 insnLocationMap.get(encodedCatchHandler.catchAllAddr); 292 // Sanity check. 293 if (mTryBlock.catchAllHandler == null) { 294 Log.errorAndQuit( 295 String.format("Couldn't find a mutatable insn at catch-all offset 0x%x", 296 encodedCatchHandler.catchAllAddr)); 297 } 298 } 299 // Do we have explicitly-typed handlers? This will remain empty if not. 300 mTryBlock.handlers = new LinkedList<MInsn>(); 301 302 // Associate all the explicitly-typed handlers. 303 for (int i = 0; i < Math.abs(encodedCatchHandler.size); i++) { 304 EncodedTypeAddrPair handler = encodedCatchHandler.handlers[i]; 305 MInsn handlerInsn = insnLocationMap.get(handler.addr); 306 // Sanity check. 307 if (handlerInsn == null) { 308 Log.errorAndQuit(String.format( 309 "Couldn't find a mutatable instruction at handler offset 0x%x", 310 handler.addr)); 311 } 312 mTryBlock.handlers.add(handlerInsn); 313 } 314 315 // Now finally add the new MutatableTryBlock into this MutatableCode's list! 316 mutatableCode.mutatableTries.add(mTryBlock); 317 } 318 } 319 320 private void updateTryBlocks(CodeItem codeItem, MutatableCode mutatableCode) { 321 322 // TODO: Support ability to add extra try blocks/handlers? 323 324 for (MTryBlock mTryBlock : mutatableCode.mutatableTries) { 325 if (mTryBlock.startInsn.location > mTryBlock.endInsn.location) { 326 // Mutation has put this try block's end insn before its start insn. Fix this. 327 MInsn tempInsn = mTryBlock.startInsn; 328 mTryBlock.startInsn = mTryBlock.endInsn; 329 mTryBlock.endInsn = tempInsn; 330 } 331 } 332 333 // First, manipulate the try blocks if they overlap. 334 for (int i = 0; i < mutatableCode.mutatableTries.size() - 1; i++) { 335 MTryBlock first = mutatableCode.mutatableTries.get(i); 336 MTryBlock second = mutatableCode.mutatableTries.get(i + 1); 337 338 // Do they overlap? 339 if (first.endInsn.location > second.startInsn.location) { 340 341 Log.debug("Found overlap in TryBlocks, moving 2nd TryBlock..."); 342 Log.debug("1st TryBlock goes from " + first.startInsn + " to " + first.endInsn); 343 Log.debug("2nd TryBlock goes from " + second.startInsn + " to " + second.endInsn); 344 345 // Find the first instruction that comes after that does not overlap 346 // with the first try block. 347 MInsn newInsn = second.startInsn; 348 int ptr = mutatableCode.getInstructionIndex(newInsn); 349 while (first.endInsn.location > newInsn.location) { 350 ptr++; 351 newInsn = mutatableCode.getInstructionAt(ptr); 352 } 353 second.startInsn = newInsn; 354 355 Log.debug("Now 2nd TryBlock goes from " + second.startInsn + " to " + second.endInsn); 356 } 357 } 358 359 Map<Short,Integer> offsetIndexMap = createTryHandlerOffsetToIndexMap(codeItem); 360 361 int tryItemIdx = 0; 362 for (MTryBlock mTryBlock : mutatableCode.mutatableTries) { 363 TryItem tryItem = codeItem.tries[tryItemIdx]; 364 365 tryItem.startAddr = mTryBlock.startInsn.location; 366 int insnCount = mTryBlock.endInsn.location - mTryBlock.startInsn.location + 367 mTryBlock.endInsn.insn.getSize(); 368 tryItem.insnCount = (short) insnCount; 369 370 // Get the EncodedCatchHandler. 371 EncodedCatchHandler encodedCatchHandler = 372 codeItem.handlers.list[offsetIndexMap.get(tryItem.handlerOff)]; 373 374 if (encodedCatchHandler.size <= 0) { 375 encodedCatchHandler.catchAllAddr = mTryBlock.catchAllHandler.location; 376 } 377 for (int i = 0; i < Math.abs(encodedCatchHandler.size); i++) { 378 MInsn handlerInsn = mTryBlock.handlers.get(i); 379 EncodedTypeAddrPair handler = encodedCatchHandler.handlers[i]; 380 handler.addr = handlerInsn.location; 381 } 382 tryItemIdx++; 383 } 384 } 385 386 /** 387 * Given a switch instruction, find the associated data's raw[] form, and update 388 * the targets of the switch instruction to point to the correct instructions. 389 */ 390 private void readSwitchInstruction(MSwitchInsn switchInsn, 391 Map<Integer,MInsn> insnLocationMap) { 392 // Find the data. 393 ContainsTarget containsTarget = (ContainsTarget) switchInsn.insn.info.format; 394 int dataLocation = switchInsn.location + (int) containsTarget.getTarget(switchInsn.insn); 395 switchInsn.dataTarget = insnLocationMap.get(dataLocation); 396 if (switchInsn.dataTarget == null) { 397 Log.errorAndQuit("Bad offset calculation for data target in switch insn"); 398 } 399 400 // Now read the data. 401 Instruction dataInsn = switchInsn.dataTarget.insn; 402 403 int rawPtr = 2; 404 405 int targetsSize = (int) RawInsnHelper.getUnsignedShortFromTwoBytes(dataInsn.rawBytes, rawPtr); 406 rawPtr += 2; 407 408 int[] keys = new int[targetsSize]; 409 int[] targets = new int[targetsSize]; 410 411 if (dataInsn.rawType == 1) { 412 switchInsn.packed = true; 413 // Dealing with a packed-switch. 414 // Read the first key. 415 keys[0] = (int) RawInsnHelper.getUnsignedIntFromFourBytes(dataInsn.rawBytes, rawPtr); 416 rawPtr += 4; 417 // Calculate the rest of the keys. 418 for (int i = 1; i < targetsSize; i++) { 419 keys[i] = keys[i - 1] + 1; 420 } 421 } else if (dataInsn.rawType == 2) { 422 // Dealing with a sparse-switch. 423 // Read all of the keys. 424 for (int i = 0; i < targetsSize; i++) { 425 keys[i] = (int) RawInsnHelper.getUnsignedIntFromFourBytes(dataInsn.rawBytes, 426 rawPtr); 427 rawPtr += 4; 428 } 429 } 430 431 // Now read the targets. 432 for (int i = 0; i < targetsSize; i++) { 433 targets[i] = (int) RawInsnHelper.getUnsignedIntFromFourBytes(dataInsn.rawBytes, 434 rawPtr); 435 rawPtr += 4; 436 } 437 438 // Store the keys. 439 switchInsn.keys = keys; 440 441 // Convert our targets[] offsets into pointers to MInsns. 442 for (int target : targets) { 443 int targetLocation = switchInsn.location + target; 444 MInsn targetInsn = insnLocationMap.get(targetLocation); 445 switchInsn.targets.add(targetInsn); 446 if (targetInsn == null) { 447 Log.errorAndQuit("Bad offset calculation for target in switch insn"); 448 } 449 } 450 } 451 452 /** 453 * Given a mutatable switch instruction, which may have had some of its branch 454 * targets moved, update all the target offsets in the raw[] form of the instruction. 455 */ 456 private void updateSwitchInstruction(MSwitchInsn switchInsn, MutatableCode mutatableCode) { 457 // Update the offset to the data instruction 458 MInsn dataTarget = switchInsn.dataTarget; 459 int dataOffset = dataTarget.location - switchInsn.location; 460 ContainsTarget containsTarget = (ContainsTarget) switchInsn.insn.info.format; 461 containsTarget.setTarget(switchInsn.insn, dataOffset); 462 463 int targetsSize = switchInsn.targets.size(); 464 465 int[] keys = switchInsn.keys; 466 int[] targets = new int[targetsSize]; 467 468 // Calculate the new offsets. 469 int targetIdx = 0; 470 for (MInsn target : switchInsn.targets) { 471 targets[targetIdx] = target.location - switchInsn.location; 472 targetIdx++; 473 } 474 475 // Now write the data back to the raw bytes. 476 Instruction dataInsn = switchInsn.dataTarget.insn; 477 478 int rawPtr = 2; 479 480 // Write out the size. 481 RawInsnHelper.writeUnsignedShortToTwoBytes(dataInsn.rawBytes, rawPtr, targetsSize); 482 rawPtr += 2; 483 484 // Write out the keys. 485 if (switchInsn.packed) { 486 // Only write out one key - the first. 487 RawInsnHelper.writeUnsignedIntToFourBytes(dataInsn.rawBytes, rawPtr, keys[0]); 488 rawPtr += 4; 489 } else { 490 // Write out all the keys. 491 for (int i = 0; i < targetsSize; i++) { 492 RawInsnHelper.writeUnsignedIntToFourBytes(dataInsn.rawBytes, rawPtr, keys[i]); 493 rawPtr += 4; 494 } 495 } 496 497 // Write out all the targets. 498 for (int i = 0; i < targetsSize; i++) { 499 RawInsnHelper.writeUnsignedIntToFourBytes(dataInsn.rawBytes, rawPtr, targets[i]); 500 rawPtr += 4; 501 } 502 } 503 504 /** 505 * After mutation, data instructions may no longer be 4-byte aligned. 506 * If this is the case, insert nops to align them all. 507 * This makes a number of assumptions about data currently: 508 * - data is always at the end of method insns 509 * - all data instructions are stored contiguously 510 */ 511 private void alignDataInstructions(MutatableCode mutatableCode) { 512 // Find all the switch data instructions. 513 List<MInsn> dataInsns = new ArrayList<MInsn>(); 514 515 // Update raw sizes of the data instructions as well. 516 for (MInsn mInsn : mutatableCode.getInstructions()) { 517 if (mInsn instanceof MSwitchInsn) { 518 // Update the raw size of the instruction. 519 MSwitchInsn switchInsn = (MSwitchInsn) mInsn; 520 int targetsSize = switchInsn.targets.size(); 521 Instruction dataInsn = switchInsn.dataTarget.insn; 522 if (switchInsn.packed) { 523 dataInsn.rawSize = (targetsSize * 2) + 4; 524 } else { 525 dataInsn.rawSize = (targetsSize * 4) + 2; 526 } 527 dataInsns.add(switchInsn.dataTarget); 528 } else if (mInsn instanceof MInsnWithData) { 529 MInsnWithData insnWithData = 530 (MInsnWithData) mInsn; 531 dataInsns.add(insnWithData.dataTarget); 532 } 533 } 534 535 // Only need to align switch data instructions if there are any! 536 if (!dataInsns.isEmpty()) { 537 538 Log.debug("Found data instructions, checking alignment..."); 539 540 // Sort data_insns by location. 541 Collections.sort(dataInsns, new Comparator<MInsn>() { 542 @Override 543 public int compare(MInsn first, MInsn second) { 544 if (first.location < second.location) { 545 return -1; 546 } else if (first.location > second.location) { 547 return 1; 548 } 549 return 0; 550 } 551 }); 552 553 boolean performedAlignment = false; 554 555 // Go through all the data insns, and insert an alignment nop if they're unaligned. 556 for (MInsn dataInsn : dataInsns) { 557 if (dataInsn.location % 2 != 0) { 558 Log.debug("Aligning data instruction with a nop."); 559 int alignmentNopIdx = mutatableCode.getInstructionIndex(dataInsn); 560 MInsn nop = new MInsn(); 561 nop.insn = new Instruction(); 562 nop.insn.info = Instruction.getOpcodeInfo(Opcode.NOP); 563 mutatableCode.insertInstructionAt(nop, alignmentNopIdx); 564 performedAlignment = true; 565 } 566 } 567 568 if (!performedAlignment) { 569 Log.debug("Alignment okay."); 570 } 571 } 572 } 573 574 /** 575 * Determine if a particular instruction is a branch instruction, based on opcode. 576 */ 577 private boolean isInstructionBranch(Instruction insn) { 578 Opcode opcode = insn.info.opcode; 579 if (Opcode.isBetween(opcode, Opcode.IF_EQ, Opcode.IF_LEZ) 580 || Opcode.isBetween(opcode, Opcode.GOTO, Opcode.GOTO_32)) { 581 return true; 582 } 583 return false; 584 } 585 586 /** 587 * Determine if a particular instruction is a switch instruction, based on opcode. 588 */ 589 private boolean isInstructionSwitch(Instruction insn) { 590 Opcode opcode = insn.info.opcode; 591 if (Opcode.isBetween(opcode, Opcode.PACKED_SWITCH, Opcode.SPARSE_SWITCH)) { 592 return true; 593 } 594 return false; 595 } 596 597 private boolean isInstructionFillArrayData(Instruction insn) { 598 return (insn.info.opcode == Opcode.FILL_ARRAY_DATA); 599 } 600 } 601