1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16 #include <stdarg.h> /* for va_*() */ 17 #include <stdio.h> /* for vsnprintf() */ 18 #include <stdlib.h> /* for exit() */ 19 #include <string.h> /* for memset() */ 20 21 #include "X86DisassemblerDecoder.h" 22 23 #include "X86GenDisassemblerTables.inc" 24 25 #define TRUE 1 26 #define FALSE 0 27 28 typedef int8_t bool; 29 30 #ifndef NDEBUG 31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32 #else 33 #define debug(s) do { } while (0) 34 #endif 35 36 37 /* 38 * contextForAttrs - Client for the instruction context table. Takes a set of 39 * attributes and returns the appropriate decode context. 40 * 41 * @param attrMask - Attributes, from the enumeration attributeBits. 42 * @return - The InstructionContext to use when looking up an 43 * an instruction with these attributes. 44 */ 45 static InstructionContext contextForAttrs(uint8_t attrMask) { 46 return CONTEXTS_SYM[attrMask]; 47 } 48 49 /* 50 * modRMRequired - Reads the appropriate instruction table to determine whether 51 * the ModR/M byte is required to decode a particular instruction. 52 * 53 * @param type - The opcode type (i.e., how many bytes it has). 54 * @param insnContext - The context for the instruction, as returned by 55 * contextForAttrs. 56 * @param opcode - The last byte of the instruction's opcode, not counting 57 * ModR/M extensions and escapes. 58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59 */ 60 static int modRMRequired(OpcodeType type, 61 InstructionContext insnContext, 62 uint8_t opcode) { 63 const struct ContextDecision* decision = 0; 64 65 switch (type) { 66 case ONEBYTE: 67 decision = &ONEBYTE_SYM; 68 break; 69 case TWOBYTE: 70 decision = &TWOBYTE_SYM; 71 break; 72 case THREEBYTE_38: 73 decision = &THREEBYTE38_SYM; 74 break; 75 case THREEBYTE_3A: 76 decision = &THREEBYTE3A_SYM; 77 break; 78 case THREEBYTE_A6: 79 decision = &THREEBYTEA6_SYM; 80 break; 81 case THREEBYTE_A7: 82 decision = &THREEBYTEA7_SYM; 83 break; 84 } 85 86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87 modrm_type != MODRM_ONEENTRY; 88 } 89 90 /* 91 * decode - Reads the appropriate instruction table to obtain the unique ID of 92 * an instruction. 93 * 94 * @param type - See modRMRequired(). 95 * @param insnContext - See modRMRequired(). 96 * @param opcode - See modRMRequired(). 97 * @param modRM - The ModR/M byte if required, or any value if not. 98 * @return - The UID of the instruction, or 0 on failure. 99 */ 100 static InstrUID decode(OpcodeType type, 101 InstructionContext insnContext, 102 uint8_t opcode, 103 uint8_t modRM) { 104 const struct ModRMDecision* dec = 0; 105 106 switch (type) { 107 case ONEBYTE: 108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 109 break; 110 case TWOBYTE: 111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 112 break; 113 case THREEBYTE_38: 114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 115 break; 116 case THREEBYTE_3A: 117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 118 break; 119 case THREEBYTE_A6: 120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 121 break; 122 case THREEBYTE_A7: 123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 124 break; 125 } 126 127 switch (dec->modrm_type) { 128 default: 129 debug("Corrupt table! Unknown modrm_type"); 130 return 0; 131 case MODRM_ONEENTRY: 132 return modRMTable[dec->instructionIDs]; 133 case MODRM_SPLITRM: 134 if (modFromModRM(modRM) == 0x3) 135 return modRMTable[dec->instructionIDs+1]; 136 return modRMTable[dec->instructionIDs]; 137 case MODRM_SPLITREG: 138 if (modFromModRM(modRM) == 0x3) 139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 141 case MODRM_SPLITMISC: 142 if (modFromModRM(modRM) == 0x3) 143 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 144 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 145 case MODRM_FULL: 146 return modRMTable[dec->instructionIDs+modRM]; 147 } 148 } 149 150 /* 151 * specifierForUID - Given a UID, returns the name and operand specification for 152 * that instruction. 153 * 154 * @param uid - The unique ID for the instruction. This should be returned by 155 * decode(); specifierForUID will not check bounds. 156 * @return - A pointer to the specification for that instruction. 157 */ 158 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 159 return &INSTRUCTIONS_SYM[uid]; 160 } 161 162 /* 163 * consumeByte - Uses the reader function provided by the user to consume one 164 * byte from the instruction's memory and advance the cursor. 165 * 166 * @param insn - The instruction with the reader function to use. The cursor 167 * for this instruction is advanced. 168 * @param byte - A pointer to a pre-allocated memory buffer to be populated 169 * with the data read. 170 * @return - 0 if the read was successful; nonzero otherwise. 171 */ 172 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 173 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 174 175 if (!ret) 176 ++(insn->readerCursor); 177 178 return ret; 179 } 180 181 /* 182 * lookAtByte - Like consumeByte, but does not advance the cursor. 183 * 184 * @param insn - See consumeByte(). 185 * @param byte - See consumeByte(). 186 * @return - See consumeByte(). 187 */ 188 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 189 return insn->reader(insn->readerArg, byte, insn->readerCursor); 190 } 191 192 static void unconsumeByte(struct InternalInstruction* insn) { 193 insn->readerCursor--; 194 } 195 196 #define CONSUME_FUNC(name, type) \ 197 static int name(struct InternalInstruction* insn, type* ptr) { \ 198 type combined = 0; \ 199 unsigned offset; \ 200 for (offset = 0; offset < sizeof(type); ++offset) { \ 201 uint8_t byte; \ 202 int ret = insn->reader(insn->readerArg, \ 203 &byte, \ 204 insn->readerCursor + offset); \ 205 if (ret) \ 206 return ret; \ 207 combined = combined | ((uint64_t)byte << (offset * 8)); \ 208 } \ 209 *ptr = combined; \ 210 insn->readerCursor += sizeof(type); \ 211 return 0; \ 212 } 213 214 /* 215 * consume* - Use the reader function provided by the user to consume data 216 * values of various sizes from the instruction's memory and advance the 217 * cursor appropriately. These readers perform endian conversion. 218 * 219 * @param insn - See consumeByte(). 220 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 221 * be populated with the data read. 222 * @return - See consumeByte(). 223 */ 224 CONSUME_FUNC(consumeInt8, int8_t) 225 CONSUME_FUNC(consumeInt16, int16_t) 226 CONSUME_FUNC(consumeInt32, int32_t) 227 CONSUME_FUNC(consumeUInt16, uint16_t) 228 CONSUME_FUNC(consumeUInt32, uint32_t) 229 CONSUME_FUNC(consumeUInt64, uint64_t) 230 231 /* 232 * dbgprintf - Uses the logging function provided by the user to log a single 233 * message, typically without a carriage-return. 234 * 235 * @param insn - The instruction containing the logging function. 236 * @param format - See printf(). 237 * @param ... - See printf(). 238 */ 239 static void dbgprintf(struct InternalInstruction* insn, 240 const char* format, 241 ...) { 242 char buffer[256]; 243 va_list ap; 244 245 if (!insn->dlog) 246 return; 247 248 va_start(ap, format); 249 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 250 va_end(ap); 251 252 insn->dlog(insn->dlogArg, buffer); 253 254 return; 255 } 256 257 /* 258 * setPrefixPresent - Marks that a particular prefix is present at a particular 259 * location. 260 * 261 * @param insn - The instruction to be marked as having the prefix. 262 * @param prefix - The prefix that is present. 263 * @param location - The location where the prefix is located (in the address 264 * space of the instruction's reader). 265 */ 266 static void setPrefixPresent(struct InternalInstruction* insn, 267 uint8_t prefix, 268 uint64_t location) 269 { 270 insn->prefixPresent[prefix] = 1; 271 insn->prefixLocations[prefix] = location; 272 } 273 274 /* 275 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 276 * present at a given location. 277 * 278 * @param insn - The instruction to be queried. 279 * @param prefix - The prefix. 280 * @param location - The location to query. 281 * @return - Whether the prefix is at that location. 282 */ 283 static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 284 uint8_t prefix, 285 uint64_t location) 286 { 287 if (insn->prefixPresent[prefix] == 1 && 288 insn->prefixLocations[prefix] == location) 289 return TRUE; 290 else 291 return FALSE; 292 } 293 294 /* 295 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 296 * instruction as having them. Also sets the instruction's default operand, 297 * address, and other relevant data sizes to report operands correctly. 298 * 299 * @param insn - The instruction whose prefixes are to be read. 300 * @return - 0 if the instruction could be read until the end of the prefix 301 * bytes, and no prefixes conflicted; nonzero otherwise. 302 */ 303 static int readPrefixes(struct InternalInstruction* insn) { 304 BOOL isPrefix = TRUE; 305 BOOL prefixGroups[4] = { FALSE }; 306 uint64_t prefixLocation; 307 uint8_t byte = 0; 308 309 BOOL hasAdSize = FALSE; 310 BOOL hasOpSize = FALSE; 311 312 dbgprintf(insn, "readPrefixes()"); 313 314 while (isPrefix) { 315 prefixLocation = insn->readerCursor; 316 317 if (consumeByte(insn, &byte)) 318 return -1; 319 320 /* 321 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 322 * break and let it be disassembled as a normal "instruction". 323 */ 324 if (insn->readerCursor - 1 == insn->startLocation 325 && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) { 326 uint8_t nextByte; 327 if (byte == 0xf0) 328 break; 329 if (lookAtByte(insn, &nextByte)) 330 return -1; 331 /* 332 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 333 * met: 334 * - it is followed by a LOCK (0xf0) prefix 335 * - it is followed by an xchg instruction 336 * then it should be disassembled as a xacquire/xrelease not repne/rep. 337 */ 338 if ((byte == 0xf2 || byte == 0xf3) && 339 ((nextByte == 0xf0) | 340 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) 341 insn->xAcquireRelease = TRUE; 342 /* 343 * Also if the byte is 0xf3, and the following condition is met: 344 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 345 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 346 * then it should be disassembled as an xrelease not rep. 347 */ 348 if (byte == 0xf3 && 349 (nextByte == 0x88 || nextByte == 0x89 || 350 nextByte == 0xc6 || nextByte == 0xc7)) 351 insn->xAcquireRelease = TRUE; 352 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 353 if (consumeByte(insn, &nextByte)) 354 return -1; 355 if (lookAtByte(insn, &nextByte)) 356 return -1; 357 unconsumeByte(insn); 358 } 359 if (nextByte != 0x0f && nextByte != 0x90) 360 break; 361 } 362 363 switch (byte) { 364 case 0xf0: /* LOCK */ 365 case 0xf2: /* REPNE/REPNZ */ 366 case 0xf3: /* REP or REPE/REPZ */ 367 if (prefixGroups[0]) 368 dbgprintf(insn, "Redundant Group 1 prefix"); 369 prefixGroups[0] = TRUE; 370 setPrefixPresent(insn, byte, prefixLocation); 371 break; 372 case 0x2e: /* CS segment override -OR- Branch not taken */ 373 case 0x36: /* SS segment override -OR- Branch taken */ 374 case 0x3e: /* DS segment override */ 375 case 0x26: /* ES segment override */ 376 case 0x64: /* FS segment override */ 377 case 0x65: /* GS segment override */ 378 switch (byte) { 379 case 0x2e: 380 insn->segmentOverride = SEG_OVERRIDE_CS; 381 break; 382 case 0x36: 383 insn->segmentOverride = SEG_OVERRIDE_SS; 384 break; 385 case 0x3e: 386 insn->segmentOverride = SEG_OVERRIDE_DS; 387 break; 388 case 0x26: 389 insn->segmentOverride = SEG_OVERRIDE_ES; 390 break; 391 case 0x64: 392 insn->segmentOverride = SEG_OVERRIDE_FS; 393 break; 394 case 0x65: 395 insn->segmentOverride = SEG_OVERRIDE_GS; 396 break; 397 default: 398 debug("Unhandled override"); 399 return -1; 400 } 401 if (prefixGroups[1]) 402 dbgprintf(insn, "Redundant Group 2 prefix"); 403 prefixGroups[1] = TRUE; 404 setPrefixPresent(insn, byte, prefixLocation); 405 break; 406 case 0x66: /* Operand-size override */ 407 if (prefixGroups[2]) 408 dbgprintf(insn, "Redundant Group 3 prefix"); 409 prefixGroups[2] = TRUE; 410 hasOpSize = TRUE; 411 setPrefixPresent(insn, byte, prefixLocation); 412 break; 413 case 0x67: /* Address-size override */ 414 if (prefixGroups[3]) 415 dbgprintf(insn, "Redundant Group 4 prefix"); 416 prefixGroups[3] = TRUE; 417 hasAdSize = TRUE; 418 setPrefixPresent(insn, byte, prefixLocation); 419 break; 420 default: /* Not a prefix byte */ 421 isPrefix = FALSE; 422 break; 423 } 424 425 if (isPrefix) 426 dbgprintf(insn, "Found prefix 0x%hhx", byte); 427 } 428 429 insn->vexSize = 0; 430 431 if (byte == 0xc4) { 432 uint8_t byte1; 433 434 if (lookAtByte(insn, &byte1)) { 435 dbgprintf(insn, "Couldn't read second byte of VEX"); 436 return -1; 437 } 438 439 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 440 insn->vexSize = 3; 441 insn->necessaryPrefixLocation = insn->readerCursor - 1; 442 } 443 else { 444 unconsumeByte(insn); 445 insn->necessaryPrefixLocation = insn->readerCursor - 1; 446 } 447 448 if (insn->vexSize == 3) { 449 insn->vexPrefix[0] = byte; 450 consumeByte(insn, &insn->vexPrefix[1]); 451 consumeByte(insn, &insn->vexPrefix[2]); 452 453 /* We simulate the REX prefix for simplicity's sake */ 454 455 if (insn->mode == MODE_64BIT) { 456 insn->rexPrefix = 0x40 457 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 458 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 459 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 460 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 461 } 462 463 switch (ppFromVEX3of3(insn->vexPrefix[2])) 464 { 465 default: 466 break; 467 case VEX_PREFIX_66: 468 hasOpSize = TRUE; 469 break; 470 } 471 472 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 473 } 474 } 475 else if (byte == 0xc5) { 476 uint8_t byte1; 477 478 if (lookAtByte(insn, &byte1)) { 479 dbgprintf(insn, "Couldn't read second byte of VEX"); 480 return -1; 481 } 482 483 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 484 insn->vexSize = 2; 485 } 486 else { 487 unconsumeByte(insn); 488 } 489 490 if (insn->vexSize == 2) { 491 insn->vexPrefix[0] = byte; 492 consumeByte(insn, &insn->vexPrefix[1]); 493 494 if (insn->mode == MODE_64BIT) { 495 insn->rexPrefix = 0x40 496 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 497 } 498 499 switch (ppFromVEX2of2(insn->vexPrefix[1])) 500 { 501 default: 502 break; 503 case VEX_PREFIX_66: 504 hasOpSize = TRUE; 505 break; 506 } 507 508 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 509 } 510 } 511 else { 512 if (insn->mode == MODE_64BIT) { 513 if ((byte & 0xf0) == 0x40) { 514 uint8_t opcodeByte; 515 516 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 517 dbgprintf(insn, "Redundant REX prefix"); 518 return -1; 519 } 520 521 insn->rexPrefix = byte; 522 insn->necessaryPrefixLocation = insn->readerCursor - 2; 523 524 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 525 } else { 526 unconsumeByte(insn); 527 insn->necessaryPrefixLocation = insn->readerCursor - 1; 528 } 529 } else { 530 unconsumeByte(insn); 531 insn->necessaryPrefixLocation = insn->readerCursor - 1; 532 } 533 } 534 535 if (insn->mode == MODE_16BIT) { 536 insn->registerSize = (hasOpSize ? 4 : 2); 537 insn->addressSize = (hasAdSize ? 4 : 2); 538 insn->displacementSize = (hasAdSize ? 4 : 2); 539 insn->immediateSize = (hasOpSize ? 4 : 2); 540 } else if (insn->mode == MODE_32BIT) { 541 insn->registerSize = (hasOpSize ? 2 : 4); 542 insn->addressSize = (hasAdSize ? 2 : 4); 543 insn->displacementSize = (hasAdSize ? 2 : 4); 544 insn->immediateSize = (hasOpSize ? 2 : 4); 545 } else if (insn->mode == MODE_64BIT) { 546 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 547 insn->registerSize = 8; 548 insn->addressSize = (hasAdSize ? 4 : 8); 549 insn->displacementSize = 4; 550 insn->immediateSize = 4; 551 } else if (insn->rexPrefix) { 552 insn->registerSize = (hasOpSize ? 2 : 4); 553 insn->addressSize = (hasAdSize ? 4 : 8); 554 insn->displacementSize = (hasOpSize ? 2 : 4); 555 insn->immediateSize = (hasOpSize ? 2 : 4); 556 } else { 557 insn->registerSize = (hasOpSize ? 2 : 4); 558 insn->addressSize = (hasAdSize ? 4 : 8); 559 insn->displacementSize = (hasOpSize ? 2 : 4); 560 insn->immediateSize = (hasOpSize ? 2 : 4); 561 } 562 } 563 564 return 0; 565 } 566 567 /* 568 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 569 * extended or escape opcodes). 570 * 571 * @param insn - The instruction whose opcode is to be read. 572 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 573 */ 574 static int readOpcode(struct InternalInstruction* insn) { 575 /* Determine the length of the primary opcode */ 576 577 uint8_t current; 578 579 dbgprintf(insn, "readOpcode()"); 580 581 insn->opcodeType = ONEBYTE; 582 583 if (insn->vexSize == 3) 584 { 585 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 586 { 587 default: 588 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 589 return -1; 590 case 0: 591 break; 592 case VEX_LOB_0F: 593 insn->twoByteEscape = 0x0f; 594 insn->opcodeType = TWOBYTE; 595 return consumeByte(insn, &insn->opcode); 596 case VEX_LOB_0F38: 597 insn->twoByteEscape = 0x0f; 598 insn->threeByteEscape = 0x38; 599 insn->opcodeType = THREEBYTE_38; 600 return consumeByte(insn, &insn->opcode); 601 case VEX_LOB_0F3A: 602 insn->twoByteEscape = 0x0f; 603 insn->threeByteEscape = 0x3a; 604 insn->opcodeType = THREEBYTE_3A; 605 return consumeByte(insn, &insn->opcode); 606 } 607 } 608 else if (insn->vexSize == 2) 609 { 610 insn->twoByteEscape = 0x0f; 611 insn->opcodeType = TWOBYTE; 612 return consumeByte(insn, &insn->opcode); 613 } 614 615 if (consumeByte(insn, ¤t)) 616 return -1; 617 618 if (current == 0x0f) { 619 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 620 621 insn->twoByteEscape = current; 622 623 if (consumeByte(insn, ¤t)) 624 return -1; 625 626 if (current == 0x38) { 627 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 628 629 insn->threeByteEscape = current; 630 631 if (consumeByte(insn, ¤t)) 632 return -1; 633 634 insn->opcodeType = THREEBYTE_38; 635 } else if (current == 0x3a) { 636 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 637 638 insn->threeByteEscape = current; 639 640 if (consumeByte(insn, ¤t)) 641 return -1; 642 643 insn->opcodeType = THREEBYTE_3A; 644 } else if (current == 0xa6) { 645 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 646 647 insn->threeByteEscape = current; 648 649 if (consumeByte(insn, ¤t)) 650 return -1; 651 652 insn->opcodeType = THREEBYTE_A6; 653 } else if (current == 0xa7) { 654 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 655 656 insn->threeByteEscape = current; 657 658 if (consumeByte(insn, ¤t)) 659 return -1; 660 661 insn->opcodeType = THREEBYTE_A7; 662 } else { 663 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 664 665 insn->opcodeType = TWOBYTE; 666 } 667 } 668 669 /* 670 * At this point we have consumed the full opcode. 671 * Anything we consume from here on must be unconsumed. 672 */ 673 674 insn->opcode = current; 675 676 return 0; 677 } 678 679 static int readModRM(struct InternalInstruction* insn); 680 681 /* 682 * getIDWithAttrMask - Determines the ID of an instruction, consuming 683 * the ModR/M byte as appropriate for extended and escape opcodes, 684 * and using a supplied attribute mask. 685 * 686 * @param instructionID - A pointer whose target is filled in with the ID of the 687 * instruction. 688 * @param insn - The instruction whose ID is to be determined. 689 * @param attrMask - The attribute mask to search. 690 * @return - 0 if the ModR/M could be read when needed or was not 691 * needed; nonzero otherwise. 692 */ 693 static int getIDWithAttrMask(uint16_t* instructionID, 694 struct InternalInstruction* insn, 695 uint8_t attrMask) { 696 BOOL hasModRMExtension; 697 698 uint8_t instructionClass; 699 700 instructionClass = contextForAttrs(attrMask); 701 702 hasModRMExtension = modRMRequired(insn->opcodeType, 703 instructionClass, 704 insn->opcode); 705 706 if (hasModRMExtension) { 707 if (readModRM(insn)) 708 return -1; 709 710 *instructionID = decode(insn->opcodeType, 711 instructionClass, 712 insn->opcode, 713 insn->modRM); 714 } else { 715 *instructionID = decode(insn->opcodeType, 716 instructionClass, 717 insn->opcode, 718 0); 719 } 720 721 return 0; 722 } 723 724 /* 725 * is16BitEquivalent - Determines whether two instruction names refer to 726 * equivalent instructions but one is 16-bit whereas the other is not. 727 * 728 * @param orig - The instruction that is not 16-bit 729 * @param equiv - The instruction that is 16-bit 730 */ 731 static BOOL is16BitEquivalent(const char* orig, const char* equiv) { 732 off_t i; 733 734 for (i = 0;; i++) { 735 if (orig[i] == '\0' && equiv[i] == '\0') 736 return TRUE; 737 if (orig[i] == '\0' || equiv[i] == '\0') 738 return FALSE; 739 if (orig[i] != equiv[i]) { 740 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 741 continue; 742 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 743 continue; 744 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 745 continue; 746 return FALSE; 747 } 748 } 749 } 750 751 /* 752 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 753 * appropriate for extended and escape opcodes. Determines the attributes and 754 * context for the instruction before doing so. 755 * 756 * @param insn - The instruction whose ID is to be determined. 757 * @return - 0 if the ModR/M could be read when needed or was not needed; 758 * nonzero otherwise. 759 */ 760 static int getID(struct InternalInstruction* insn, const void *miiArg) { 761 uint8_t attrMask; 762 uint16_t instructionID; 763 764 dbgprintf(insn, "getID()"); 765 766 attrMask = ATTR_NONE; 767 768 if (insn->mode == MODE_64BIT) 769 attrMask |= ATTR_64BIT; 770 771 if (insn->vexSize) { 772 attrMask |= ATTR_VEX; 773 774 if (insn->vexSize == 3) { 775 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 776 case VEX_PREFIX_66: 777 attrMask |= ATTR_OPSIZE; 778 break; 779 case VEX_PREFIX_F3: 780 attrMask |= ATTR_XS; 781 break; 782 case VEX_PREFIX_F2: 783 attrMask |= ATTR_XD; 784 break; 785 } 786 787 if (lFromVEX3of3(insn->vexPrefix[2])) 788 attrMask |= ATTR_VEXL; 789 } 790 else if (insn->vexSize == 2) { 791 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 792 case VEX_PREFIX_66: 793 attrMask |= ATTR_OPSIZE; 794 break; 795 case VEX_PREFIX_F3: 796 attrMask |= ATTR_XS; 797 break; 798 case VEX_PREFIX_F2: 799 attrMask |= ATTR_XD; 800 break; 801 } 802 803 if (lFromVEX2of2(insn->vexPrefix[1])) 804 attrMask |= ATTR_VEXL; 805 } 806 else { 807 return -1; 808 } 809 } 810 else { 811 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 812 attrMask |= ATTR_OPSIZE; 813 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 814 attrMask |= ATTR_ADSIZE; 815 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 816 attrMask |= ATTR_XS; 817 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 818 attrMask |= ATTR_XD; 819 } 820 821 if (insn->rexPrefix & 0x08) 822 attrMask |= ATTR_REXW; 823 824 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 825 return -1; 826 827 /* The following clauses compensate for limitations of the tables. */ 828 829 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && 830 !(attrMask & ATTR_OPSIZE)) { 831 /* 832 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 833 * has precedence since there are no L-bit with W-bit entries in the tables. 834 * So if the L-bit isn't significant we should use the W-bit instead. 835 * We only need to do this if the instruction doesn't specify OpSize since 836 * there is a VEX_L_W_OPSIZE table. 837 */ 838 839 const struct InstructionSpecifier *spec; 840 uint16_t instructionIDWithWBit; 841 const struct InstructionSpecifier *specWithWBit; 842 843 spec = specifierForUID(instructionID); 844 845 if (getIDWithAttrMask(&instructionIDWithWBit, 846 insn, 847 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 848 insn->instructionID = instructionID; 849 insn->spec = spec; 850 return 0; 851 } 852 853 specWithWBit = specifierForUID(instructionIDWithWBit); 854 855 if (instructionID != instructionIDWithWBit) { 856 insn->instructionID = instructionIDWithWBit; 857 insn->spec = specWithWBit; 858 } else { 859 insn->instructionID = instructionID; 860 insn->spec = spec; 861 } 862 return 0; 863 } 864 865 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 866 /* 867 * The instruction tables make no distinction between instructions that 868 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 869 * particular spot (i.e., many MMX operations). In general we're 870 * conservative, but in the specific case where OpSize is present but not 871 * in the right place we check if there's a 16-bit operation. 872 */ 873 874 const struct InstructionSpecifier *spec; 875 uint16_t instructionIDWithOpsize; 876 const char *specName, *specWithOpSizeName; 877 878 spec = specifierForUID(instructionID); 879 880 if (getIDWithAttrMask(&instructionIDWithOpsize, 881 insn, 882 attrMask | ATTR_OPSIZE)) { 883 /* 884 * ModRM required with OpSize but not present; give up and return version 885 * without OpSize set 886 */ 887 888 insn->instructionID = instructionID; 889 insn->spec = spec; 890 return 0; 891 } 892 893 specName = x86DisassemblerGetInstrName(instructionID, miiArg); 894 specWithOpSizeName = 895 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 896 897 if (is16BitEquivalent(specName, specWithOpSizeName)) { 898 insn->instructionID = instructionIDWithOpsize; 899 insn->spec = specifierForUID(instructionIDWithOpsize); 900 } else { 901 insn->instructionID = instructionID; 902 insn->spec = spec; 903 } 904 return 0; 905 } 906 907 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 908 insn->rexPrefix & 0x01) { 909 /* 910 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 911 * it should decode as XCHG %r8, %eax. 912 */ 913 914 const struct InstructionSpecifier *spec; 915 uint16_t instructionIDWithNewOpcode; 916 const struct InstructionSpecifier *specWithNewOpcode; 917 918 spec = specifierForUID(instructionID); 919 920 /* Borrow opcode from one of the other XCHGar opcodes */ 921 insn->opcode = 0x91; 922 923 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 924 insn, 925 attrMask)) { 926 insn->opcode = 0x90; 927 928 insn->instructionID = instructionID; 929 insn->spec = spec; 930 return 0; 931 } 932 933 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 934 935 /* Change back */ 936 insn->opcode = 0x90; 937 938 insn->instructionID = instructionIDWithNewOpcode; 939 insn->spec = specWithNewOpcode; 940 941 return 0; 942 } 943 944 insn->instructionID = instructionID; 945 insn->spec = specifierForUID(insn->instructionID); 946 947 return 0; 948 } 949 950 /* 951 * readSIB - Consumes the SIB byte to determine addressing information for an 952 * instruction. 953 * 954 * @param insn - The instruction whose SIB byte is to be read. 955 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 956 */ 957 static int readSIB(struct InternalInstruction* insn) { 958 SIBIndex sibIndexBase = 0; 959 SIBBase sibBaseBase = 0; 960 uint8_t index, base; 961 962 dbgprintf(insn, "readSIB()"); 963 964 if (insn->consumedSIB) 965 return 0; 966 967 insn->consumedSIB = TRUE; 968 969 switch (insn->addressSize) { 970 case 2: 971 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 972 return -1; 973 break; 974 case 4: 975 sibIndexBase = SIB_INDEX_EAX; 976 sibBaseBase = SIB_BASE_EAX; 977 break; 978 case 8: 979 sibIndexBase = SIB_INDEX_RAX; 980 sibBaseBase = SIB_BASE_RAX; 981 break; 982 } 983 984 if (consumeByte(insn, &insn->sib)) 985 return -1; 986 987 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 988 989 switch (index) { 990 case 0x4: 991 insn->sibIndex = SIB_INDEX_NONE; 992 break; 993 default: 994 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 995 if (insn->sibIndex == SIB_INDEX_sib || 996 insn->sibIndex == SIB_INDEX_sib64) 997 insn->sibIndex = SIB_INDEX_NONE; 998 break; 999 } 1000 1001 switch (scaleFromSIB(insn->sib)) { 1002 case 0: 1003 insn->sibScale = 1; 1004 break; 1005 case 1: 1006 insn->sibScale = 2; 1007 break; 1008 case 2: 1009 insn->sibScale = 4; 1010 break; 1011 case 3: 1012 insn->sibScale = 8; 1013 break; 1014 } 1015 1016 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1017 1018 switch (base) { 1019 case 0x5: 1020 switch (modFromModRM(insn->modRM)) { 1021 case 0x0: 1022 insn->eaDisplacement = EA_DISP_32; 1023 insn->sibBase = SIB_BASE_NONE; 1024 break; 1025 case 0x1: 1026 insn->eaDisplacement = EA_DISP_8; 1027 insn->sibBase = (insn->addressSize == 4 ? 1028 SIB_BASE_EBP : SIB_BASE_RBP); 1029 break; 1030 case 0x2: 1031 insn->eaDisplacement = EA_DISP_32; 1032 insn->sibBase = (insn->addressSize == 4 ? 1033 SIB_BASE_EBP : SIB_BASE_RBP); 1034 break; 1035 case 0x3: 1036 debug("Cannot have Mod = 0b11 and a SIB byte"); 1037 return -1; 1038 } 1039 break; 1040 default: 1041 insn->sibBase = (SIBBase)(sibBaseBase + base); 1042 break; 1043 } 1044 1045 return 0; 1046 } 1047 1048 /* 1049 * readDisplacement - Consumes the displacement of an instruction. 1050 * 1051 * @param insn - The instruction whose displacement is to be read. 1052 * @return - 0 if the displacement byte was successfully read; nonzero 1053 * otherwise. 1054 */ 1055 static int readDisplacement(struct InternalInstruction* insn) { 1056 int8_t d8; 1057 int16_t d16; 1058 int32_t d32; 1059 1060 dbgprintf(insn, "readDisplacement()"); 1061 1062 if (insn->consumedDisplacement) 1063 return 0; 1064 1065 insn->consumedDisplacement = TRUE; 1066 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1067 1068 switch (insn->eaDisplacement) { 1069 case EA_DISP_NONE: 1070 insn->consumedDisplacement = FALSE; 1071 break; 1072 case EA_DISP_8: 1073 if (consumeInt8(insn, &d8)) 1074 return -1; 1075 insn->displacement = d8; 1076 break; 1077 case EA_DISP_16: 1078 if (consumeInt16(insn, &d16)) 1079 return -1; 1080 insn->displacement = d16; 1081 break; 1082 case EA_DISP_32: 1083 if (consumeInt32(insn, &d32)) 1084 return -1; 1085 insn->displacement = d32; 1086 break; 1087 } 1088 1089 insn->consumedDisplacement = TRUE; 1090 return 0; 1091 } 1092 1093 /* 1094 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1095 * displacement) for an instruction and interprets it. 1096 * 1097 * @param insn - The instruction whose addressing information is to be read. 1098 * @return - 0 if the information was successfully read; nonzero otherwise. 1099 */ 1100 static int readModRM(struct InternalInstruction* insn) { 1101 uint8_t mod, rm, reg; 1102 1103 dbgprintf(insn, "readModRM()"); 1104 1105 if (insn->consumedModRM) 1106 return 0; 1107 1108 if (consumeByte(insn, &insn->modRM)) 1109 return -1; 1110 insn->consumedModRM = TRUE; 1111 1112 mod = modFromModRM(insn->modRM); 1113 rm = rmFromModRM(insn->modRM); 1114 reg = regFromModRM(insn->modRM); 1115 1116 /* 1117 * This goes by insn->registerSize to pick the correct register, which messes 1118 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1119 * fixupReg(). 1120 */ 1121 switch (insn->registerSize) { 1122 case 2: 1123 insn->regBase = MODRM_REG_AX; 1124 insn->eaRegBase = EA_REG_AX; 1125 break; 1126 case 4: 1127 insn->regBase = MODRM_REG_EAX; 1128 insn->eaRegBase = EA_REG_EAX; 1129 break; 1130 case 8: 1131 insn->regBase = MODRM_REG_RAX; 1132 insn->eaRegBase = EA_REG_RAX; 1133 break; 1134 } 1135 1136 reg |= rFromREX(insn->rexPrefix) << 3; 1137 rm |= bFromREX(insn->rexPrefix) << 3; 1138 1139 insn->reg = (Reg)(insn->regBase + reg); 1140 1141 switch (insn->addressSize) { 1142 case 2: 1143 insn->eaBaseBase = EA_BASE_BX_SI; 1144 1145 switch (mod) { 1146 case 0x0: 1147 if (rm == 0x6) { 1148 insn->eaBase = EA_BASE_NONE; 1149 insn->eaDisplacement = EA_DISP_16; 1150 if (readDisplacement(insn)) 1151 return -1; 1152 } else { 1153 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1154 insn->eaDisplacement = EA_DISP_NONE; 1155 } 1156 break; 1157 case 0x1: 1158 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1159 insn->eaDisplacement = EA_DISP_8; 1160 if (readDisplacement(insn)) 1161 return -1; 1162 break; 1163 case 0x2: 1164 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1165 insn->eaDisplacement = EA_DISP_16; 1166 if (readDisplacement(insn)) 1167 return -1; 1168 break; 1169 case 0x3: 1170 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1171 if (readDisplacement(insn)) 1172 return -1; 1173 break; 1174 } 1175 break; 1176 case 4: 1177 case 8: 1178 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1179 1180 switch (mod) { 1181 case 0x0: 1182 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1183 switch (rm) { 1184 case 0x4: 1185 case 0xc: /* in case REXW.b is set */ 1186 insn->eaBase = (insn->addressSize == 4 ? 1187 EA_BASE_sib : EA_BASE_sib64); 1188 readSIB(insn); 1189 if (readDisplacement(insn)) 1190 return -1; 1191 break; 1192 case 0x5: 1193 insn->eaBase = EA_BASE_NONE; 1194 insn->eaDisplacement = EA_DISP_32; 1195 if (readDisplacement(insn)) 1196 return -1; 1197 break; 1198 default: 1199 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1200 break; 1201 } 1202 break; 1203 case 0x1: 1204 case 0x2: 1205 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1206 switch (rm) { 1207 case 0x4: 1208 case 0xc: /* in case REXW.b is set */ 1209 insn->eaBase = EA_BASE_sib; 1210 readSIB(insn); 1211 if (readDisplacement(insn)) 1212 return -1; 1213 break; 1214 default: 1215 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1216 if (readDisplacement(insn)) 1217 return -1; 1218 break; 1219 } 1220 break; 1221 case 0x3: 1222 insn->eaDisplacement = EA_DISP_NONE; 1223 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1224 break; 1225 } 1226 break; 1227 } /* switch (insn->addressSize) */ 1228 1229 return 0; 1230 } 1231 1232 #define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1233 static uint8_t name(struct InternalInstruction *insn, \ 1234 OperandType type, \ 1235 uint8_t index, \ 1236 uint8_t *valid) { \ 1237 *valid = 1; \ 1238 switch (type) { \ 1239 default: \ 1240 debug("Unhandled register type"); \ 1241 *valid = 0; \ 1242 return 0; \ 1243 case TYPE_Rv: \ 1244 return base + index; \ 1245 case TYPE_R8: \ 1246 if (insn->rexPrefix && \ 1247 index >= 4 && index <= 7) { \ 1248 return prefix##_SPL + (index - 4); \ 1249 } else { \ 1250 return prefix##_AL + index; \ 1251 } \ 1252 case TYPE_R16: \ 1253 return prefix##_AX + index; \ 1254 case TYPE_R32: \ 1255 return prefix##_EAX + index; \ 1256 case TYPE_R64: \ 1257 return prefix##_RAX + index; \ 1258 case TYPE_XMM512: \ 1259 return prefix##_ZMM0 + index; \ 1260 case TYPE_XMM256: \ 1261 return prefix##_YMM0 + index; \ 1262 case TYPE_XMM128: \ 1263 case TYPE_XMM64: \ 1264 case TYPE_XMM32: \ 1265 case TYPE_XMM: \ 1266 return prefix##_XMM0 + index; \ 1267 case TYPE_MM64: \ 1268 case TYPE_MM32: \ 1269 case TYPE_MM: \ 1270 if (index > 7) \ 1271 *valid = 0; \ 1272 return prefix##_MM0 + index; \ 1273 case TYPE_SEGMENTREG: \ 1274 if (index > 5) \ 1275 *valid = 0; \ 1276 return prefix##_ES + index; \ 1277 case TYPE_DEBUGREG: \ 1278 if (index > 7) \ 1279 *valid = 0; \ 1280 return prefix##_DR0 + index; \ 1281 case TYPE_CONTROLREG: \ 1282 if (index > 8) \ 1283 *valid = 0; \ 1284 return prefix##_CR0 + index; \ 1285 } \ 1286 } 1287 1288 /* 1289 * fixup*Value - Consults an operand type to determine the meaning of the 1290 * reg or R/M field. If the operand is an XMM operand, for example, an 1291 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1292 * misinterpret it as. 1293 * 1294 * @param insn - The instruction containing the operand. 1295 * @param type - The operand type. 1296 * @param index - The existing value of the field as reported by readModRM(). 1297 * @param valid - The address of a uint8_t. The target is set to 1 if the 1298 * field is valid for the register class; 0 if not. 1299 * @return - The proper value. 1300 */ 1301 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1302 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1303 1304 /* 1305 * fixupReg - Consults an operand specifier to determine which of the 1306 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1307 * 1308 * @param insn - See fixup*Value(). 1309 * @param op - The operand specifier. 1310 * @return - 0 if fixup was successful; -1 if the register returned was 1311 * invalid for its class. 1312 */ 1313 static int fixupReg(struct InternalInstruction *insn, 1314 const struct OperandSpecifier *op) { 1315 uint8_t valid; 1316 1317 dbgprintf(insn, "fixupReg()"); 1318 1319 switch ((OperandEncoding)op->encoding) { 1320 default: 1321 debug("Expected a REG or R/M encoding in fixupReg"); 1322 return -1; 1323 case ENCODING_VVVV: 1324 insn->vvvv = (Reg)fixupRegValue(insn, 1325 (OperandType)op->type, 1326 insn->vvvv, 1327 &valid); 1328 if (!valid) 1329 return -1; 1330 break; 1331 case ENCODING_REG: 1332 insn->reg = (Reg)fixupRegValue(insn, 1333 (OperandType)op->type, 1334 insn->reg - insn->regBase, 1335 &valid); 1336 if (!valid) 1337 return -1; 1338 break; 1339 case ENCODING_RM: 1340 if (insn->eaBase >= insn->eaRegBase) { 1341 insn->eaBase = (EABase)fixupRMValue(insn, 1342 (OperandType)op->type, 1343 insn->eaBase - insn->eaRegBase, 1344 &valid); 1345 if (!valid) 1346 return -1; 1347 } 1348 break; 1349 } 1350 1351 return 0; 1352 } 1353 1354 /* 1355 * readOpcodeModifier - Reads an operand from the opcode field of an 1356 * instruction. Handles AddRegFrm instructions. 1357 * 1358 * @param insn - The instruction whose opcode field is to be read. 1359 * @param inModRM - Indicates that the opcode field is to be read from the 1360 * ModR/M extension; useful for escape opcodes 1361 * @return - 0 on success; nonzero otherwise. 1362 */ 1363 static int readOpcodeModifier(struct InternalInstruction* insn) { 1364 dbgprintf(insn, "readOpcodeModifier()"); 1365 1366 if (insn->consumedOpcodeModifier) 1367 return 0; 1368 1369 insn->consumedOpcodeModifier = TRUE; 1370 1371 switch (insn->spec->modifierType) { 1372 default: 1373 debug("Unknown modifier type."); 1374 return -1; 1375 case MODIFIER_NONE: 1376 debug("No modifier but an operand expects one."); 1377 return -1; 1378 case MODIFIER_OPCODE: 1379 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1380 return 0; 1381 case MODIFIER_MODRM: 1382 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1383 return 0; 1384 } 1385 } 1386 1387 /* 1388 * readOpcodeRegister - Reads an operand from the opcode field of an 1389 * instruction and interprets it appropriately given the operand width. 1390 * Handles AddRegFrm instructions. 1391 * 1392 * @param insn - See readOpcodeModifier(). 1393 * @param size - The width (in bytes) of the register being specified. 1394 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1395 * RAX. 1396 * @return - 0 on success; nonzero otherwise. 1397 */ 1398 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1399 dbgprintf(insn, "readOpcodeRegister()"); 1400 1401 if (readOpcodeModifier(insn)) 1402 return -1; 1403 1404 if (size == 0) 1405 size = insn->registerSize; 1406 1407 switch (size) { 1408 case 1: 1409 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1410 | insn->opcodeModifier)); 1411 if (insn->rexPrefix && 1412 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1413 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1414 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1415 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1416 } 1417 1418 break; 1419 case 2: 1420 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1421 + ((bFromREX(insn->rexPrefix) << 3) 1422 | insn->opcodeModifier)); 1423 break; 1424 case 4: 1425 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1426 + ((bFromREX(insn->rexPrefix) << 3) 1427 | insn->opcodeModifier)); 1428 break; 1429 case 8: 1430 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1431 + ((bFromREX(insn->rexPrefix) << 3) 1432 | insn->opcodeModifier)); 1433 break; 1434 } 1435 1436 return 0; 1437 } 1438 1439 /* 1440 * readImmediate - Consumes an immediate operand from an instruction, given the 1441 * desired operand size. 1442 * 1443 * @param insn - The instruction whose operand is to be read. 1444 * @param size - The width (in bytes) of the operand. 1445 * @return - 0 if the immediate was successfully consumed; nonzero 1446 * otherwise. 1447 */ 1448 static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1449 uint8_t imm8; 1450 uint16_t imm16; 1451 uint32_t imm32; 1452 uint64_t imm64; 1453 1454 dbgprintf(insn, "readImmediate()"); 1455 1456 if (insn->numImmediatesConsumed == 2) { 1457 debug("Already consumed two immediates"); 1458 return -1; 1459 } 1460 1461 if (size == 0) 1462 size = insn->immediateSize; 1463 else 1464 insn->immediateSize = size; 1465 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1466 1467 switch (size) { 1468 case 1: 1469 if (consumeByte(insn, &imm8)) 1470 return -1; 1471 insn->immediates[insn->numImmediatesConsumed] = imm8; 1472 break; 1473 case 2: 1474 if (consumeUInt16(insn, &imm16)) 1475 return -1; 1476 insn->immediates[insn->numImmediatesConsumed] = imm16; 1477 break; 1478 case 4: 1479 if (consumeUInt32(insn, &imm32)) 1480 return -1; 1481 insn->immediates[insn->numImmediatesConsumed] = imm32; 1482 break; 1483 case 8: 1484 if (consumeUInt64(insn, &imm64)) 1485 return -1; 1486 insn->immediates[insn->numImmediatesConsumed] = imm64; 1487 break; 1488 } 1489 1490 insn->numImmediatesConsumed++; 1491 1492 return 0; 1493 } 1494 1495 /* 1496 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1497 * 1498 * @param insn - The instruction whose operand is to be read. 1499 * @return - 0 if the vvvv was successfully consumed; nonzero 1500 * otherwise. 1501 */ 1502 static int readVVVV(struct InternalInstruction* insn) { 1503 dbgprintf(insn, "readVVVV()"); 1504 1505 if (insn->vexSize == 3) 1506 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1507 else if (insn->vexSize == 2) 1508 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1509 else 1510 return -1; 1511 1512 if (insn->mode != MODE_64BIT) 1513 insn->vvvv &= 0x7; 1514 1515 return 0; 1516 } 1517 1518 /* 1519 * readOperands - Consults the specifier for an instruction and consumes all 1520 * operands for that instruction, interpreting them as it goes. 1521 * 1522 * @param insn - The instruction whose operands are to be read and interpreted. 1523 * @return - 0 if all operands could be read; nonzero otherwise. 1524 */ 1525 static int readOperands(struct InternalInstruction* insn) { 1526 int index; 1527 int hasVVVV, needVVVV; 1528 int sawRegImm = 0; 1529 1530 dbgprintf(insn, "readOperands()"); 1531 1532 /* If non-zero vvvv specified, need to make sure one of the operands 1533 uses it. */ 1534 hasVVVV = !readVVVV(insn); 1535 needVVVV = hasVVVV && (insn->vvvv != 0); 1536 1537 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1538 switch (x86OperandSets[insn->spec->operands][index].encoding) { 1539 case ENCODING_NONE: 1540 break; 1541 case ENCODING_REG: 1542 case ENCODING_RM: 1543 if (readModRM(insn)) 1544 return -1; 1545 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1546 return -1; 1547 break; 1548 case ENCODING_CB: 1549 case ENCODING_CW: 1550 case ENCODING_CD: 1551 case ENCODING_CP: 1552 case ENCODING_CO: 1553 case ENCODING_CT: 1554 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1555 return -1; 1556 case ENCODING_IB: 1557 if (sawRegImm) { 1558 /* Saw a register immediate so don't read again and instead split the 1559 previous immediate. FIXME: This is a hack. */ 1560 insn->immediates[insn->numImmediatesConsumed] = 1561 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1562 ++insn->numImmediatesConsumed; 1563 break; 1564 } 1565 if (readImmediate(insn, 1)) 1566 return -1; 1567 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 1568 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1569 return -1; 1570 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 1571 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 1572 return -1; 1573 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 1574 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 1575 sawRegImm = 1; 1576 break; 1577 case ENCODING_IW: 1578 if (readImmediate(insn, 2)) 1579 return -1; 1580 break; 1581 case ENCODING_ID: 1582 if (readImmediate(insn, 4)) 1583 return -1; 1584 break; 1585 case ENCODING_IO: 1586 if (readImmediate(insn, 8)) 1587 return -1; 1588 break; 1589 case ENCODING_Iv: 1590 if (readImmediate(insn, insn->immediateSize)) 1591 return -1; 1592 break; 1593 case ENCODING_Ia: 1594 if (readImmediate(insn, insn->addressSize)) 1595 return -1; 1596 break; 1597 case ENCODING_RB: 1598 if (readOpcodeRegister(insn, 1)) 1599 return -1; 1600 break; 1601 case ENCODING_RW: 1602 if (readOpcodeRegister(insn, 2)) 1603 return -1; 1604 break; 1605 case ENCODING_RD: 1606 if (readOpcodeRegister(insn, 4)) 1607 return -1; 1608 break; 1609 case ENCODING_RO: 1610 if (readOpcodeRegister(insn, 8)) 1611 return -1; 1612 break; 1613 case ENCODING_Rv: 1614 if (readOpcodeRegister(insn, 0)) 1615 return -1; 1616 break; 1617 case ENCODING_I: 1618 if (readOpcodeModifier(insn)) 1619 return -1; 1620 break; 1621 case ENCODING_VVVV: 1622 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1623 if (!hasVVVV) 1624 return -1; 1625 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1626 return -1; 1627 break; 1628 case ENCODING_DUP: 1629 break; 1630 default: 1631 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1632 return -1; 1633 } 1634 } 1635 1636 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1637 if (needVVVV) return -1; 1638 1639 return 0; 1640 } 1641 1642 /* 1643 * decodeInstruction - Reads and interprets a full instruction provided by the 1644 * user. 1645 * 1646 * @param insn - A pointer to the instruction to be populated. Must be 1647 * pre-allocated. 1648 * @param reader - The function to be used to read the instruction's bytes. 1649 * @param readerArg - A generic argument to be passed to the reader to store 1650 * any internal state. 1651 * @param logger - If non-NULL, the function to be used to write log messages 1652 * and warnings. 1653 * @param loggerArg - A generic argument to be passed to the logger to store 1654 * any internal state. 1655 * @param startLoc - The address (in the reader's address space) of the first 1656 * byte in the instruction. 1657 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1658 * decode the instruction in. 1659 * @return - 0 if the instruction's memory could be read; nonzero if 1660 * not. 1661 */ 1662 int decodeInstruction(struct InternalInstruction* insn, 1663 byteReader_t reader, 1664 const void* readerArg, 1665 dlog_t logger, 1666 void* loggerArg, 1667 const void* miiArg, 1668 uint64_t startLoc, 1669 DisassemblerMode mode) { 1670 memset(insn, 0, sizeof(struct InternalInstruction)); 1671 1672 insn->reader = reader; 1673 insn->readerArg = readerArg; 1674 insn->dlog = logger; 1675 insn->dlogArg = loggerArg; 1676 insn->startLocation = startLoc; 1677 insn->readerCursor = startLoc; 1678 insn->mode = mode; 1679 insn->numImmediatesConsumed = 0; 1680 1681 if (readPrefixes(insn) || 1682 readOpcode(insn) || 1683 getID(insn, miiArg) || 1684 insn->instructionID == 0 || 1685 readOperands(insn)) 1686 return -1; 1687 1688 insn->operands = &x86OperandSets[insn->spec->operands][0]; 1689 1690 insn->length = insn->readerCursor - insn->startLocation; 1691 1692 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1693 startLoc, insn->readerCursor, insn->length); 1694 1695 if (insn->length > 15) 1696 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1697 1698 return 0; 1699 } 1700