1 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16 #include <stdarg.h> /* for va_*() */ 17 #include <stdio.h> /* for vsnprintf() */ 18 #include <stdlib.h> /* for exit() */ 19 #include <string.h> /* for memset() */ 20 21 #include "X86DisassemblerDecoder.h" 22 23 #include "X86GenDisassemblerTables.inc" 24 25 #define TRUE 1 26 #define FALSE 0 27 28 typedef int8_t bool; 29 30 #ifndef NDEBUG 31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32 #else 33 #define debug(s) do { } while (0) 34 #endif 35 36 37 /* 38 * contextForAttrs - Client for the instruction context table. Takes a set of 39 * attributes and returns the appropriate decode context. 40 * 41 * @param attrMask - Attributes, from the enumeration attributeBits. 42 * @return - The InstructionContext to use when looking up an 43 * an instruction with these attributes. 44 */ 45 static InstructionContext contextForAttrs(uint8_t attrMask) { 46 return CONTEXTS_SYM[attrMask]; 47 } 48 49 /* 50 * modRMRequired - Reads the appropriate instruction table to determine whether 51 * the ModR/M byte is required to decode a particular instruction. 52 * 53 * @param type - The opcode type (i.e., how many bytes it has). 54 * @param insnContext - The context for the instruction, as returned by 55 * contextForAttrs. 56 * @param opcode - The last byte of the instruction's opcode, not counting 57 * ModR/M extensions and escapes. 
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59 */ 60 static int modRMRequired(OpcodeType type, 61 InstructionContext insnContext, 62 uint8_t opcode) { 63 const struct ContextDecision* decision = 0; 64 65 switch (type) { 66 case ONEBYTE: 67 decision = &ONEBYTE_SYM; 68 break; 69 case TWOBYTE: 70 decision = &TWOBYTE_SYM; 71 break; 72 case THREEBYTE_38: 73 decision = &THREEBYTE38_SYM; 74 break; 75 case THREEBYTE_3A: 76 decision = &THREEBYTE3A_SYM; 77 break; 78 case THREEBYTE_A6: 79 decision = &THREEBYTEA6_SYM; 80 break; 81 case THREEBYTE_A7: 82 decision = &THREEBYTEA7_SYM; 83 break; 84 } 85 86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87 modrm_type != MODRM_ONEENTRY; 88 89 return 0; 90 } 91 92 /* 93 * decode - Reads the appropriate instruction table to obtain the unique ID of 94 * an instruction. 95 * 96 * @param type - See modRMRequired(). 97 * @param insnContext - See modRMRequired(). 98 * @param opcode - See modRMRequired(). 99 * @param modRM - The ModR/M byte if required, or any value if not. 100 * @return - The UID of the instruction, or 0 on failure. 
101 */ 102 static InstrUID decode(OpcodeType type, 103 InstructionContext insnContext, 104 uint8_t opcode, 105 uint8_t modRM) { 106 const struct ModRMDecision* dec; 107 108 switch (type) { 109 default: 110 debug("Unknown opcode type"); 111 return 0; 112 case ONEBYTE: 113 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 114 break; 115 case TWOBYTE: 116 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 117 break; 118 case THREEBYTE_38: 119 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 120 break; 121 case THREEBYTE_3A: 122 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 123 break; 124 case THREEBYTE_A6: 125 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 126 break; 127 case THREEBYTE_A7: 128 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 129 break; 130 } 131 132 switch (dec->modrm_type) { 133 default: 134 debug("Corrupt table! Unknown modrm_type"); 135 return 0; 136 case MODRM_ONEENTRY: 137 return dec->instructionIDs[0]; 138 case MODRM_SPLITRM: 139 if (modFromModRM(modRM) == 0x3) 140 return dec->instructionIDs[1]; 141 else 142 return dec->instructionIDs[0]; 143 case MODRM_FULL: 144 return dec->instructionIDs[modRM]; 145 } 146 } 147 148 /* 149 * specifierForUID - Given a UID, returns the name and operand specification for 150 * that instruction. 151 * 152 * @param uid - The unique ID for the instruction. This should be returned by 153 * decode(); specifierForUID will not check bounds. 154 * @return - A pointer to the specification for that instruction. 155 */ 156 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 157 return &INSTRUCTIONS_SYM[uid]; 158 } 159 160 /* 161 * consumeByte - Uses the reader function provided by the user to consume one 162 * byte from the instruction's memory and advance the cursor. 163 * 164 * @param insn - The instruction with the reader function to use. 
The cursor 165 * for this instruction is advanced. 166 * @param byte - A pointer to a pre-allocated memory buffer to be populated 167 * with the data read. 168 * @return - 0 if the read was successful; nonzero otherwise. 169 */ 170 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 171 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 172 173 if (!ret) 174 ++(insn->readerCursor); 175 176 return ret; 177 } 178 179 /* 180 * lookAtByte - Like consumeByte, but does not advance the cursor. 181 * 182 * @param insn - See consumeByte(). 183 * @param byte - See consumeByte(). 184 * @return - See consumeByte(). 185 */ 186 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 187 return insn->reader(insn->readerArg, byte, insn->readerCursor); 188 } 189 190 static void unconsumeByte(struct InternalInstruction* insn) { 191 insn->readerCursor--; 192 } 193 194 #define CONSUME_FUNC(name, type) \ 195 static int name(struct InternalInstruction* insn, type* ptr) { \ 196 type combined = 0; \ 197 unsigned offset; \ 198 for (offset = 0; offset < sizeof(type); ++offset) { \ 199 uint8_t byte; \ 200 int ret = insn->reader(insn->readerArg, \ 201 &byte, \ 202 insn->readerCursor + offset); \ 203 if (ret) \ 204 return ret; \ 205 combined = combined | ((type)byte << ((type)offset * 8)); \ 206 } \ 207 *ptr = combined; \ 208 insn->readerCursor += sizeof(type); \ 209 return 0; \ 210 } 211 212 /* 213 * consume* - Use the reader function provided by the user to consume data 214 * values of various sizes from the instruction's memory and advance the 215 * cursor appropriately. These readers perform endian conversion. 216 * 217 * @param insn - See consumeByte(). 218 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 219 * be populated with the data read. 220 * @return - See consumeByte(). 
221 */ 222 CONSUME_FUNC(consumeInt8, int8_t) 223 CONSUME_FUNC(consumeInt16, int16_t) 224 CONSUME_FUNC(consumeInt32, int32_t) 225 CONSUME_FUNC(consumeUInt16, uint16_t) 226 CONSUME_FUNC(consumeUInt32, uint32_t) 227 CONSUME_FUNC(consumeUInt64, uint64_t) 228 229 /* 230 * dbgprintf - Uses the logging function provided by the user to log a single 231 * message, typically without a carriage-return. 232 * 233 * @param insn - The instruction containing the logging function. 234 * @param format - See printf(). 235 * @param ... - See printf(). 236 */ 237 static void dbgprintf(struct InternalInstruction* insn, 238 const char* format, 239 ...) { 240 char buffer[256]; 241 va_list ap; 242 243 if (!insn->dlog) 244 return; 245 246 va_start(ap, format); 247 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 248 va_end(ap); 249 250 insn->dlog(insn->dlogArg, buffer); 251 252 return; 253 } 254 255 /* 256 * setPrefixPresent - Marks that a particular prefix is present at a particular 257 * location. 258 * 259 * @param insn - The instruction to be marked as having the prefix. 260 * @param prefix - The prefix that is present. 261 * @param location - The location where the prefix is located (in the address 262 * space of the instruction's reader). 263 */ 264 static void setPrefixPresent(struct InternalInstruction* insn, 265 uint8_t prefix, 266 uint64_t location) 267 { 268 insn->prefixPresent[prefix] = 1; 269 insn->prefixLocations[prefix] = location; 270 } 271 272 /* 273 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 274 * present at a given location. 275 * 276 * @param insn - The instruction to be queried. 277 * @param prefix - The prefix. 278 * @param location - The location to query. 279 * @return - Whether the prefix is at that location. 
280 */ 281 static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 282 uint8_t prefix, 283 uint64_t location) 284 { 285 if (insn->prefixPresent[prefix] == 1 && 286 insn->prefixLocations[prefix] == location) 287 return TRUE; 288 else 289 return FALSE; 290 } 291 292 /* 293 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 294 * instruction as having them. Also sets the instruction's default operand, 295 * address, and other relevant data sizes to report operands correctly. 296 * 297 * @param insn - The instruction whose prefixes are to be read. 298 * @return - 0 if the instruction could be read until the end of the prefix 299 * bytes, and no prefixes conflicted; nonzero otherwise. 300 */ 301 static int readPrefixes(struct InternalInstruction* insn) { 302 BOOL isPrefix = TRUE; 303 BOOL prefixGroups[4] = { FALSE }; 304 uint64_t prefixLocation; 305 uint8_t byte = 0; 306 307 BOOL hasAdSize = FALSE; 308 BOOL hasOpSize = FALSE; 309 310 dbgprintf(insn, "readPrefixes()"); 311 312 while (isPrefix) { 313 prefixLocation = insn->readerCursor; 314 315 if (consumeByte(insn, &byte)) 316 return -1; 317 318 switch (byte) { 319 case 0xf0: /* LOCK */ 320 case 0xf2: /* REPNE/REPNZ */ 321 case 0xf3: /* REP or REPE/REPZ */ 322 if (prefixGroups[0]) 323 dbgprintf(insn, "Redundant Group 1 prefix"); 324 prefixGroups[0] = TRUE; 325 setPrefixPresent(insn, byte, prefixLocation); 326 break; 327 case 0x2e: /* CS segment override -OR- Branch not taken */ 328 case 0x36: /* SS segment override -OR- Branch taken */ 329 case 0x3e: /* DS segment override */ 330 case 0x26: /* ES segment override */ 331 case 0x64: /* FS segment override */ 332 case 0x65: /* GS segment override */ 333 switch (byte) { 334 case 0x2e: 335 insn->segmentOverride = SEG_OVERRIDE_CS; 336 break; 337 case 0x36: 338 insn->segmentOverride = SEG_OVERRIDE_SS; 339 break; 340 case 0x3e: 341 insn->segmentOverride = SEG_OVERRIDE_DS; 342 break; 343 case 0x26: 344 insn->segmentOverride = SEG_OVERRIDE_ES; 
345 break; 346 case 0x64: 347 insn->segmentOverride = SEG_OVERRIDE_FS; 348 break; 349 case 0x65: 350 insn->segmentOverride = SEG_OVERRIDE_GS; 351 break; 352 default: 353 debug("Unhandled override"); 354 return -1; 355 } 356 if (prefixGroups[1]) 357 dbgprintf(insn, "Redundant Group 2 prefix"); 358 prefixGroups[1] = TRUE; 359 setPrefixPresent(insn, byte, prefixLocation); 360 break; 361 case 0x66: /* Operand-size override */ 362 if (prefixGroups[2]) 363 dbgprintf(insn, "Redundant Group 3 prefix"); 364 prefixGroups[2] = TRUE; 365 hasOpSize = TRUE; 366 setPrefixPresent(insn, byte, prefixLocation); 367 break; 368 case 0x67: /* Address-size override */ 369 if (prefixGroups[3]) 370 dbgprintf(insn, "Redundant Group 4 prefix"); 371 prefixGroups[3] = TRUE; 372 hasAdSize = TRUE; 373 setPrefixPresent(insn, byte, prefixLocation); 374 break; 375 default: /* Not a prefix byte */ 376 isPrefix = FALSE; 377 break; 378 } 379 380 if (isPrefix) 381 dbgprintf(insn, "Found prefix 0x%hhx", byte); 382 } 383 384 insn->vexSize = 0; 385 386 if (byte == 0xc4) { 387 uint8_t byte1; 388 389 if (lookAtByte(insn, &byte1)) { 390 dbgprintf(insn, "Couldn't read second byte of VEX"); 391 return -1; 392 } 393 394 if (insn->mode == MODE_64BIT || byte1 & 0x8) { 395 insn->vexSize = 3; 396 insn->necessaryPrefixLocation = insn->readerCursor - 1; 397 } 398 else { 399 unconsumeByte(insn); 400 insn->necessaryPrefixLocation = insn->readerCursor - 1; 401 } 402 403 if (insn->vexSize == 3) { 404 insn->vexPrefix[0] = byte; 405 consumeByte(insn, &insn->vexPrefix[1]); 406 consumeByte(insn, &insn->vexPrefix[2]); 407 408 /* We simulate the REX prefix for simplicity's sake */ 409 410 insn->rexPrefix = 0x40 411 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 412 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 413 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 414 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 415 416 switch (ppFromVEX3of3(insn->vexPrefix[2])) 417 { 418 default: 419 break; 420 case VEX_PREFIX_66: 421 hasOpSize = TRUE; 422 
break; 423 } 424 425 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 426 } 427 } 428 else if (byte == 0xc5) { 429 uint8_t byte1; 430 431 if (lookAtByte(insn, &byte1)) { 432 dbgprintf(insn, "Couldn't read second byte of VEX"); 433 return -1; 434 } 435 436 if (insn->mode == MODE_64BIT || byte1 & 0x8) { 437 insn->vexSize = 2; 438 } 439 else { 440 unconsumeByte(insn); 441 } 442 443 if (insn->vexSize == 2) { 444 insn->vexPrefix[0] = byte; 445 consumeByte(insn, &insn->vexPrefix[1]); 446 447 insn->rexPrefix = 0x40 448 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 449 450 switch (ppFromVEX2of2(insn->vexPrefix[1])) 451 { 452 default: 453 break; 454 case VEX_PREFIX_66: 455 hasOpSize = TRUE; 456 break; 457 } 458 459 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 460 } 461 } 462 else { 463 if (insn->mode == MODE_64BIT) { 464 if ((byte & 0xf0) == 0x40) { 465 uint8_t opcodeByte; 466 467 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 468 dbgprintf(insn, "Redundant REX prefix"); 469 return -1; 470 } 471 472 insn->rexPrefix = byte; 473 insn->necessaryPrefixLocation = insn->readerCursor - 2; 474 475 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 476 } else { 477 unconsumeByte(insn); 478 insn->necessaryPrefixLocation = insn->readerCursor - 1; 479 } 480 } else { 481 unconsumeByte(insn); 482 insn->necessaryPrefixLocation = insn->readerCursor - 1; 483 } 484 } 485 486 if (insn->mode == MODE_16BIT) { 487 insn->registerSize = (hasOpSize ? 4 : 2); 488 insn->addressSize = (hasAdSize ? 4 : 2); 489 insn->displacementSize = (hasAdSize ? 4 : 2); 490 insn->immediateSize = (hasOpSize ? 4 : 2); 491 } else if (insn->mode == MODE_32BIT) { 492 insn->registerSize = (hasOpSize ? 2 : 4); 493 insn->addressSize = (hasAdSize ? 2 : 4); 494 insn->displacementSize = (hasAdSize ? 2 : 4); 495 insn->immediateSize = (hasOpSize ? 
2 : 4); 496 } else if (insn->mode == MODE_64BIT) { 497 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 498 insn->registerSize = 8; 499 insn->addressSize = (hasAdSize ? 4 : 8); 500 insn->displacementSize = 4; 501 insn->immediateSize = 4; 502 } else if (insn->rexPrefix) { 503 insn->registerSize = (hasOpSize ? 2 : 4); 504 insn->addressSize = (hasAdSize ? 4 : 8); 505 insn->displacementSize = (hasOpSize ? 2 : 4); 506 insn->immediateSize = (hasOpSize ? 2 : 4); 507 } else { 508 insn->registerSize = (hasOpSize ? 2 : 4); 509 insn->addressSize = (hasAdSize ? 4 : 8); 510 insn->displacementSize = (hasOpSize ? 2 : 4); 511 insn->immediateSize = (hasOpSize ? 2 : 4); 512 } 513 } 514 515 return 0; 516 } 517 518 /* 519 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 520 * extended or escape opcodes). 521 * 522 * @param insn - The instruction whose opcode is to be read. 523 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 524 */ 525 static int readOpcode(struct InternalInstruction* insn) { 526 /* Determine the length of the primary opcode */ 527 528 uint8_t current; 529 530 dbgprintf(insn, "readOpcode()"); 531 532 insn->opcodeType = ONEBYTE; 533 534 if (insn->vexSize == 3) 535 { 536 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 537 { 538 default: 539 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 540 return -1; 541 case 0: 542 break; 543 case VEX_LOB_0F: 544 insn->twoByteEscape = 0x0f; 545 insn->opcodeType = TWOBYTE; 546 return consumeByte(insn, &insn->opcode); 547 case VEX_LOB_0F38: 548 insn->twoByteEscape = 0x0f; 549 insn->threeByteEscape = 0x38; 550 insn->opcodeType = THREEBYTE_38; 551 return consumeByte(insn, &insn->opcode); 552 case VEX_LOB_0F3A: 553 insn->twoByteEscape = 0x0f; 554 insn->threeByteEscape = 0x3a; 555 insn->opcodeType = THREEBYTE_3A; 556 return consumeByte(insn, &insn->opcode); 557 } 558 } 559 else if (insn->vexSize == 2) 560 { 561 insn->twoByteEscape 
= 0x0f; 562 insn->opcodeType = TWOBYTE; 563 return consumeByte(insn, &insn->opcode); 564 } 565 566 if (consumeByte(insn, ¤t)) 567 return -1; 568 569 if (current == 0x0f) { 570 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 571 572 insn->twoByteEscape = current; 573 574 if (consumeByte(insn, ¤t)) 575 return -1; 576 577 if (current == 0x38) { 578 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 579 580 insn->threeByteEscape = current; 581 582 if (consumeByte(insn, ¤t)) 583 return -1; 584 585 insn->opcodeType = THREEBYTE_38; 586 } else if (current == 0x3a) { 587 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 588 589 insn->threeByteEscape = current; 590 591 if (consumeByte(insn, ¤t)) 592 return -1; 593 594 insn->opcodeType = THREEBYTE_3A; 595 } else if (current == 0xa6) { 596 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 597 598 insn->threeByteEscape = current; 599 600 if (consumeByte(insn, ¤t)) 601 return -1; 602 603 insn->opcodeType = THREEBYTE_A6; 604 } else if (current == 0xa7) { 605 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 606 607 insn->threeByteEscape = current; 608 609 if (consumeByte(insn, ¤t)) 610 return -1; 611 612 insn->opcodeType = THREEBYTE_A7; 613 } else { 614 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 615 616 insn->opcodeType = TWOBYTE; 617 } 618 } 619 620 /* 621 * At this point we have consumed the full opcode. 622 * Anything we consume from here on must be unconsumed. 623 */ 624 625 insn->opcode = current; 626 627 return 0; 628 } 629 630 static int readModRM(struct InternalInstruction* insn); 631 632 /* 633 * getIDWithAttrMask - Determines the ID of an instruction, consuming 634 * the ModR/M byte as appropriate for extended and escape opcodes, 635 * and using a supplied attribute mask. 636 * 637 * @param instructionID - A pointer whose target is filled in with the ID of the 638 * instruction. 
639 * @param insn - The instruction whose ID is to be determined. 640 * @param attrMask - The attribute mask to search. 641 * @return - 0 if the ModR/M could be read when needed or was not 642 * needed; nonzero otherwise. 643 */ 644 static int getIDWithAttrMask(uint16_t* instructionID, 645 struct InternalInstruction* insn, 646 uint8_t attrMask) { 647 BOOL hasModRMExtension; 648 649 uint8_t instructionClass; 650 651 instructionClass = contextForAttrs(attrMask); 652 653 hasModRMExtension = modRMRequired(insn->opcodeType, 654 instructionClass, 655 insn->opcode); 656 657 if (hasModRMExtension) { 658 if (readModRM(insn)) 659 return -1; 660 661 *instructionID = decode(insn->opcodeType, 662 instructionClass, 663 insn->opcode, 664 insn->modRM); 665 } else { 666 *instructionID = decode(insn->opcodeType, 667 instructionClass, 668 insn->opcode, 669 0); 670 } 671 672 return 0; 673 } 674 675 /* 676 * is16BitEquivalent - Determines whether two instruction names refer to 677 * equivalent instructions but one is 16-bit whereas the other is not. 678 * 679 * @param orig - The instruction that is not 16-bit 680 * @param equiv - The instruction that is 16-bit 681 */ 682 static BOOL is16BitEquvalent(const char* orig, const char* equiv) { 683 off_t i; 684 685 for (i = 0;; i++) { 686 if (orig[i] == '\0' && equiv[i] == '\0') 687 return TRUE; 688 if (orig[i] == '\0' || equiv[i] == '\0') 689 return FALSE; 690 if (orig[i] != equiv[i]) { 691 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 692 continue; 693 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 694 continue; 695 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 696 continue; 697 return FALSE; 698 } 699 } 700 } 701 702 /* 703 * is64BitEquivalent - Determines whether two instruction names refer to 704 * equivalent instructions but one is 64-bit whereas the other is not. 
705 * 706 * @param orig - The instruction that is not 64-bit 707 * @param equiv - The instruction that is 64-bit 708 */ 709 static BOOL is64BitEquivalent(const char* orig, const char* equiv) { 710 off_t i; 711 712 for (i = 0;; i++) { 713 if (orig[i] == '\0' && equiv[i] == '\0') 714 return TRUE; 715 if (orig[i] == '\0' || equiv[i] == '\0') 716 return FALSE; 717 if (orig[i] != equiv[i]) { 718 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q') 719 continue; 720 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6') 721 continue; 722 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4') 723 continue; 724 return FALSE; 725 } 726 } 727 } 728 729 730 /* 731 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 732 * appropriate for extended and escape opcodes. Determines the attributes and 733 * context for the instruction before doing so. 734 * 735 * @param insn - The instruction whose ID is to be determined. 736 * @return - 0 if the ModR/M could be read when needed or was not needed; 737 * nonzero otherwise. 
738 */ 739 static int getID(struct InternalInstruction* insn) { 740 uint8_t attrMask; 741 uint16_t instructionID; 742 743 dbgprintf(insn, "getID()"); 744 745 attrMask = ATTR_NONE; 746 747 if (insn->mode == MODE_64BIT) 748 attrMask |= ATTR_64BIT; 749 750 if (insn->vexSize) { 751 attrMask |= ATTR_VEX; 752 753 if (insn->vexSize == 3) { 754 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 755 case VEX_PREFIX_66: 756 attrMask |= ATTR_OPSIZE; 757 break; 758 case VEX_PREFIX_F3: 759 attrMask |= ATTR_XS; 760 break; 761 case VEX_PREFIX_F2: 762 attrMask |= ATTR_XD; 763 break; 764 } 765 766 if (wFromVEX3of3(insn->vexPrefix[2])) 767 attrMask |= ATTR_REXW; 768 if (lFromVEX3of3(insn->vexPrefix[2])) 769 attrMask |= ATTR_VEXL; 770 } 771 else if (insn->vexSize == 2) { 772 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 773 case VEX_PREFIX_66: 774 attrMask |= ATTR_OPSIZE; 775 break; 776 case VEX_PREFIX_F3: 777 attrMask |= ATTR_XS; 778 break; 779 case VEX_PREFIX_F2: 780 attrMask |= ATTR_XD; 781 break; 782 } 783 784 if (lFromVEX2of2(insn->vexPrefix[1])) 785 attrMask |= ATTR_VEXL; 786 } 787 else { 788 return -1; 789 } 790 } 791 else { 792 if (insn->rexPrefix & 0x08) 793 attrMask |= ATTR_REXW; 794 795 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 796 attrMask |= ATTR_OPSIZE; 797 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 798 attrMask |= ATTR_XS; 799 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 800 attrMask |= ATTR_XD; 801 802 } 803 804 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 805 return -1; 806 807 /* The following clauses compensate for limitations of the tables. */ 808 809 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { 810 /* 811 * Although for SSE instructions it is usually necessary to treat REX.W+F2 812 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is 813 * an occasional instruction where F2 is incidental and REX.W is the more 814 * significant. 
If the decoded instruction is 32-bit and adding REX.W 815 * instead of F2 changes a 32 to a 64, we adopt the new encoding. 816 */ 817 818 const struct InstructionSpecifier *spec; 819 uint16_t instructionIDWithREXw; 820 const struct InstructionSpecifier *specWithREXw; 821 822 spec = specifierForUID(instructionID); 823 824 if (getIDWithAttrMask(&instructionIDWithREXw, 825 insn, 826 attrMask & (~ATTR_XD))) { 827 /* 828 * Decoding with REX.w would yield nothing; give up and return original 829 * decode. 830 */ 831 832 insn->instructionID = instructionID; 833 insn->spec = spec; 834 return 0; 835 } 836 837 specWithREXw = specifierForUID(instructionIDWithREXw); 838 839 if (is64BitEquivalent(spec->name, specWithREXw->name)) { 840 insn->instructionID = instructionIDWithREXw; 841 insn->spec = specWithREXw; 842 } else { 843 insn->instructionID = instructionID; 844 insn->spec = spec; 845 } 846 return 0; 847 } 848 849 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 850 /* 851 * The instruction tables make no distinction between instructions that 852 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 853 * particular spot (i.e., many MMX operations). In general we're 854 * conservative, but in the specific case where OpSize is present but not 855 * in the right place we check if there's a 16-bit operation. 
856 */ 857 858 const struct InstructionSpecifier *spec; 859 uint16_t instructionIDWithOpsize; 860 const struct InstructionSpecifier *specWithOpsize; 861 862 spec = specifierForUID(instructionID); 863 864 if (getIDWithAttrMask(&instructionIDWithOpsize, 865 insn, 866 attrMask | ATTR_OPSIZE)) { 867 /* 868 * ModRM required with OpSize but not present; give up and return version 869 * without OpSize set 870 */ 871 872 insn->instructionID = instructionID; 873 insn->spec = spec; 874 return 0; 875 } 876 877 specWithOpsize = specifierForUID(instructionIDWithOpsize); 878 879 if (is16BitEquvalent(spec->name, specWithOpsize->name)) { 880 insn->instructionID = instructionIDWithOpsize; 881 insn->spec = specWithOpsize; 882 } else { 883 insn->instructionID = instructionID; 884 insn->spec = spec; 885 } 886 return 0; 887 } 888 889 insn->instructionID = instructionID; 890 insn->spec = specifierForUID(insn->instructionID); 891 892 return 0; 893 } 894 895 /* 896 * readSIB - Consumes the SIB byte to determine addressing information for an 897 * instruction. 898 * 899 * @param insn - The instruction whose SIB byte is to be read. 900 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 
901 */ 902 static int readSIB(struct InternalInstruction* insn) { 903 SIBIndex sibIndexBase = 0; 904 SIBBase sibBaseBase = 0; 905 uint8_t index, base; 906 907 dbgprintf(insn, "readSIB()"); 908 909 if (insn->consumedSIB) 910 return 0; 911 912 insn->consumedSIB = TRUE; 913 914 switch (insn->addressSize) { 915 case 2: 916 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 917 return -1; 918 break; 919 case 4: 920 sibIndexBase = SIB_INDEX_EAX; 921 sibBaseBase = SIB_BASE_EAX; 922 break; 923 case 8: 924 sibIndexBase = SIB_INDEX_RAX; 925 sibBaseBase = SIB_BASE_RAX; 926 break; 927 } 928 929 if (consumeByte(insn, &insn->sib)) 930 return -1; 931 932 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 933 934 switch (index) { 935 case 0x4: 936 insn->sibIndex = SIB_INDEX_NONE; 937 break; 938 default: 939 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 940 if (insn->sibIndex == SIB_INDEX_sib || 941 insn->sibIndex == SIB_INDEX_sib64) 942 insn->sibIndex = SIB_INDEX_NONE; 943 break; 944 } 945 946 switch (scaleFromSIB(insn->sib)) { 947 case 0: 948 insn->sibScale = 1; 949 break; 950 case 1: 951 insn->sibScale = 2; 952 break; 953 case 2: 954 insn->sibScale = 4; 955 break; 956 case 3: 957 insn->sibScale = 8; 958 break; 959 } 960 961 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 962 963 switch (base) { 964 case 0x5: 965 switch (modFromModRM(insn->modRM)) { 966 case 0x0: 967 insn->eaDisplacement = EA_DISP_32; 968 insn->sibBase = SIB_BASE_NONE; 969 break; 970 case 0x1: 971 insn->eaDisplacement = EA_DISP_8; 972 insn->sibBase = (insn->addressSize == 4 ? 973 SIB_BASE_EBP : SIB_BASE_RBP); 974 break; 975 case 0x2: 976 insn->eaDisplacement = EA_DISP_32; 977 insn->sibBase = (insn->addressSize == 4 ? 
978 SIB_BASE_EBP : SIB_BASE_RBP); 979 break; 980 case 0x3: 981 debug("Cannot have Mod = 0b11 and a SIB byte"); 982 return -1; 983 } 984 break; 985 default: 986 insn->sibBase = (SIBBase)(sibBaseBase + base); 987 break; 988 } 989 990 return 0; 991 } 992 993 /* 994 * readDisplacement - Consumes the displacement of an instruction. 995 * 996 * @param insn - The instruction whose displacement is to be read. 997 * @return - 0 if the displacement byte was successfully read; nonzero 998 * otherwise. 999 */ 1000 static int readDisplacement(struct InternalInstruction* insn) { 1001 int8_t d8; 1002 int16_t d16; 1003 int32_t d32; 1004 1005 dbgprintf(insn, "readDisplacement()"); 1006 1007 if (insn->consumedDisplacement) 1008 return 0; 1009 1010 insn->consumedDisplacement = TRUE; 1011 1012 switch (insn->eaDisplacement) { 1013 case EA_DISP_NONE: 1014 insn->consumedDisplacement = FALSE; 1015 break; 1016 case EA_DISP_8: 1017 if (consumeInt8(insn, &d8)) 1018 return -1; 1019 insn->displacement = d8; 1020 break; 1021 case EA_DISP_16: 1022 if (consumeInt16(insn, &d16)) 1023 return -1; 1024 insn->displacement = d16; 1025 break; 1026 case EA_DISP_32: 1027 if (consumeInt32(insn, &d32)) 1028 return -1; 1029 insn->displacement = d32; 1030 break; 1031 } 1032 1033 insn->consumedDisplacement = TRUE; 1034 return 0; 1035 } 1036 1037 /* 1038 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1039 * displacement) for an instruction and interprets it. 1040 * 1041 * @param insn - The instruction whose addressing information is to be read. 1042 * @return - 0 if the information was successfully read; nonzero otherwise. 
1043 */ 1044 static int readModRM(struct InternalInstruction* insn) { 1045 uint8_t mod, rm, reg; 1046 1047 dbgprintf(insn, "readModRM()"); 1048 1049 if (insn->consumedModRM) 1050 return 0; 1051 1052 if (consumeByte(insn, &insn->modRM)) 1053 return -1; 1054 insn->consumedModRM = TRUE; 1055 1056 mod = modFromModRM(insn->modRM); 1057 rm = rmFromModRM(insn->modRM); 1058 reg = regFromModRM(insn->modRM); 1059 1060 /* 1061 * This goes by insn->registerSize to pick the correct register, which messes 1062 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1063 * fixupReg(). 1064 */ 1065 switch (insn->registerSize) { 1066 case 2: 1067 insn->regBase = MODRM_REG_AX; 1068 insn->eaRegBase = EA_REG_AX; 1069 break; 1070 case 4: 1071 insn->regBase = MODRM_REG_EAX; 1072 insn->eaRegBase = EA_REG_EAX; 1073 break; 1074 case 8: 1075 insn->regBase = MODRM_REG_RAX; 1076 insn->eaRegBase = EA_REG_RAX; 1077 break; 1078 } 1079 1080 reg |= rFromREX(insn->rexPrefix) << 3; 1081 rm |= bFromREX(insn->rexPrefix) << 3; 1082 1083 insn->reg = (Reg)(insn->regBase + reg); 1084 1085 switch (insn->addressSize) { 1086 case 2: 1087 insn->eaBaseBase = EA_BASE_BX_SI; 1088 1089 switch (mod) { 1090 case 0x0: 1091 if (rm == 0x6) { 1092 insn->eaBase = EA_BASE_NONE; 1093 insn->eaDisplacement = EA_DISP_16; 1094 if (readDisplacement(insn)) 1095 return -1; 1096 } else { 1097 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1098 insn->eaDisplacement = EA_DISP_NONE; 1099 } 1100 break; 1101 case 0x1: 1102 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1103 insn->eaDisplacement = EA_DISP_8; 1104 if (readDisplacement(insn)) 1105 return -1; 1106 break; 1107 case 0x2: 1108 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1109 insn->eaDisplacement = EA_DISP_16; 1110 if (readDisplacement(insn)) 1111 return -1; 1112 break; 1113 case 0x3: 1114 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1115 if (readDisplacement(insn)) 1116 return -1; 1117 break; 1118 } 1119 break; 1120 case 4: 1121 case 8: 1122 
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

    switch (mod) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
      switch (rm) {
      case 0x4:
      case 0xc:   /* in case REXW.b is set */
        insn->eaBase = (insn->addressSize == 4 ?
                        EA_BASE_sib : EA_BASE_sib64);
        readSIB(insn);
        if (readDisplacement(insn))
          return -1;
        break;
      case 0x5:
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_32;
        if (readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
        break;
      }
      break;
    case 0x1:
    case 0x2:
      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
      switch (rm) {
      case 0x4:
      case 0xc:   /* in case REXW.b is set */
        insn->eaBase = EA_BASE_sib;
        readSIB(insn);
        if (readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
        if (readDisplacement(insn))
          return -1;
        break;
      }
      break;
    case 0x3:
      insn->eaDisplacement = EA_DISP_NONE;
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      break;
    }
    break;
  } /* switch (insn->addressSize) */

  return 0;
}

/*
 * GENERIC_FIXUP_FUNC - Defines a static function called `name` that turns a
 *   register number into a register enumerator chosen by operand type.
 *
 * @param name    - The name of the function to define.
 * @param base    - Expression that is added to the index for TYPE_Rv operands
 *                  (insn->regBase and insn->eaRegBase in the instantiations
 *                  below).
 * @param prefix  - Token pasted in front of the register names (for example
 *                  prefix##_AL) to pick the enumeration the result comes from.
 *
 * The generated function stores 1 in *valid on entry.  It clears *valid when
 * the operand type is not handled (returning 0), and when the index is out of
 * range for the MMX (> 7), segment (> 5), debug (> 7) or control (> 8)
 * register classes; in those out-of-range cases the computed value is still
 * returned.
 *
 * NOTE(review): the generated function returns uint8_t, so this presumably
 * relies on every enumerator involved fitting in 8 bits -- confirm against
 * the enum definitions.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
  static uint8_t name(struct InternalInstruction *insn,   \
                      OperandType type,                   \
                      uint8_t index,                      \
                      uint8_t *valid) {                   \
    *valid = 1;                                           \
    switch (type) {                                       \
    default:                                              \
      debug("Unhandled register type");                   \
      *valid = 0;                                         \
      return 0;                                           \
    case TYPE_Rv:                                         \
      return base + index;                                \
    case TYPE_R8:                                         \
      /* with a REX prefix, indices 4-7 map to SPL.. */   \
      if (insn->rexPrefix &&                              \
         index >= 4 && index <= 7) {                      \
        return prefix##_SPL + (index - 4);                \
      } else {                                            \
        return prefix##_AL + index;                       \
      }                                                   \
    case TYPE_R16:                                        \
      return prefix##_AX + index;                         \
    case TYPE_R32:                                        \
      return prefix##_EAX + index;                        \
    case TYPE_R64:                                        \
      return prefix##_RAX + index;                        \
    case TYPE_XMM256:                                     \
      return prefix##_YMM0 + index;                       \
    case TYPE_XMM128:                                     \
    case TYPE_XMM64:                                      \
    case TYPE_XMM32:                                      \
    case TYPE_XMM:                                        \
      return prefix##_XMM0 + index;                       \
    case TYPE_MM64:                                       \
    case TYPE_MM32:                                       \
    case TYPE_MM:                                         \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_MM0 + index;                        \
    case TYPE_SEGMENTREG:                                 \
      if (index > 5)                                      \
        *valid = 0;                                       \
      return prefix##_ES + index;                         \
    case TYPE_DEBUGREG:                                   \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_DR0 + index;                        \
    case TYPE_CONTROLREG:                                 \
      if (index > 8)                                      \
        *valid = 0;                                       \
      return prefix##_CR0 + index;                        \
    }                                                     \
  }

/*
 * fixup*Value - Consults an operand type to determine the meaning of the
 *   reg or R/M field.  If the operand is an XMM operand, for example, an
 *   operand would be XMM0 instead of AX, which readModRM() would otherwise
 *   misinterpret it as.
 *
 * @param insn  - The instruction containing the operand.
 * @param type  - The operand type.
 * @param index - The existing value of the field as reported by readModRM().
 * @param valid - The address of a uint8_t.  The target is set to 1 if the
 *                field is valid for the register class; 0 if not.
 * @return      - The proper value.
 */
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)

/*
 * fixupReg - Consults an operand specifier to determine which of the
 *   fixup*Value functions to use in correcting readModRM()'s interpretation.
 *
 * @param insn  - See fixup*Value().
 * @param op    - The operand specifier.
 * @return      - 0 if fixup was successful; -1 if the register returned was
 *                invalid for its class.
1254 */ 1255 static int fixupReg(struct InternalInstruction *insn, 1256 const struct OperandSpecifier *op) { 1257 uint8_t valid; 1258 1259 dbgprintf(insn, "fixupReg()"); 1260 1261 switch ((OperandEncoding)op->encoding) { 1262 default: 1263 debug("Expected a REG or R/M encoding in fixupReg"); 1264 return -1; 1265 case ENCODING_VVVV: 1266 insn->vvvv = (Reg)fixupRegValue(insn, 1267 (OperandType)op->type, 1268 insn->vvvv, 1269 &valid); 1270 if (!valid) 1271 return -1; 1272 break; 1273 case ENCODING_REG: 1274 insn->reg = (Reg)fixupRegValue(insn, 1275 (OperandType)op->type, 1276 insn->reg - insn->regBase, 1277 &valid); 1278 if (!valid) 1279 return -1; 1280 break; 1281 case ENCODING_RM: 1282 if (insn->eaBase >= insn->eaRegBase) { 1283 insn->eaBase = (EABase)fixupRMValue(insn, 1284 (OperandType)op->type, 1285 insn->eaBase - insn->eaRegBase, 1286 &valid); 1287 if (!valid) 1288 return -1; 1289 } 1290 break; 1291 } 1292 1293 return 0; 1294 } 1295 1296 /* 1297 * readOpcodeModifier - Reads an operand from the opcode field of an 1298 * instruction. Handles AddRegFrm instructions. 1299 * 1300 * @param insn - The instruction whose opcode field is to be read. 1301 * @param inModRM - Indicates that the opcode field is to be read from the 1302 * ModR/M extension; useful for escape opcodes 1303 * @return - 0 on success; nonzero otherwise. 
1304 */ 1305 static int readOpcodeModifier(struct InternalInstruction* insn) { 1306 dbgprintf(insn, "readOpcodeModifier()"); 1307 1308 if (insn->consumedOpcodeModifier) 1309 return 0; 1310 1311 insn->consumedOpcodeModifier = TRUE; 1312 1313 switch (insn->spec->modifierType) { 1314 default: 1315 debug("Unknown modifier type."); 1316 return -1; 1317 case MODIFIER_NONE: 1318 debug("No modifier but an operand expects one."); 1319 return -1; 1320 case MODIFIER_OPCODE: 1321 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1322 return 0; 1323 case MODIFIER_MODRM: 1324 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1325 return 0; 1326 } 1327 } 1328 1329 /* 1330 * readOpcodeRegister - Reads an operand from the opcode field of an 1331 * instruction and interprets it appropriately given the operand width. 1332 * Handles AddRegFrm instructions. 1333 * 1334 * @param insn - See readOpcodeModifier(). 1335 * @param size - The width (in bytes) of the register being specified. 1336 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1337 * RAX. 1338 * @return - 0 on success; nonzero otherwise. 
1339 */ 1340 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1341 dbgprintf(insn, "readOpcodeRegister()"); 1342 1343 if (readOpcodeModifier(insn)) 1344 return -1; 1345 1346 if (size == 0) 1347 size = insn->registerSize; 1348 1349 switch (size) { 1350 case 1: 1351 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1352 | insn->opcodeModifier)); 1353 if (insn->rexPrefix && 1354 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1355 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1356 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1357 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1358 } 1359 1360 break; 1361 case 2: 1362 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1363 + ((bFromREX(insn->rexPrefix) << 3) 1364 | insn->opcodeModifier)); 1365 break; 1366 case 4: 1367 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1368 + ((bFromREX(insn->rexPrefix) << 3) 1369 | insn->opcodeModifier)); 1370 break; 1371 case 8: 1372 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1373 + ((bFromREX(insn->rexPrefix) << 3) 1374 | insn->opcodeModifier)); 1375 break; 1376 } 1377 1378 return 0; 1379 } 1380 1381 /* 1382 * readImmediate - Consumes an immediate operand from an instruction, given the 1383 * desired operand size. 1384 * 1385 * @param insn - The instruction whose operand is to be read. 1386 * @param size - The width (in bytes) of the operand. 1387 * @return - 0 if the immediate was successfully consumed; nonzero 1388 * otherwise. 
1389 */ 1390 static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1391 uint8_t imm8; 1392 uint16_t imm16; 1393 uint32_t imm32; 1394 uint64_t imm64; 1395 1396 dbgprintf(insn, "readImmediate()"); 1397 1398 if (insn->numImmediatesConsumed == 2) { 1399 debug("Already consumed two immediates"); 1400 return -1; 1401 } 1402 1403 if (size == 0) 1404 size = insn->immediateSize; 1405 else 1406 insn->immediateSize = size; 1407 1408 switch (size) { 1409 case 1: 1410 if (consumeByte(insn, &imm8)) 1411 return -1; 1412 insn->immediates[insn->numImmediatesConsumed] = imm8; 1413 break; 1414 case 2: 1415 if (consumeUInt16(insn, &imm16)) 1416 return -1; 1417 insn->immediates[insn->numImmediatesConsumed] = imm16; 1418 break; 1419 case 4: 1420 if (consumeUInt32(insn, &imm32)) 1421 return -1; 1422 insn->immediates[insn->numImmediatesConsumed] = imm32; 1423 break; 1424 case 8: 1425 if (consumeUInt64(insn, &imm64)) 1426 return -1; 1427 insn->immediates[insn->numImmediatesConsumed] = imm64; 1428 break; 1429 } 1430 1431 insn->numImmediatesConsumed++; 1432 1433 return 0; 1434 } 1435 1436 /* 1437 * readVVVV - Consumes an immediate operand from an instruction, given the 1438 * desired operand size. 1439 * 1440 * @param insn - The instruction whose operand is to be read. 1441 * @return - 0 if the immediate was successfully consumed; nonzero 1442 * otherwise. 1443 */ 1444 static int readVVVV(struct InternalInstruction* insn) { 1445 dbgprintf(insn, "readVVVV()"); 1446 1447 if (insn->vexSize == 3) 1448 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1449 else if (insn->vexSize == 2) 1450 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1451 else 1452 return -1; 1453 1454 return 0; 1455 } 1456 1457 /* 1458 * readOperands - Consults the specifier for an instruction and consumes all 1459 * operands for that instruction, interpreting them as it goes. 1460 * 1461 * @param insn - The instruction whose operands are to be read and interpreted. 
1462 * @return - 0 if all operands could be read; nonzero otherwise. 1463 */ 1464 static int readOperands(struct InternalInstruction* insn) { 1465 int index; 1466 1467 dbgprintf(insn, "readOperands()"); 1468 1469 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1470 switch (insn->spec->operands[index].encoding) { 1471 case ENCODING_NONE: 1472 break; 1473 case ENCODING_REG: 1474 case ENCODING_RM: 1475 if (readModRM(insn)) 1476 return -1; 1477 if (fixupReg(insn, &insn->spec->operands[index])) 1478 return -1; 1479 break; 1480 case ENCODING_CB: 1481 case ENCODING_CW: 1482 case ENCODING_CD: 1483 case ENCODING_CP: 1484 case ENCODING_CO: 1485 case ENCODING_CT: 1486 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1487 return -1; 1488 case ENCODING_IB: 1489 if (readImmediate(insn, 1)) 1490 return -1; 1491 if (insn->spec->operands[index].type == TYPE_IMM3 && 1492 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1493 return -1; 1494 break; 1495 case ENCODING_IW: 1496 if (readImmediate(insn, 2)) 1497 return -1; 1498 break; 1499 case ENCODING_ID: 1500 if (readImmediate(insn, 4)) 1501 return -1; 1502 break; 1503 case ENCODING_IO: 1504 if (readImmediate(insn, 8)) 1505 return -1; 1506 break; 1507 case ENCODING_Iv: 1508 if (readImmediate(insn, insn->immediateSize)) 1509 return -1; 1510 break; 1511 case ENCODING_Ia: 1512 if (readImmediate(insn, insn->addressSize)) 1513 return -1; 1514 break; 1515 case ENCODING_RB: 1516 if (readOpcodeRegister(insn, 1)) 1517 return -1; 1518 break; 1519 case ENCODING_RW: 1520 if (readOpcodeRegister(insn, 2)) 1521 return -1; 1522 break; 1523 case ENCODING_RD: 1524 if (readOpcodeRegister(insn, 4)) 1525 return -1; 1526 break; 1527 case ENCODING_RO: 1528 if (readOpcodeRegister(insn, 8)) 1529 return -1; 1530 break; 1531 case ENCODING_Rv: 1532 if (readOpcodeRegister(insn, 0)) 1533 return -1; 1534 break; 1535 case ENCODING_I: 1536 if (readOpcodeModifier(insn)) 1537 return -1; 1538 break; 1539 case ENCODING_VVVV: 1540 if 
(readVVVV(insn)) 1541 return -1; 1542 if (fixupReg(insn, &insn->spec->operands[index])) 1543 return -1; 1544 break; 1545 case ENCODING_DUP: 1546 break; 1547 default: 1548 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1549 return -1; 1550 } 1551 } 1552 1553 return 0; 1554 } 1555 1556 /* 1557 * decodeInstruction - Reads and interprets a full instruction provided by the 1558 * user. 1559 * 1560 * @param insn - A pointer to the instruction to be populated. Must be 1561 * pre-allocated. 1562 * @param reader - The function to be used to read the instruction's bytes. 1563 * @param readerArg - A generic argument to be passed to the reader to store 1564 * any internal state. 1565 * @param logger - If non-NULL, the function to be used to write log messages 1566 * and warnings. 1567 * @param loggerArg - A generic argument to be passed to the logger to store 1568 * any internal state. 1569 * @param startLoc - The address (in the reader's address space) of the first 1570 * byte in the instruction. 1571 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1572 * decode the instruction in. 1573 * @return - 0 if the instruction's memory could be read; nonzero if 1574 * not. 
1575 */ 1576 int decodeInstruction(struct InternalInstruction* insn, 1577 byteReader_t reader, 1578 void* readerArg, 1579 dlog_t logger, 1580 void* loggerArg, 1581 uint64_t startLoc, 1582 DisassemblerMode mode) { 1583 memset(insn, 0, sizeof(struct InternalInstruction)); 1584 1585 insn->reader = reader; 1586 insn->readerArg = readerArg; 1587 insn->dlog = logger; 1588 insn->dlogArg = loggerArg; 1589 insn->startLocation = startLoc; 1590 insn->readerCursor = startLoc; 1591 insn->mode = mode; 1592 insn->numImmediatesConsumed = 0; 1593 1594 if (readPrefixes(insn) || 1595 readOpcode(insn) || 1596 getID(insn) || 1597 insn->instructionID == 0 || 1598 readOperands(insn)) 1599 return -1; 1600 1601 insn->length = insn->readerCursor - insn->startLocation; 1602 1603 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1604 startLoc, insn->readerCursor, insn->length); 1605 1606 if (insn->length > 15) 1607 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1608 1609 return 0; 1610 } 1611