1 //===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains the implementation of the instruction decoder. 12 // Documentation for the disassembler can be found in X86Disassembler.h. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include <cstdarg> /* for va_*() */ 17 #include <cstdio> /* for vsnprintf() */ 18 #include <cstdlib> /* for exit() */ 19 #include <cstring> /* for memset() */ 20 21 #include "X86DisassemblerDecoder.h" 22 23 using namespace llvm::X86Disassembler; 24 25 /// Specifies whether a ModR/M byte is needed and (if so) which 26 /// instruction each possible value of the ModR/M byte corresponds to. Once 27 /// this information is known, we have narrowed down to a single instruction. 28 struct ModRMDecision { 29 uint8_t modrm_type; 30 uint16_t instructionIDs; 31 }; 32 33 /// Specifies which set of ModR/M->instruction tables to look at 34 /// given a particular opcode. 35 struct OpcodeDecision { 36 ModRMDecision modRMDecisions[256]; 37 }; 38 39 /// Specifies which opcode->instruction tables to look at given 40 /// a particular context (set of attributes). Since there are many possible 41 /// contexts, the decoder first uses CONTEXTS_SYM to determine which context 42 /// applies given a specific set of attributes. Hence there are only IC_max 43 /// entries in this table, rather than 2^(ATTR_max). 
44 struct ContextDecision { 45 OpcodeDecision opcodeDecisions[IC_max]; 46 }; 47 48 #include "X86GenDisassemblerTables.inc" 49 50 #ifndef NDEBUG 51 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0) 52 #else 53 #define debug(s) do { } while (0) 54 #endif 55 56 /* 57 * contextForAttrs - Client for the instruction context table. Takes a set of 58 * attributes and returns the appropriate decode context. 59 * 60 * @param attrMask - Attributes, from the enumeration attributeBits. 61 * @return - The InstructionContext to use when looking up an 62 * an instruction with these attributes. 63 */ 64 static InstructionContext contextForAttrs(uint16_t attrMask) { 65 return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]); 66 } 67 68 /* 69 * modRMRequired - Reads the appropriate instruction table to determine whether 70 * the ModR/M byte is required to decode a particular instruction. 71 * 72 * @param type - The opcode type (i.e., how many bytes it has). 73 * @param insnContext - The context for the instruction, as returned by 74 * contextForAttrs. 75 * @param opcode - The last byte of the instruction's opcode, not counting 76 * ModR/M extensions and escapes. 77 * @return - true if the ModR/M byte is required, false otherwise. 78 */ 79 static int modRMRequired(OpcodeType type, 80 InstructionContext insnContext, 81 uint16_t opcode) { 82 const struct ContextDecision* decision = nullptr; 83 84 switch (type) { 85 case ONEBYTE: 86 decision = &ONEBYTE_SYM; 87 break; 88 case TWOBYTE: 89 decision = &TWOBYTE_SYM; 90 break; 91 case THREEBYTE_38: 92 decision = &THREEBYTE38_SYM; 93 break; 94 case THREEBYTE_3A: 95 decision = &THREEBYTE3A_SYM; 96 break; 97 case XOP8_MAP: 98 decision = &XOP8_MAP_SYM; 99 break; 100 case XOP9_MAP: 101 decision = &XOP9_MAP_SYM; 102 break; 103 case XOPA_MAP: 104 decision = &XOPA_MAP_SYM; 105 break; 106 } 107 108 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 
109 modrm_type != MODRM_ONEENTRY; 110 } 111 112 /* 113 * decode - Reads the appropriate instruction table to obtain the unique ID of 114 * an instruction. 115 * 116 * @param type - See modRMRequired(). 117 * @param insnContext - See modRMRequired(). 118 * @param opcode - See modRMRequired(). 119 * @param modRM - The ModR/M byte if required, or any value if not. 120 * @return - The UID of the instruction, or 0 on failure. 121 */ 122 static InstrUID decode(OpcodeType type, 123 InstructionContext insnContext, 124 uint8_t opcode, 125 uint8_t modRM) { 126 const struct ModRMDecision* dec = nullptr; 127 128 switch (type) { 129 case ONEBYTE: 130 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 131 break; 132 case TWOBYTE: 133 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 134 break; 135 case THREEBYTE_38: 136 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 137 break; 138 case THREEBYTE_3A: 139 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 140 break; 141 case XOP8_MAP: 142 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 143 break; 144 case XOP9_MAP: 145 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 146 break; 147 case XOPA_MAP: 148 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 149 break; 150 } 151 152 switch (dec->modrm_type) { 153 default: 154 debug("Corrupt table! 
Unknown modrm_type"); 155 return 0; 156 case MODRM_ONEENTRY: 157 return modRMTable[dec->instructionIDs]; 158 case MODRM_SPLITRM: 159 if (modFromModRM(modRM) == 0x3) 160 return modRMTable[dec->instructionIDs+1]; 161 return modRMTable[dec->instructionIDs]; 162 case MODRM_SPLITREG: 163 if (modFromModRM(modRM) == 0x3) 164 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 165 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 166 case MODRM_SPLITMISC: 167 if (modFromModRM(modRM) == 0x3) 168 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 169 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 170 case MODRM_FULL: 171 return modRMTable[dec->instructionIDs+modRM]; 172 } 173 } 174 175 /* 176 * specifierForUID - Given a UID, returns the name and operand specification for 177 * that instruction. 178 * 179 * @param uid - The unique ID for the instruction. This should be returned by 180 * decode(); specifierForUID will not check bounds. 181 * @return - A pointer to the specification for that instruction. 182 */ 183 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 184 return &INSTRUCTIONS_SYM[uid]; 185 } 186 187 /* 188 * consumeByte - Uses the reader function provided by the user to consume one 189 * byte from the instruction's memory and advance the cursor. 190 * 191 * @param insn - The instruction with the reader function to use. The cursor 192 * for this instruction is advanced. 193 * @param byte - A pointer to a pre-allocated memory buffer to be populated 194 * with the data read. 195 * @return - 0 if the read was successful; nonzero otherwise. 196 */ 197 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 198 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 199 200 if (!ret) 201 ++(insn->readerCursor); 202 203 return ret; 204 } 205 206 /* 207 * lookAtByte - Like consumeByte, but does not advance the cursor. 208 * 209 * @param insn - See consumeByte(). 
210 * @param byte - See consumeByte(). 211 * @return - See consumeByte(). 212 */ 213 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 214 return insn->reader(insn->readerArg, byte, insn->readerCursor); 215 } 216 217 static void unconsumeByte(struct InternalInstruction* insn) { 218 insn->readerCursor--; 219 } 220 221 #define CONSUME_FUNC(name, type) \ 222 static int name(struct InternalInstruction* insn, type* ptr) { \ 223 type combined = 0; \ 224 unsigned offset; \ 225 for (offset = 0; offset < sizeof(type); ++offset) { \ 226 uint8_t byte; \ 227 int ret = insn->reader(insn->readerArg, \ 228 &byte, \ 229 insn->readerCursor + offset); \ 230 if (ret) \ 231 return ret; \ 232 combined = combined | ((uint64_t)byte << (offset * 8)); \ 233 } \ 234 *ptr = combined; \ 235 insn->readerCursor += sizeof(type); \ 236 return 0; \ 237 } 238 239 /* 240 * consume* - Use the reader function provided by the user to consume data 241 * values of various sizes from the instruction's memory and advance the 242 * cursor appropriately. These readers perform endian conversion. 243 * 244 * @param insn - See consumeByte(). 245 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 246 * be populated with the data read. 247 * @return - See consumeByte(). 248 */ 249 CONSUME_FUNC(consumeInt8, int8_t) 250 CONSUME_FUNC(consumeInt16, int16_t) 251 CONSUME_FUNC(consumeInt32, int32_t) 252 CONSUME_FUNC(consumeUInt16, uint16_t) 253 CONSUME_FUNC(consumeUInt32, uint32_t) 254 CONSUME_FUNC(consumeUInt64, uint64_t) 255 256 /* 257 * dbgprintf - Uses the logging function provided by the user to log a single 258 * message, typically without a carriage-return. 259 * 260 * @param insn - The instruction containing the logging function. 261 * @param format - See printf(). 262 * @param ... - See printf(). 263 */ 264 static void dbgprintf(struct InternalInstruction* insn, 265 const char* format, 266 ...) 
{ 267 char buffer[256]; 268 va_list ap; 269 270 if (!insn->dlog) 271 return; 272 273 va_start(ap, format); 274 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 275 va_end(ap); 276 277 insn->dlog(insn->dlogArg, buffer); 278 } 279 280 /* 281 * setPrefixPresent - Marks that a particular prefix is present at a particular 282 * location. 283 * 284 * @param insn - The instruction to be marked as having the prefix. 285 * @param prefix - The prefix that is present. 286 * @param location - The location where the prefix is located (in the address 287 * space of the instruction's reader). 288 */ 289 static void setPrefixPresent(struct InternalInstruction* insn, 290 uint8_t prefix, 291 uint64_t location) 292 { 293 insn->prefixPresent[prefix] = 1; 294 insn->prefixLocations[prefix] = location; 295 } 296 297 /* 298 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 299 * present at a given location. 300 * 301 * @param insn - The instruction to be queried. 302 * @param prefix - The prefix. 303 * @param location - The location to query. 304 * @return - Whether the prefix is at that location. 305 */ 306 static bool isPrefixAtLocation(struct InternalInstruction* insn, 307 uint8_t prefix, 308 uint64_t location) 309 { 310 return insn->prefixPresent[prefix] == 1 && 311 insn->prefixLocations[prefix] == location; 312 } 313 314 /* 315 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 316 * instruction as having them. Also sets the instruction's default operand, 317 * address, and other relevant data sizes to report operands correctly. 318 * 319 * @param insn - The instruction whose prefixes are to be read. 320 * @return - 0 if the instruction could be read until the end of the prefix 321 * bytes, and no prefixes conflicted; nonzero otherwise. 
322 */ 323 static int readPrefixes(struct InternalInstruction* insn) { 324 bool isPrefix = true; 325 bool prefixGroups[4] = { false }; 326 uint64_t prefixLocation; 327 uint8_t byte = 0; 328 uint8_t nextByte; 329 330 bool hasAdSize = false; 331 bool hasOpSize = false; 332 333 dbgprintf(insn, "readPrefixes()"); 334 335 while (isPrefix) { 336 prefixLocation = insn->readerCursor; 337 338 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ 339 if (consumeByte(insn, &byte)) 340 break; 341 342 /* 343 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 344 * break and let it be disassembled as a normal "instruction". 345 */ 346 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) 347 break; 348 349 if (insn->readerCursor - 1 == insn->startLocation 350 && (byte == 0xf2 || byte == 0xf3) 351 && !lookAtByte(insn, &nextByte)) 352 { 353 /* 354 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 355 * met: 356 * - it is followed by a LOCK (0xf0) prefix 357 * - it is followed by an xchg instruction 358 * then it should be disassembled as a xacquire/xrelease not repne/rep. 359 */ 360 if ((byte == 0xf2 || byte == 0xf3) && 361 ((nextByte == 0xf0) || 362 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) 363 insn->xAcquireRelease = true; 364 /* 365 * Also if the byte is 0xf3, and the following condition is met: 366 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 367 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 368 * then it should be disassembled as an xrelease not rep. 
369 */ 370 if (byte == 0xf3 && 371 (nextByte == 0x88 || nextByte == 0x89 || 372 nextByte == 0xc6 || nextByte == 0xc7)) 373 insn->xAcquireRelease = true; 374 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 375 if (consumeByte(insn, &nextByte)) 376 return -1; 377 if (lookAtByte(insn, &nextByte)) 378 return -1; 379 unconsumeByte(insn); 380 } 381 if (nextByte != 0x0f && nextByte != 0x90) 382 break; 383 } 384 385 switch (byte) { 386 case 0xf0: /* LOCK */ 387 case 0xf2: /* REPNE/REPNZ */ 388 case 0xf3: /* REP or REPE/REPZ */ 389 if (prefixGroups[0]) 390 dbgprintf(insn, "Redundant Group 1 prefix"); 391 prefixGroups[0] = true; 392 setPrefixPresent(insn, byte, prefixLocation); 393 break; 394 case 0x2e: /* CS segment override -OR- Branch not taken */ 395 case 0x36: /* SS segment override -OR- Branch taken */ 396 case 0x3e: /* DS segment override */ 397 case 0x26: /* ES segment override */ 398 case 0x64: /* FS segment override */ 399 case 0x65: /* GS segment override */ 400 switch (byte) { 401 case 0x2e: 402 insn->segmentOverride = SEG_OVERRIDE_CS; 403 break; 404 case 0x36: 405 insn->segmentOverride = SEG_OVERRIDE_SS; 406 break; 407 case 0x3e: 408 insn->segmentOverride = SEG_OVERRIDE_DS; 409 break; 410 case 0x26: 411 insn->segmentOverride = SEG_OVERRIDE_ES; 412 break; 413 case 0x64: 414 insn->segmentOverride = SEG_OVERRIDE_FS; 415 break; 416 case 0x65: 417 insn->segmentOverride = SEG_OVERRIDE_GS; 418 break; 419 default: 420 debug("Unhandled override"); 421 return -1; 422 } 423 if (prefixGroups[1]) 424 dbgprintf(insn, "Redundant Group 2 prefix"); 425 prefixGroups[1] = true; 426 setPrefixPresent(insn, byte, prefixLocation); 427 break; 428 case 0x66: /* Operand-size override */ 429 if (prefixGroups[2]) 430 dbgprintf(insn, "Redundant Group 3 prefix"); 431 prefixGroups[2] = true; 432 hasOpSize = true; 433 setPrefixPresent(insn, byte, prefixLocation); 434 break; 435 case 0x67: /* Address-size override */ 436 if (prefixGroups[3]) 437 dbgprintf(insn, "Redundant Group 4 
prefix"); 438 prefixGroups[3] = true; 439 hasAdSize = true; 440 setPrefixPresent(insn, byte, prefixLocation); 441 break; 442 default: /* Not a prefix byte */ 443 isPrefix = false; 444 break; 445 } 446 447 if (isPrefix) 448 dbgprintf(insn, "Found prefix 0x%hhx", byte); 449 } 450 451 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 452 453 if (byte == 0x62) { 454 uint8_t byte1, byte2; 455 456 if (consumeByte(insn, &byte1)) { 457 dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); 458 return -1; 459 } 460 461 if (lookAtByte(insn, &byte2)) { 462 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 463 return -1; 464 } 465 466 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && 467 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { 468 insn->vectorExtensionType = TYPE_EVEX; 469 } else { 470 unconsumeByte(insn); /* unconsume byte1 */ 471 unconsumeByte(insn); /* unconsume byte */ 472 insn->necessaryPrefixLocation = insn->readerCursor - 2; 473 } 474 475 if (insn->vectorExtensionType == TYPE_EVEX) { 476 insn->vectorExtensionPrefix[0] = byte; 477 insn->vectorExtensionPrefix[1] = byte1; 478 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { 479 dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); 480 return -1; 481 } 482 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { 483 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); 484 return -1; 485 } 486 487 /* We simulate the REX prefix for simplicity's sake */ 488 if (insn->mode == MODE_64BIT) { 489 insn->rexPrefix = 0x40 490 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) 491 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) 492 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) 493 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 494 } 495 496 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 497 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 498 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); 499 } 500 
} else if (byte == 0xc4) { 501 uint8_t byte1; 502 503 if (lookAtByte(insn, &byte1)) { 504 dbgprintf(insn, "Couldn't read second byte of VEX"); 505 return -1; 506 } 507 508 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 509 insn->vectorExtensionType = TYPE_VEX_3B; 510 insn->necessaryPrefixLocation = insn->readerCursor - 1; 511 } else { 512 unconsumeByte(insn); 513 insn->necessaryPrefixLocation = insn->readerCursor - 1; 514 } 515 516 if (insn->vectorExtensionType == TYPE_VEX_3B) { 517 insn->vectorExtensionPrefix[0] = byte; 518 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 519 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 520 521 /* We simulate the REX prefix for simplicity's sake */ 522 523 if (insn->mode == MODE_64BIT) { 524 insn->rexPrefix = 0x40 525 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) 526 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) 527 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) 528 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 529 } 530 531 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 532 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 533 insn->vectorExtensionPrefix[2]); 534 } 535 } else if (byte == 0xc5) { 536 uint8_t byte1; 537 538 if (lookAtByte(insn, &byte1)) { 539 dbgprintf(insn, "Couldn't read second byte of VEX"); 540 return -1; 541 } 542 543 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 544 insn->vectorExtensionType = TYPE_VEX_2B; 545 } else { 546 unconsumeByte(insn); 547 } 548 549 if (insn->vectorExtensionType == TYPE_VEX_2B) { 550 insn->vectorExtensionPrefix[0] = byte; 551 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 552 553 if (insn->mode == MODE_64BIT) { 554 insn->rexPrefix = 0x40 555 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 556 } 557 558 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 559 default: 560 break; 561 case VEX_PREFIX_66: 562 hasOpSize = true; 563 break; 564 } 565 566 dbgprintf(insn, "Found VEX prefix 
0x%hhx 0x%hhx", 567 insn->vectorExtensionPrefix[0], 568 insn->vectorExtensionPrefix[1]); 569 } 570 } else if (byte == 0x8f) { 571 uint8_t byte1; 572 573 if (lookAtByte(insn, &byte1)) { 574 dbgprintf(insn, "Couldn't read second byte of XOP"); 575 return -1; 576 } 577 578 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ 579 insn->vectorExtensionType = TYPE_XOP; 580 insn->necessaryPrefixLocation = insn->readerCursor - 1; 581 } else { 582 unconsumeByte(insn); 583 insn->necessaryPrefixLocation = insn->readerCursor - 1; 584 } 585 586 if (insn->vectorExtensionType == TYPE_XOP) { 587 insn->vectorExtensionPrefix[0] = byte; 588 consumeByte(insn, &insn->vectorExtensionPrefix[1]); 589 consumeByte(insn, &insn->vectorExtensionPrefix[2]); 590 591 /* We simulate the REX prefix for simplicity's sake */ 592 593 if (insn->mode == MODE_64BIT) { 594 insn->rexPrefix = 0x40 595 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) 596 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) 597 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) 598 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 599 } 600 601 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 602 default: 603 break; 604 case VEX_PREFIX_66: 605 hasOpSize = true; 606 break; 607 } 608 609 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 610 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 611 insn->vectorExtensionPrefix[2]); 612 } 613 } else { 614 if (insn->mode == MODE_64BIT) { 615 if ((byte & 0xf0) == 0x40) { 616 uint8_t opcodeByte; 617 618 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 619 dbgprintf(insn, "Redundant REX prefix"); 620 return -1; 621 } 622 623 insn->rexPrefix = byte; 624 insn->necessaryPrefixLocation = insn->readerCursor - 2; 625 626 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 627 } else { 628 unconsumeByte(insn); 629 insn->necessaryPrefixLocation = insn->readerCursor - 1; 630 } 631 } else { 632 unconsumeByte(insn); 
633 insn->necessaryPrefixLocation = insn->readerCursor - 1; 634 } 635 } 636 637 if (insn->mode == MODE_16BIT) { 638 insn->registerSize = (hasOpSize ? 4 : 2); 639 insn->addressSize = (hasAdSize ? 4 : 2); 640 insn->displacementSize = (hasAdSize ? 4 : 2); 641 insn->immediateSize = (hasOpSize ? 4 : 2); 642 } else if (insn->mode == MODE_32BIT) { 643 insn->registerSize = (hasOpSize ? 2 : 4); 644 insn->addressSize = (hasAdSize ? 2 : 4); 645 insn->displacementSize = (hasAdSize ? 2 : 4); 646 insn->immediateSize = (hasOpSize ? 2 : 4); 647 } else if (insn->mode == MODE_64BIT) { 648 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 649 insn->registerSize = 8; 650 insn->addressSize = (hasAdSize ? 4 : 8); 651 insn->displacementSize = 4; 652 insn->immediateSize = 4; 653 } else if (insn->rexPrefix) { 654 insn->registerSize = (hasOpSize ? 2 : 4); 655 insn->addressSize = (hasAdSize ? 4 : 8); 656 insn->displacementSize = (hasOpSize ? 2 : 4); 657 insn->immediateSize = (hasOpSize ? 2 : 4); 658 } else { 659 insn->registerSize = (hasOpSize ? 2 : 4); 660 insn->addressSize = (hasAdSize ? 4 : 8); 661 insn->displacementSize = (hasOpSize ? 2 : 4); 662 insn->immediateSize = (hasOpSize ? 2 : 4); 663 } 664 } 665 666 return 0; 667 } 668 669 /* 670 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 671 * extended or escape opcodes). 672 * 673 * @param insn - The instruction whose opcode is to be read. 674 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 
675 */ 676 static int readOpcode(struct InternalInstruction* insn) { 677 /* Determine the length of the primary opcode */ 678 679 uint8_t current; 680 681 dbgprintf(insn, "readOpcode()"); 682 683 insn->opcodeType = ONEBYTE; 684 685 if (insn->vectorExtensionType == TYPE_EVEX) { 686 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 687 default: 688 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", 689 mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); 690 return -1; 691 case VEX_LOB_0F: 692 insn->opcodeType = TWOBYTE; 693 return consumeByte(insn, &insn->opcode); 694 case VEX_LOB_0F38: 695 insn->opcodeType = THREEBYTE_38; 696 return consumeByte(insn, &insn->opcode); 697 case VEX_LOB_0F3A: 698 insn->opcodeType = THREEBYTE_3A; 699 return consumeByte(insn, &insn->opcode); 700 } 701 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 702 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 703 default: 704 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 705 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 706 return -1; 707 case VEX_LOB_0F: 708 insn->opcodeType = TWOBYTE; 709 return consumeByte(insn, &insn->opcode); 710 case VEX_LOB_0F38: 711 insn->opcodeType = THREEBYTE_38; 712 return consumeByte(insn, &insn->opcode); 713 case VEX_LOB_0F3A: 714 insn->opcodeType = THREEBYTE_3A; 715 return consumeByte(insn, &insn->opcode); 716 } 717 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 718 insn->opcodeType = TWOBYTE; 719 return consumeByte(insn, &insn->opcode); 720 } else if (insn->vectorExtensionType == TYPE_XOP) { 721 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 722 default: 723 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 724 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); 725 return -1; 726 case XOP_MAP_SELECT_8: 727 insn->opcodeType = XOP8_MAP; 728 return consumeByte(insn, &insn->opcode); 729 case XOP_MAP_SELECT_9: 730 insn->opcodeType = XOP9_MAP; 731 return consumeByte(insn, 
&insn->opcode); 732 case XOP_MAP_SELECT_A: 733 insn->opcodeType = XOPA_MAP; 734 return consumeByte(insn, &insn->opcode); 735 } 736 } 737 738 if (consumeByte(insn, ¤t)) 739 return -1; 740 741 if (current == 0x0f) { 742 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 743 744 if (consumeByte(insn, ¤t)) 745 return -1; 746 747 if (current == 0x38) { 748 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 749 750 if (consumeByte(insn, ¤t)) 751 return -1; 752 753 insn->opcodeType = THREEBYTE_38; 754 } else if (current == 0x3a) { 755 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 756 757 if (consumeByte(insn, ¤t)) 758 return -1; 759 760 insn->opcodeType = THREEBYTE_3A; 761 } else { 762 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 763 764 insn->opcodeType = TWOBYTE; 765 } 766 } 767 768 /* 769 * At this point we have consumed the full opcode. 770 * Anything we consume from here on must be unconsumed. 771 */ 772 773 insn->opcode = current; 774 775 return 0; 776 } 777 778 static int readModRM(struct InternalInstruction* insn); 779 780 /* 781 * getIDWithAttrMask - Determines the ID of an instruction, consuming 782 * the ModR/M byte as appropriate for extended and escape opcodes, 783 * and using a supplied attribute mask. 784 * 785 * @param instructionID - A pointer whose target is filled in with the ID of the 786 * instruction. 787 * @param insn - The instruction whose ID is to be determined. 788 * @param attrMask - The attribute mask to search. 789 * @return - 0 if the ModR/M could be read when needed or was not 790 * needed; nonzero otherwise. 
791 */ 792 static int getIDWithAttrMask(uint16_t* instructionID, 793 struct InternalInstruction* insn, 794 uint16_t attrMask) { 795 bool hasModRMExtension; 796 797 InstructionContext instructionClass = contextForAttrs(attrMask); 798 799 hasModRMExtension = modRMRequired(insn->opcodeType, 800 instructionClass, 801 insn->opcode); 802 803 if (hasModRMExtension) { 804 if (readModRM(insn)) 805 return -1; 806 807 *instructionID = decode(insn->opcodeType, 808 instructionClass, 809 insn->opcode, 810 insn->modRM); 811 } else { 812 *instructionID = decode(insn->opcodeType, 813 instructionClass, 814 insn->opcode, 815 0); 816 } 817 818 return 0; 819 } 820 821 /* 822 * is16BitEquivalent - Determines whether two instruction names refer to 823 * equivalent instructions but one is 16-bit whereas the other is not. 824 * 825 * @param orig - The instruction that is not 16-bit 826 * @param equiv - The instruction that is 16-bit 827 */ 828 static bool is16BitEquivalent(const char* orig, const char* equiv) { 829 off_t i; 830 831 for (i = 0;; i++) { 832 if (orig[i] == '\0' && equiv[i] == '\0') 833 return true; 834 if (orig[i] == '\0' || equiv[i] == '\0') 835 return false; 836 if (orig[i] != equiv[i]) { 837 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 838 continue; 839 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 840 continue; 841 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 842 continue; 843 return false; 844 } 845 } 846 } 847 848 /* 849 * is64Bit - Determines whether this instruction is a 64-bit instruction. 850 * 851 * @param name - The instruction that is not 16-bit 852 */ 853 static bool is64Bit(const char* name) { 854 off_t i; 855 856 for (i = 0;; ++i) { 857 if (name[i] == '\0') 858 return false; 859 if (name[i] == '6' && name[i+1] == '4') 860 return true; 861 } 862 } 863 864 /* 865 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 866 * appropriate for extended and escape opcodes. 
Determines the attributes and 867 * context for the instruction before doing so. 868 * 869 * @param insn - The instruction whose ID is to be determined. 870 * @return - 0 if the ModR/M could be read when needed or was not needed; 871 * nonzero otherwise. 872 */ 873 static int getID(struct InternalInstruction* insn, const void *miiArg) { 874 uint16_t attrMask; 875 uint16_t instructionID; 876 877 dbgprintf(insn, "getID()"); 878 879 attrMask = ATTR_NONE; 880 881 if (insn->mode == MODE_64BIT) 882 attrMask |= ATTR_64BIT; 883 884 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 885 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX; 886 887 if (insn->vectorExtensionType == TYPE_EVEX) { 888 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { 889 case VEX_PREFIX_66: 890 attrMask |= ATTR_OPSIZE; 891 break; 892 case VEX_PREFIX_F3: 893 attrMask |= ATTR_XS; 894 break; 895 case VEX_PREFIX_F2: 896 attrMask |= ATTR_XD; 897 break; 898 } 899 900 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) 901 attrMask |= ATTR_EVEXKZ; 902 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) 903 attrMask |= ATTR_EVEXB; 904 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) 905 attrMask |= ATTR_EVEXK; 906 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) 907 attrMask |= ATTR_EVEXL; 908 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 909 attrMask |= ATTR_EVEXL2; 910 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 911 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { 912 case VEX_PREFIX_66: 913 attrMask |= ATTR_OPSIZE; 914 break; 915 case VEX_PREFIX_F3: 916 attrMask |= ATTR_XS; 917 break; 918 case VEX_PREFIX_F2: 919 attrMask |= ATTR_XD; 920 break; 921 } 922 923 if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) 924 attrMask |= ATTR_VEXL; 925 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 926 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 927 case VEX_PREFIX_66: 928 attrMask |= ATTR_OPSIZE; 929 break; 930 case VEX_PREFIX_F3: 931 
attrMask |= ATTR_XS; 932 break; 933 case VEX_PREFIX_F2: 934 attrMask |= ATTR_XD; 935 break; 936 } 937 938 if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) 939 attrMask |= ATTR_VEXL; 940 } else if (insn->vectorExtensionType == TYPE_XOP) { 941 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 942 case VEX_PREFIX_66: 943 attrMask |= ATTR_OPSIZE; 944 break; 945 case VEX_PREFIX_F3: 946 attrMask |= ATTR_XS; 947 break; 948 case VEX_PREFIX_F2: 949 attrMask |= ATTR_XD; 950 break; 951 } 952 953 if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) 954 attrMask |= ATTR_VEXL; 955 } else { 956 return -1; 957 } 958 } else { 959 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 960 attrMask |= ATTR_OPSIZE; 961 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 962 attrMask |= ATTR_ADSIZE; 963 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 964 attrMask |= ATTR_XS; 965 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 966 attrMask |= ATTR_XD; 967 } 968 969 if (insn->rexPrefix & 0x08) 970 attrMask |= ATTR_REXW; 971 972 /* 973 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning 974 * of the AdSize prefix is inverted w.r.t. 32-bit mode. 975 */ 976 if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE && 977 insn->opcode == 0xE3) 978 attrMask ^= ATTR_ADSIZE; 979 980 /* 981 * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix 982 * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes 983 */ 984 985 if (insn->mode == MODE_64BIT && 986 isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { 987 switch (insn->opcode) { 988 case 0xE8: 989 case 0xE9: 990 // Take care of psubsb and other mmx instructions. 
991 if (insn->opcodeType == ONEBYTE) { 992 attrMask ^= ATTR_OPSIZE; 993 insn->immediateSize = 4; 994 insn->displacementSize = 4; 995 } 996 break; 997 case 0x82: 998 case 0x83: 999 case 0x84: 1000 case 0x85: 1001 case 0x86: 1002 case 0x87: 1003 case 0x88: 1004 case 0x89: 1005 case 0x8A: 1006 case 0x8B: 1007 case 0x8C: 1008 case 0x8D: 1009 case 0x8E: 1010 case 0x8F: 1011 // Take care of lea and three byte ops. 1012 if (insn->opcodeType == TWOBYTE) { 1013 attrMask ^= ATTR_OPSIZE; 1014 insn->immediateSize = 4; 1015 insn->displacementSize = 4; 1016 } 1017 break; 1018 } 1019 } 1020 1021 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1022 return -1; 1023 1024 /* The following clauses compensate for limitations of the tables. */ 1025 1026 if (insn->mode != MODE_64BIT && 1027 insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1028 /* 1029 * The tables can't distinquish between cases where the W-bit is used to 1030 * select register size and cases where its a required part of the opcode. 1031 */ 1032 if ((insn->vectorExtensionType == TYPE_EVEX && 1033 wFromEVEX3of4(insn->vectorExtensionPrefix[2])) || 1034 (insn->vectorExtensionType == TYPE_VEX_3B && 1035 wFromVEX3of3(insn->vectorExtensionPrefix[2])) || 1036 (insn->vectorExtensionType == TYPE_XOP && 1037 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { 1038 1039 uint16_t instructionIDWithREXW; 1040 if (getIDWithAttrMask(&instructionIDWithREXW, 1041 insn, attrMask | ATTR_REXW)) { 1042 insn->instructionID = instructionID; 1043 insn->spec = specifierForUID(instructionID); 1044 return 0; 1045 } 1046 1047 const char *SpecName = GetInstrName(instructionIDWithREXW, miiArg); 1048 // If not a 64-bit instruction. Switch the opcode. 1049 if (!is64Bit(SpecName)) { 1050 insn->instructionID = instructionIDWithREXW; 1051 insn->spec = specifierForUID(instructionIDWithREXW); 1052 return 0; 1053 } 1054 } 1055 } 1056 1057 /* 1058 * Absolute moves need special handling. 
1059 * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are 1060 * inverted w.r.t. 1061 * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in 1062 * any position. 1063 */ 1064 if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { 1065 /* Make sure we observed the prefixes in any position. */ 1066 if (insn->prefixPresent[0x67]) 1067 attrMask |= ATTR_ADSIZE; 1068 if (insn->prefixPresent[0x66]) 1069 attrMask |= ATTR_OPSIZE; 1070 1071 /* In 16-bit, invert the attributes. */ 1072 if (insn->mode == MODE_16BIT) 1073 attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE; 1074 1075 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 1076 return -1; 1077 1078 insn->instructionID = instructionID; 1079 insn->spec = specifierForUID(instructionID); 1080 return 0; 1081 } 1082 1083 if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && 1084 !(attrMask & ATTR_OPSIZE)) { 1085 /* 1086 * The instruction tables make no distinction between instructions that 1087 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 1088 * particular spot (i.e., many MMX operations). In general we're 1089 * conservative, but in the specific case where OpSize is present but not 1090 * in the right place we check if there's a 16-bit operation. 
1091 */ 1092 1093 const struct InstructionSpecifier *spec; 1094 uint16_t instructionIDWithOpsize; 1095 const char *specName, *specWithOpSizeName; 1096 1097 spec = specifierForUID(instructionID); 1098 1099 if (getIDWithAttrMask(&instructionIDWithOpsize, 1100 insn, 1101 attrMask | ATTR_OPSIZE)) { 1102 /* 1103 * ModRM required with OpSize but not present; give up and return version 1104 * without OpSize set 1105 */ 1106 1107 insn->instructionID = instructionID; 1108 insn->spec = spec; 1109 return 0; 1110 } 1111 1112 specName = GetInstrName(instructionID, miiArg); 1113 specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); 1114 1115 if (is16BitEquivalent(specName, specWithOpSizeName) && 1116 (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { 1117 insn->instructionID = instructionIDWithOpsize; 1118 insn->spec = specifierForUID(instructionIDWithOpsize); 1119 } else { 1120 insn->instructionID = instructionID; 1121 insn->spec = spec; 1122 } 1123 return 0; 1124 } 1125 1126 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 1127 insn->rexPrefix & 0x01) { 1128 /* 1129 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 1130 * it should decode as XCHG %r8, %eax. 
1131 */ 1132 1133 const struct InstructionSpecifier *spec; 1134 uint16_t instructionIDWithNewOpcode; 1135 const struct InstructionSpecifier *specWithNewOpcode; 1136 1137 spec = specifierForUID(instructionID); 1138 1139 /* Borrow opcode from one of the other XCHGar opcodes */ 1140 insn->opcode = 0x91; 1141 1142 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 1143 insn, 1144 attrMask)) { 1145 insn->opcode = 0x90; 1146 1147 insn->instructionID = instructionID; 1148 insn->spec = spec; 1149 return 0; 1150 } 1151 1152 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 1153 1154 /* Change back */ 1155 insn->opcode = 0x90; 1156 1157 insn->instructionID = instructionIDWithNewOpcode; 1158 insn->spec = specWithNewOpcode; 1159 1160 return 0; 1161 } 1162 1163 insn->instructionID = instructionID; 1164 insn->spec = specifierForUID(insn->instructionID); 1165 1166 return 0; 1167 } 1168 1169 /* 1170 * readSIB - Consumes the SIB byte to determine addressing information for an 1171 * instruction. 1172 * 1173 * @param insn - The instruction whose SIB byte is to be read. 1174 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 
1175 */ 1176 static int readSIB(struct InternalInstruction* insn) { 1177 SIBIndex sibIndexBase = SIB_INDEX_NONE; 1178 SIBBase sibBaseBase = SIB_BASE_NONE; 1179 uint8_t index, base; 1180 1181 dbgprintf(insn, "readSIB()"); 1182 1183 if (insn->consumedSIB) 1184 return 0; 1185 1186 insn->consumedSIB = true; 1187 1188 switch (insn->addressSize) { 1189 case 2: 1190 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 1191 return -1; 1192 case 4: 1193 sibIndexBase = SIB_INDEX_EAX; 1194 sibBaseBase = SIB_BASE_EAX; 1195 break; 1196 case 8: 1197 sibIndexBase = SIB_INDEX_RAX; 1198 sibBaseBase = SIB_BASE_RAX; 1199 break; 1200 } 1201 1202 if (consumeByte(insn, &insn->sib)) 1203 return -1; 1204 1205 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 1206 1207 // FIXME: The fifth bit (bit index 4) is only to be used for instructions 1208 // that understand VSIB indexing. ORing the bit in here is mildy dangerous 1209 // because performing math on an 'enum SIBIndex' can produce garbage. 1210 // Excluding the "none" value, it should cover 6 spaces of register names: 1211 // - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI 1212 // - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX 1213 // - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX 1214 // - 32 possibilities for each of XMM, YMM, ZMM registers 1215 // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode, 1216 // summing in a fully decoded index between 0 and 31 can end up with a value 1217 // that looks like something in the low half of the XMM range. 
1218 // translateRMMemory() tries to reverse the damage, with only partial success, 1219 // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt" 1220 if (insn->vectorExtensionType == TYPE_EVEX) 1221 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; 1222 1223 if (index == 0x4) { 1224 insn->sibIndex = SIB_INDEX_NONE; 1225 } else { 1226 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 1227 } 1228 1229 insn->sibScale = 1 << scaleFromSIB(insn->sib); 1230 1231 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1232 1233 switch (base) { 1234 case 0x5: 1235 case 0xd: 1236 switch (modFromModRM(insn->modRM)) { 1237 case 0x0: 1238 insn->eaDisplacement = EA_DISP_32; 1239 insn->sibBase = SIB_BASE_NONE; 1240 break; 1241 case 0x1: 1242 insn->eaDisplacement = EA_DISP_8; 1243 insn->sibBase = (SIBBase)(sibBaseBase + base); 1244 break; 1245 case 0x2: 1246 insn->eaDisplacement = EA_DISP_32; 1247 insn->sibBase = (SIBBase)(sibBaseBase + base); 1248 break; 1249 case 0x3: 1250 debug("Cannot have Mod = 0b11 and a SIB byte"); 1251 return -1; 1252 } 1253 break; 1254 default: 1255 insn->sibBase = (SIBBase)(sibBaseBase + base); 1256 break; 1257 } 1258 1259 return 0; 1260 } 1261 1262 /* 1263 * readDisplacement - Consumes the displacement of an instruction. 1264 * 1265 * @param insn - The instruction whose displacement is to be read. 1266 * @return - 0 if the displacement byte was successfully read; nonzero 1267 * otherwise. 
 */
static int readDisplacement(struct InternalInstruction* insn) {
  int8_t d8;
  int16_t d16;
  int32_t d32;

  dbgprintf(insn, "readDisplacement()");

  // The displacement is read at most once; later calls report success.
  if (insn->consumedDisplacement)
    return 0;

  insn->consumedDisplacement = true;
  // Record where the displacement starts, relative to the first byte of the
  // instruction.
  insn->displacementOffset = insn->readerCursor - insn->startLocation;

  // The signed temporaries make the assignment to insn->displacement
  // sign-extend the value that was read.
  switch (insn->eaDisplacement) {
  case EA_DISP_NONE:
    // NOTE(review): this store is overwritten by the unconditional
    // "consumedDisplacement = true" below, so it has no lasting effect.
    insn->consumedDisplacement = false;
    break;
  case EA_DISP_8:
    if (consumeInt8(insn, &d8))
      return -1;
    insn->displacement = d8;
    break;
  case EA_DISP_16:
    if (consumeInt16(insn, &d16))
      return -1;
    insn->displacement = d16;
    break;
  case EA_DISP_32:
    if (consumeInt32(insn, &d32))
      return -1;
    insn->displacement = d32;
    break;
  }

  insn->consumedDisplacement = true;
  return 0;
}

/*
 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
 *   displacement) for an instruction and interprets it.
 *
 *   On success the following fields of insn have been filled in: modRM, reg,
 *   regBase, eaRegBase, eaBaseBase, eaBase and eaDisplacement, plus whatever
 *   readSIB() and readDisplacement() set when they are invoked.  The ModR/M
 *   byte is consumed at most once; later calls return 0 immediately.
 *
 * @param insn  - The instruction whose addressing information is to be read.
 * @return      - 0 if the information was successfully read; nonzero otherwise.
 */
static int readModRM(struct InternalInstruction* insn) {
  uint8_t mod, rm, reg;

  dbgprintf(insn, "readModRM()");

  if (insn->consumedModRM)
    return 0;

  if (consumeByte(insn, &insn->modRM))
    return -1;
  insn->consumedModRM = true;

  mod     = modFromModRM(insn->modRM);
  rm      = rmFromModRM(insn->modRM);
  reg     = regFromModRM(insn->modRM);

  /*
   * This goes by insn->registerSize to pick the correct register, which messes
   * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
   * fixupReg().
   */
  switch (insn->registerSize) {
  case 2:
    insn->regBase = MODRM_REG_AX;
    insn->eaRegBase = EA_REG_AX;
    break;
  case 4:
    insn->regBase = MODRM_REG_EAX;
    insn->eaRegBase = EA_REG_EAX;
    break;
  case 8:
    insn->regBase = MODRM_REG_RAX;
    insn->eaRegBase = EA_REG_RAX;
    break;
  }

  // Extend the 3-bit fields: REX.R / REX.B supply bit 3, and for EVEX the
  // R' and X bits of the second prefix byte supply bit 4.
  reg |= rFromREX(insn->rexPrefix) << 3;
  rm  |= bFromREX(insn->rexPrefix) << 3;
  if (insn->vectorExtensionType == TYPE_EVEX) {
    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
    rm  |=  xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
  }

  insn->reg = (Reg)(insn->regBase + reg);

  switch (insn->addressSize) {
  case 2:
    // 16-bit addressing: no SIB byte; bases are the BX+SI style combinations.
    insn->eaBaseBase = EA_BASE_BX_SI;

    switch (mod) {
    case 0x0:
      if (rm == 0x6) {
        // mod=0, rm=6: a bare 16-bit displacement with no base register.
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_16;
        if (readDisplacement(insn))
          return -1;
      } else {
        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
        insn->eaDisplacement = EA_DISP_NONE;
      }
      break;
    case 0x1:
      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_8;
      insn->displacementSize = 1;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x2:
      insn->eaBase = (EABase)(insn->eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_16;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x3:
      // Register-direct operand.
      // NOTE(review): eaDisplacement is not assigned on this path before
      // readDisplacement() is called, unlike the 32/64-bit mod=3 case below,
      // which sets EA_DISP_NONE and does not call readDisplacement().
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      if (readDisplacement(insn))
        return -1;
      break;
    }
    break;
  case 4:
  case 8:
    insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

    switch (mod) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
      // In determining whether RIP-relative mode is used (rm=5),
      // or whether a SIB byte is present (rm=4),
      // the extension bits (REX.b and EVEX.x) are ignored.
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        insn->eaBase = (insn->addressSize == 4 ?
                        EA_BASE_sib : EA_BASE_sib64);
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      case 0x5: // RIP-relative (recorded as: no base, 32-bit displacement)
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_32;
        if (readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
        break;
      }
      break;
    case 0x1:
      insn->displacementSize = 1;
      /* FALLTHROUGH */
    case 0x2:
      // mod=1 carries an 8-bit displacement, mod=2 a 32-bit one.
      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        // NOTE(review): EA_BASE_sib is used here for both address sizes,
        // whereas the mod=0 case above selects EA_BASE_sib64 for 8-byte
        // addressing.
        insn->eaBase = EA_BASE_sib;
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
        if (readDisplacement(insn))
          return -1;
        break;
      }
      break;
    case 0x3:
      // Register-direct operand: no memory reference, no displacement.
      insn->eaDisplacement = EA_DISP_NONE;
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      break;
    }
    break;
  } /* switch (insn->addressSize) */

  return 0;
}

/*
 * GENERIC_FIXUP_FUNC - Defines a static function that maps a register index
 *   to the enumerator appropriate for a given operand type.
 *
 * @param name    - The name of the function to define.
 * @param base    - The expression added to the index for TYPE_Rv operands.
 * @param prefix  - The token pasted in front of _AL, _AX, _XMM0, ... to form
 *                  the enumerator names used for every other operand type.
 *
 * The generated function sets *valid to 1 on entry.  It clears *valid for an
 * unhandled operand type (returning 0), for a mask register index above 7, a
 * segment register index above 5, and a bound register index above 3; in the
 * last three cases the computed value is still returned.  TYPE_R8 indices 4-7
 * map to prefix##_SPL onward when any REX prefix is present, and TYPE_MM64
 * uses only the low three bits of the index.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
  static uint16_t name(struct InternalInstruction *insn,  \
                       OperandType type,                  \
                       uint8_t index,                     \
                       uint8_t *valid) {                  \
    *valid = 1;                                           \
    switch (type) {                                       \
    default:                                              \
      debug("Unhandled register type");                   \
      *valid = 0;                                         \
      return 0;                                           \
    case TYPE_Rv:                                         \
      return base + index;                                \
    case TYPE_R8:                                         \
      if (insn->rexPrefix &&                              \
         index >= 4 && index <= 7) {                      \
        return prefix##_SPL + (index - 4);                \
      } else {                                            \
        return prefix##_AL + index;                       \
      }                                                   \
    case TYPE_R16:                                        \
      return prefix##_AX + index;                         \
    case TYPE_R32:                                        \
      return prefix##_EAX + index;                        \
    case TYPE_R64:                                        \
      return prefix##_RAX + index;                        \
    case TYPE_XMM512:                                     \
      return prefix##_ZMM0 + index;                       \
    case TYPE_XMM256:                                     \
      return prefix##_YMM0 + index;                       \
    case TYPE_XMM128:                                     \
    case TYPE_XMM64:                                      \
    case TYPE_XMM32:                                      \
      return prefix##_XMM0 + index;                       \
    case TYPE_VK1:                                        \
    case TYPE_VK2:                                        \
    case TYPE_VK4:                                        \
    case TYPE_VK8:                                        \
    case TYPE_VK16:                                       \
    case TYPE_VK32:                                       \
    case TYPE_VK64:                                       \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_K0 + index;                         \
    case TYPE_MM64:                                       \
      return prefix##_MM0 + (index & 0x7);                \
    case TYPE_SEGMENTREG:                                 \
      if (index > 5)                                      \
        *valid = 0;                                       \
      return prefix##_ES + index;                         \
    case TYPE_DEBUGREG:                                   \
      return prefix##_DR0 + index;                        \
    case TYPE_CONTROLREG:                                 \
      return prefix##_CR0 + index;                        \
    case TYPE_BNDR:                                       \
      if (index > 3)                                      \
        *valid = 0;                                       \
      return prefix##_BND0 + index;                       \
    }                                                     \
  }

/*
 * fixup*Value - Consults an operand type to determine the meaning of the
 *   reg or R/M field.  If the operand is an XMM operand, for example, the
 *   operand should be XMM0 rather than AX, which is how readModRM() would
 *   otherwise have interpreted it.
 *
 * @param insn  - The instruction containing the operand.
 * @param type  - The operand type.
 * @param index - The existing value of the field as reported by readModRM().
 * @param valid - The address of a uint8_t.  The target is set to 1 if the
 *                field is valid for the register class; 0 if not.
 * @return      - The proper value.
 */
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)

/*
 * fixupReg - Consults an operand specifier to determine which of the
 *   fixup*Value functions to use in correcting readModRM()'s interpretation.
 *
 * @param insn  - See fixup*Value().
 * @param op    - The operand specifier.
 * @return      - 0 if fixup was successful; -1 if the register returned was
 *                invalid for its class.
1537 */ 1538 static int fixupReg(struct InternalInstruction *insn, 1539 const struct OperandSpecifier *op) { 1540 uint8_t valid; 1541 1542 dbgprintf(insn, "fixupReg()"); 1543 1544 switch ((OperandEncoding)op->encoding) { 1545 default: 1546 debug("Expected a REG or R/M encoding in fixupReg"); 1547 return -1; 1548 case ENCODING_VVVV: 1549 insn->vvvv = (Reg)fixupRegValue(insn, 1550 (OperandType)op->type, 1551 insn->vvvv, 1552 &valid); 1553 if (!valid) 1554 return -1; 1555 break; 1556 case ENCODING_REG: 1557 insn->reg = (Reg)fixupRegValue(insn, 1558 (OperandType)op->type, 1559 insn->reg - insn->regBase, 1560 &valid); 1561 if (!valid) 1562 return -1; 1563 break; 1564 CASE_ENCODING_RM: 1565 if (insn->eaBase >= insn->eaRegBase) { 1566 insn->eaBase = (EABase)fixupRMValue(insn, 1567 (OperandType)op->type, 1568 insn->eaBase - insn->eaRegBase, 1569 &valid); 1570 if (!valid) 1571 return -1; 1572 } 1573 break; 1574 } 1575 1576 return 0; 1577 } 1578 1579 /* 1580 * readOpcodeRegister - Reads an operand from the opcode field of an 1581 * instruction and interprets it appropriately given the operand width. 1582 * Handles AddRegFrm instructions. 1583 * 1584 * @param insn - the instruction whose opcode field is to be read. 1585 * @param size - The width (in bytes) of the register being specified. 1586 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1587 * RAX. 1588 * @return - 0 on success; nonzero otherwise. 
1589 */ 1590 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1591 dbgprintf(insn, "readOpcodeRegister()"); 1592 1593 if (size == 0) 1594 size = insn->registerSize; 1595 1596 switch (size) { 1597 case 1: 1598 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1599 | (insn->opcode & 7))); 1600 if (insn->rexPrefix && 1601 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1602 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1603 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1604 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1605 } 1606 1607 break; 1608 case 2: 1609 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1610 + ((bFromREX(insn->rexPrefix) << 3) 1611 | (insn->opcode & 7))); 1612 break; 1613 case 4: 1614 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1615 + ((bFromREX(insn->rexPrefix) << 3) 1616 | (insn->opcode & 7))); 1617 break; 1618 case 8: 1619 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1620 + ((bFromREX(insn->rexPrefix) << 3) 1621 | (insn->opcode & 7))); 1622 break; 1623 } 1624 1625 return 0; 1626 } 1627 1628 /* 1629 * readImmediate - Consumes an immediate operand from an instruction, given the 1630 * desired operand size. 1631 * 1632 * @param insn - The instruction whose operand is to be read. 1633 * @param size - The width (in bytes) of the operand. 1634 * @return - 0 if the immediate was successfully consumed; nonzero 1635 * otherwise. 
1636 */ 1637 static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1638 uint8_t imm8; 1639 uint16_t imm16; 1640 uint32_t imm32; 1641 uint64_t imm64; 1642 1643 dbgprintf(insn, "readImmediate()"); 1644 1645 if (insn->numImmediatesConsumed == 2) { 1646 debug("Already consumed two immediates"); 1647 return -1; 1648 } 1649 1650 if (size == 0) 1651 size = insn->immediateSize; 1652 else 1653 insn->immediateSize = size; 1654 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1655 1656 switch (size) { 1657 case 1: 1658 if (consumeByte(insn, &imm8)) 1659 return -1; 1660 insn->immediates[insn->numImmediatesConsumed] = imm8; 1661 break; 1662 case 2: 1663 if (consumeUInt16(insn, &imm16)) 1664 return -1; 1665 insn->immediates[insn->numImmediatesConsumed] = imm16; 1666 break; 1667 case 4: 1668 if (consumeUInt32(insn, &imm32)) 1669 return -1; 1670 insn->immediates[insn->numImmediatesConsumed] = imm32; 1671 break; 1672 case 8: 1673 if (consumeUInt64(insn, &imm64)) 1674 return -1; 1675 insn->immediates[insn->numImmediatesConsumed] = imm64; 1676 break; 1677 } 1678 1679 insn->numImmediatesConsumed++; 1680 1681 return 0; 1682 } 1683 1684 /* 1685 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1686 * 1687 * @param insn - The instruction whose operand is to be read. 1688 * @return - 0 if the vvvv was successfully consumed; nonzero 1689 * otherwise. 
1690 */ 1691 static int readVVVV(struct InternalInstruction* insn) { 1692 dbgprintf(insn, "readVVVV()"); 1693 1694 int vvvv; 1695 if (insn->vectorExtensionType == TYPE_EVEX) 1696 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 1697 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 1698 else if (insn->vectorExtensionType == TYPE_VEX_3B) 1699 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 1700 else if (insn->vectorExtensionType == TYPE_VEX_2B) 1701 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 1702 else if (insn->vectorExtensionType == TYPE_XOP) 1703 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 1704 else 1705 return -1; 1706 1707 if (insn->mode != MODE_64BIT) 1708 vvvv &= 0x7; 1709 1710 insn->vvvv = static_cast<Reg>(vvvv); 1711 return 0; 1712 } 1713 1714 /* 1715 * readMaskRegister - Reads an mask register from the opcode field of an 1716 * instruction. 1717 * 1718 * @param insn - The instruction whose opcode field is to be read. 1719 * @return - 0 on success; nonzero otherwise. 1720 */ 1721 static int readMaskRegister(struct InternalInstruction* insn) { 1722 dbgprintf(insn, "readMaskRegister()"); 1723 1724 if (insn->vectorExtensionType != TYPE_EVEX) 1725 return -1; 1726 1727 insn->writemask = 1728 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); 1729 return 0; 1730 } 1731 1732 /* 1733 * readOperands - Consults the specifier for an instruction and consumes all 1734 * operands for that instruction, interpreting them as it goes. 1735 * 1736 * @param insn - The instruction whose operands are to be read and interpreted. 1737 * @return - 0 if all operands could be read; nonzero otherwise. 1738 */ 1739 static int readOperands(struct InternalInstruction* insn) { 1740 int hasVVVV, needVVVV; 1741 int sawRegImm = 0; 1742 1743 dbgprintf(insn, "readOperands()"); 1744 1745 /* If non-zero vvvv specified, need to make sure one of the operands 1746 uses it. 
*/ 1747 hasVVVV = !readVVVV(insn); 1748 needVVVV = hasVVVV && (insn->vvvv != 0); 1749 1750 for (const auto &Op : x86OperandSets[insn->spec->operands]) { 1751 switch (Op.encoding) { 1752 case ENCODING_NONE: 1753 case ENCODING_SI: 1754 case ENCODING_DI: 1755 break; 1756 case ENCODING_REG: 1757 CASE_ENCODING_RM: 1758 if (readModRM(insn)) 1759 return -1; 1760 if (fixupReg(insn, &Op)) 1761 return -1; 1762 // Apply the AVX512 compressed displacement scaling factor. 1763 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1764 insn->displacement *= 1 << (Op.encoding - ENCODING_RM); 1765 break; 1766 case ENCODING_IB: 1767 if (sawRegImm) { 1768 /* Saw a register immediate so don't read again and instead split the 1769 previous immediate. FIXME: This is a hack. */ 1770 insn->immediates[insn->numImmediatesConsumed] = 1771 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1772 ++insn->numImmediatesConsumed; 1773 break; 1774 } 1775 if (readImmediate(insn, 1)) 1776 return -1; 1777 if (Op.type == TYPE_XMM128 || 1778 Op.type == TYPE_XMM256) 1779 sawRegImm = 1; 1780 break; 1781 case ENCODING_IW: 1782 if (readImmediate(insn, 2)) 1783 return -1; 1784 break; 1785 case ENCODING_ID: 1786 if (readImmediate(insn, 4)) 1787 return -1; 1788 break; 1789 case ENCODING_IO: 1790 if (readImmediate(insn, 8)) 1791 return -1; 1792 break; 1793 case ENCODING_Iv: 1794 if (readImmediate(insn, insn->immediateSize)) 1795 return -1; 1796 break; 1797 case ENCODING_Ia: 1798 if (readImmediate(insn, insn->addressSize)) 1799 return -1; 1800 break; 1801 case ENCODING_RB: 1802 if (readOpcodeRegister(insn, 1)) 1803 return -1; 1804 break; 1805 case ENCODING_RW: 1806 if (readOpcodeRegister(insn, 2)) 1807 return -1; 1808 break; 1809 case ENCODING_RD: 1810 if (readOpcodeRegister(insn, 4)) 1811 return -1; 1812 break; 1813 case ENCODING_RO: 1814 if (readOpcodeRegister(insn, 8)) 1815 return -1; 1816 break; 1817 case ENCODING_Rv: 1818 if (readOpcodeRegister(insn, 0)) 1819 return -1; 1820 break; 
1821 case ENCODING_FP: 1822 break; 1823 case ENCODING_VVVV: 1824 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1825 if (!hasVVVV) 1826 return -1; 1827 if (fixupReg(insn, &Op)) 1828 return -1; 1829 break; 1830 case ENCODING_WRITEMASK: 1831 if (readMaskRegister(insn)) 1832 return -1; 1833 break; 1834 case ENCODING_DUP: 1835 break; 1836 default: 1837 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1838 return -1; 1839 } 1840 } 1841 1842 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1843 if (needVVVV) return -1; 1844 1845 return 0; 1846 } 1847 1848 /* 1849 * decodeInstruction - Reads and interprets a full instruction provided by the 1850 * user. 1851 * 1852 * @param insn - A pointer to the instruction to be populated. Must be 1853 * pre-allocated. 1854 * @param reader - The function to be used to read the instruction's bytes. 1855 * @param readerArg - A generic argument to be passed to the reader to store 1856 * any internal state. 1857 * @param logger - If non-NULL, the function to be used to write log messages 1858 * and warnings. 1859 * @param loggerArg - A generic argument to be passed to the logger to store 1860 * any internal state. 1861 * @param startLoc - The address (in the reader's address space) of the first 1862 * byte in the instruction. 1863 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1864 * decode the instruction in. 1865 * @return - 0 if the instruction's memory could be read; nonzero if 1866 * not. 
1867 */ 1868 int llvm::X86Disassembler::decodeInstruction( 1869 struct InternalInstruction *insn, byteReader_t reader, 1870 const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, 1871 uint64_t startLoc, DisassemblerMode mode) { 1872 memset(insn, 0, sizeof(struct InternalInstruction)); 1873 1874 insn->reader = reader; 1875 insn->readerArg = readerArg; 1876 insn->dlog = logger; 1877 insn->dlogArg = loggerArg; 1878 insn->startLocation = startLoc; 1879 insn->readerCursor = startLoc; 1880 insn->mode = mode; 1881 insn->numImmediatesConsumed = 0; 1882 1883 if (readPrefixes(insn) || 1884 readOpcode(insn) || 1885 getID(insn, miiArg) || 1886 insn->instructionID == 0 || 1887 readOperands(insn)) 1888 return -1; 1889 1890 insn->operands = x86OperandSets[insn->spec->operands]; 1891 1892 insn->length = insn->readerCursor - insn->startLocation; 1893 1894 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1895 startLoc, insn->readerCursor, insn->length); 1896 1897 if (insn->length > 15) 1898 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1899 1900 return 0; 1901 } 1902