1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains code to translate the data produced by the decoder into 12 // MCInsts. 13 // Documentation for the disassembler can be found in X86Disassembler.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86Disassembler.h" 18 #include "X86DisassemblerDecoder.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDisassembler.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/MemoryObject.h" 27 #include "llvm/Support/TargetRegistry.h" 28 #include "llvm/Support/raw_ostream.h" 29 30 using namespace llvm; 31 using namespace llvm::X86Disassembler; 32 33 #define DEBUG_TYPE "x86-disassembler" 34 35 #define GET_REGINFO_ENUM 36 #include "X86GenRegisterInfo.inc" 37 #define GET_INSTRINFO_ENUM 38 #include "X86GenInstrInfo.inc" 39 #define GET_SUBTARGETINFO_ENUM 40 #include "X86GenSubtargetInfo.inc" 41 42 void llvm::X86Disassembler::Debug(const char *file, unsigned line, 43 const char *s) { 44 dbgs() << file << ":" << line << ": " << s; 45 } 46 47 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, 48 const void *mii) { 49 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); 50 return MII->getName(Opcode); 51 } 52 53 #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); 54 55 namespace llvm { 56 57 // Fill-ins to make the compiler happy. These constants are never actually 58 // assigned; they are just filler to make an automatically-generated switch 59 // statement work. 60 namespace X86 { 61 enum { 62 BX_SI = 500, 63 BX_DI = 501, 64 BP_SI = 502, 65 BP_DI = 503, 66 sib = 504, 67 sib64 = 505 68 }; 69 } 70 71 extern Target TheX86_32Target, TheX86_64Target; 72 73 } 74 75 static bool translateInstruction(MCInst &target, 76 InternalInstruction &source, 77 const MCDisassembler *Dis); 78 79 X86GenericDisassembler::X86GenericDisassembler( 80 const MCSubtargetInfo &STI, 81 MCContext &Ctx, 82 std::unique_ptr<const MCInstrInfo> MII) 83 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 84 switch (STI.getFeatureBits() & 85 (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { 86 case X86::Mode16Bit: 87 fMode = MODE_16BIT; 88 break; 89 case X86::Mode32Bit: 90 fMode = MODE_32BIT; 91 break; 92 case X86::Mode64Bit: 93 fMode = MODE_64BIT; 94 break; 95 default: 96 llvm_unreachable("Invalid CPU mode"); 97 } 98 } 99 100 /// regionReader - a callback function that wraps the readByte method from 101 /// MemoryObject. 102 /// 103 /// @param arg - The generic callback parameter. In this case, this should 104 /// be a pointer to a MemoryObject. 105 /// @param byte - A pointer to the byte to be read. 106 /// @param address - The address to be read. 107 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { 108 const MemoryObject* region = static_cast<const MemoryObject*>(arg); 109 return region->readByte(address, byte); 110 } 111 112 /// logger - a callback function that wraps the operator<< method from 113 /// raw_ostream. 114 /// 115 /// @param arg - The generic callback parameter. This should be a pointe 116 /// to a raw_ostream. 117 /// @param log - A string to be logged. logger() adds a newline. 118 static void logger(void* arg, const char* log) { 119 if (!arg) 120 return; 121 122 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 123 vStream << log << "\n"; 124 } 125 126 // 127 // Public interface for the disassembler 128 // 129 130 MCDisassembler::DecodeStatus 131 X86GenericDisassembler::getInstruction(MCInst &instr, 132 uint64_t &size, 133 const MemoryObject ®ion, 134 uint64_t address, 135 raw_ostream &vStream, 136 raw_ostream &cStream) const { 137 CommentStream = &cStream; 138 139 InternalInstruction internalInstr; 140 141 dlog_t loggerFn = logger; 142 if (&vStream == &nulls()) 143 loggerFn = nullptr; // Disable logging completely if it's going to nulls(). 144 145 int ret = decodeInstruction(&internalInstr, 146 regionReader, 147 (const void*)®ion, 148 loggerFn, 149 (void*)&vStream, 150 (const void*)MII.get(), 151 address, 152 fMode); 153 154 if (ret) { 155 size = internalInstr.readerCursor - address; 156 return Fail; 157 } 158 else { 159 size = internalInstr.length; 160 return (!translateInstruction(instr, internalInstr, this)) ? 161 Success : Fail; 162 } 163 } 164 165 // 166 // Private code that translates from struct InternalInstructions to MCInsts. 167 // 168 169 /// translateRegister - Translates an internal register to the appropriate LLVM 170 /// register, and appends it as an operand to an MCInst. 171 /// 172 /// @param mcInst - The MCInst to append to. 173 /// @param reg - The Reg to append. 174 static void translateRegister(MCInst &mcInst, Reg reg) { 175 #define ENTRY(x) X86::x, 176 uint8_t llvmRegnums[] = { 177 ALL_REGS 178 0 179 }; 180 #undef ENTRY 181 182 uint8_t llvmRegnum = llvmRegnums[reg]; 183 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 184 } 185 186 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the 187 /// immediate Value in the MCInst. 188 /// 189 /// @param Value - The immediate Value, has had any PC adjustment made by 190 /// the caller. 191 /// @param isBranch - If the instruction is a branch instruction 192 /// @param Address - The starting address of the instruction 193 /// @param Offset - The byte offset to this immediate in the instruction 194 /// @param Width - The byte width of this immediate in the instruction 195 /// 196 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was 197 /// called then that function is called to get any symbolic information for the 198 /// immediate in the instruction using the Address, Offset and Width. If that 199 /// returns non-zero then the symbolic information it returns is used to create 200 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() 201 /// returns zero and isBranch is true then a symbol look up for immediate Value 202 /// is done and if a symbol is found an MCExpr is created with that, else 203 /// an MCExpr with the immediate Value is created. This function returns true 204 /// if it adds an operand to the MCInst and false otherwise. 205 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, 206 uint64_t Address, uint64_t Offset, 207 uint64_t Width, MCInst &MI, 208 const MCDisassembler *Dis) { 209 return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, 210 Offset, Width); 211 } 212 213 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being 214 /// referenced by a load instruction with the base register that is the rip. 215 /// These can often be addresses in a literal pool. The Address of the 216 /// instruction and its immediate Value are used to determine the address 217 /// being referenced in the literal pool entry. The SymbolLookUp call back will 218 /// return a pointer to a literal 'C' string if the referenced address is an 219 /// address into a section with 'C' string literals. 220 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, 221 const void *Decoder) { 222 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); 223 Dis->tryAddingPcLoadReferenceComment(Value, Address); 224 } 225 226 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 227 0, // SEG_OVERRIDE_NONE 228 X86::CS, 229 X86::SS, 230 X86::DS, 231 X86::ES, 232 X86::FS, 233 X86::GS 234 }; 235 236 /// translateSrcIndex - Appends a source index operand to an MCInst. 237 /// 238 /// @param mcInst - The MCInst to append to. 239 /// @param insn - The internal instruction. 240 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 241 unsigned baseRegNo; 242 243 if (insn.mode == MODE_64BIT) 244 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; 245 else if (insn.mode == MODE_32BIT) 246 baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; 247 else { 248 assert(insn.mode == MODE_16BIT); 249 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; 250 } 251 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 252 mcInst.addOperand(baseReg); 253 254 MCOperand segmentReg; 255 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 256 mcInst.addOperand(segmentReg); 257 return false; 258 } 259 260 /// translateDstIndex - Appends a destination index operand to an MCInst. 261 /// 262 /// @param mcInst - The MCInst to append to. 263 /// @param insn - The internal instruction. 264 265 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 266 unsigned baseRegNo; 267 268 if (insn.mode == MODE_64BIT) 269 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; 270 else if (insn.mode == MODE_32BIT) 271 baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; 272 else { 273 assert(insn.mode == MODE_16BIT); 274 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; 275 } 276 MCOperand baseReg = MCOperand::CreateReg(baseRegNo); 277 mcInst.addOperand(baseReg); 278 return false; 279 } 280 281 /// translateImmediate - Appends an immediate operand to an MCInst. 282 /// 283 /// @param mcInst - The MCInst to append to. 284 /// @param immediate - The immediate value to append. 285 /// @param operand - The operand, as stored in the descriptor table. 286 /// @param insn - The internal instruction. 287 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 288 const OperandSpecifier &operand, 289 InternalInstruction &insn, 290 const MCDisassembler *Dis) { 291 // Sign-extend the immediate if necessary. 292 293 OperandType type = (OperandType)operand.type; 294 295 bool isBranch = false; 296 uint64_t pcrel = 0; 297 if (type == TYPE_RELv) { 298 isBranch = true; 299 pcrel = insn.startLocation + 300 insn.immediateOffset + insn.immediateSize; 301 switch (insn.displacementSize) { 302 default: 303 break; 304 case 1: 305 if(immediate & 0x80) 306 immediate |= ~(0xffull); 307 break; 308 case 2: 309 if(immediate & 0x8000) 310 immediate |= ~(0xffffull); 311 break; 312 case 4: 313 if(immediate & 0x80000000) 314 immediate |= ~(0xffffffffull); 315 break; 316 case 8: 317 break; 318 } 319 } 320 // By default sign-extend all X86 immediates based on their encoding. 321 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 322 type == TYPE_IMM64 || type == TYPE_IMMv) { 323 uint32_t Opcode = mcInst.getOpcode(); 324 switch (operand.encoding) { 325 default: 326 break; 327 case ENCODING_IB: 328 // Special case those X86 instructions that use the imm8 as a set of 329 // bits, bit count, etc. and are not sign-extend. 330 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 331 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 332 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 333 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 334 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 335 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 336 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 337 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 338 Opcode != X86::VINSERTPSrr) 339 if(immediate & 0x80) 340 immediate |= ~(0xffull); 341 break; 342 case ENCODING_IW: 343 if(immediate & 0x8000) 344 immediate |= ~(0xffffull); 345 break; 346 case ENCODING_ID: 347 if(immediate & 0x80000000) 348 immediate |= ~(0xffffffffull); 349 break; 350 case ENCODING_IO: 351 break; 352 } 353 } 354 355 switch (type) { 356 case TYPE_XMM32: 357 case TYPE_XMM64: 358 case TYPE_XMM128: 359 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 360 return; 361 case TYPE_XMM256: 362 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 363 return; 364 case TYPE_XMM512: 365 mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4))); 366 return; 367 case TYPE_REL8: 368 isBranch = true; 369 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 370 if(immediate & 0x80) 371 immediate |= ~(0xffull); 372 break; 373 case TYPE_REL32: 374 case TYPE_REL64: 375 isBranch = true; 376 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; 377 if(immediate & 0x80000000) 378 immediate |= ~(0xffffffffull); 379 break; 380 default: 381 // operand is 64 bits wide. Do nothing. 382 break; 383 } 384 385 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, 386 insn.immediateOffset, insn.immediateSize, 387 mcInst, Dis)) 388 mcInst.addOperand(MCOperand::CreateImm(immediate)); 389 390 if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 || 391 type == TYPE_MOFFS32 || type == TYPE_MOFFS64) { 392 MCOperand segmentReg; 393 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 394 mcInst.addOperand(segmentReg); 395 } 396 } 397 398 /// translateRMRegister - Translates a register stored in the R/M field of the 399 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 400 /// @param mcInst - The MCInst to append to. 401 /// @param insn - The internal instruction to extract the R/M field 402 /// from. 403 /// @return - 0 on success; -1 otherwise 404 static bool translateRMRegister(MCInst &mcInst, 405 InternalInstruction &insn) { 406 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 407 debug("A R/M register operand may not have a SIB byte"); 408 return true; 409 } 410 411 switch (insn.eaBase) { 412 default: 413 debug("Unexpected EA base register"); 414 return true; 415 case EA_BASE_NONE: 416 debug("EA_BASE_NONE for ModR/M base"); 417 return true; 418 #define ENTRY(x) case EA_BASE_##x: 419 ALL_EA_BASES 420 #undef ENTRY 421 debug("A R/M register operand may not have a base; " 422 "the operand must be a register."); 423 return true; 424 #define ENTRY(x) \ 425 case EA_REG_##x: \ 426 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 427 ALL_REGS 428 #undef ENTRY 429 } 430 431 return false; 432 } 433 434 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 435 /// fields of an internal instruction (and possibly its SIB byte) to a memory 436 /// operand in LLVM's format, and appends it to an MCInst. 437 /// 438 /// @param mcInst - The MCInst to append to. 439 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 440 /// from. 441 /// @return - 0 on success; nonzero otherwise 442 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 443 const MCDisassembler *Dis) { 444 // Addresses in an MCInst are represented as five operands: 445 // 1. basereg (register) The R/M base, or (if there is a SIB) the 446 // SIB base 447 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 448 // scale amount 449 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 450 // the index (which is multiplied by the 451 // scale amount) 452 // 4. displacement (immediate) 0, or the displacement if there is one 453 // 5. segmentreg (register) x86_registerNONE for now, but could be set 454 // if we have segment overrides 455 456 MCOperand baseReg; 457 MCOperand scaleAmount; 458 MCOperand indexReg; 459 MCOperand displacement; 460 MCOperand segmentReg; 461 uint64_t pcrel = 0; 462 463 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 464 if (insn.sibBase != SIB_BASE_NONE) { 465 switch (insn.sibBase) { 466 default: 467 debug("Unexpected sibBase"); 468 return true; 469 #define ENTRY(x) \ 470 case SIB_BASE_##x: \ 471 baseReg = MCOperand::CreateReg(X86::x); break; 472 ALL_SIB_BASES 473 #undef ENTRY 474 } 475 } else { 476 baseReg = MCOperand::CreateReg(0); 477 } 478 479 // Check whether we are handling VSIB addressing mode for GATHER. 480 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and 481 // we should use SIB_INDEX_XMM4|YMM4 for VSIB. 482 // I don't see a way to get the correct IndexReg in readSIB: 483 // We can tell whether it is VSIB or SIB after instruction ID is decoded, 484 // but instruction ID may not be decoded yet when calling readSIB. 485 uint32_t Opcode = mcInst.getOpcode(); 486 bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || 487 Opcode == X86::VGATHERDPDYrm || 488 Opcode == X86::VGATHERQPDrm || 489 Opcode == X86::VGATHERDPSrm || 490 Opcode == X86::VGATHERQPSrm || 491 Opcode == X86::VPGATHERDQrm || 492 Opcode == X86::VPGATHERDQYrm || 493 Opcode == X86::VPGATHERQQrm || 494 Opcode == X86::VPGATHERDDrm || 495 Opcode == X86::VPGATHERQDrm); 496 bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || 497 Opcode == X86::VGATHERDPSYrm || 498 Opcode == X86::VGATHERQPSYrm || 499 Opcode == X86::VGATHERDPDZrm || 500 Opcode == X86::VPGATHERDQZrm || 501 Opcode == X86::VPGATHERQQYrm || 502 Opcode == X86::VPGATHERDDYrm || 503 Opcode == X86::VPGATHERQDYrm); 504 bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || 505 Opcode == X86::VGATHERDPSZrm || 506 Opcode == X86::VGATHERQPSZrm || 507 Opcode == X86::VPGATHERQQZrm || 508 Opcode == X86::VPGATHERDDZrm || 509 Opcode == X86::VPGATHERQDZrm); 510 if (IndexIs128 || IndexIs256 || IndexIs512) { 511 unsigned IndexOffset = insn.sibIndex - 512 (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); 513 SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : 514 IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; 515 insn.sibIndex = (SIBIndex)(IndexBase + 516 (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); 517 } 518 519 if (insn.sibIndex != SIB_INDEX_NONE) { 520 switch (insn.sibIndex) { 521 default: 522 debug("Unexpected sibIndex"); 523 return true; 524 #define ENTRY(x) \ 525 case SIB_INDEX_##x: \ 526 indexReg = MCOperand::CreateReg(X86::x); break; 527 EA_BASES_32BIT 528 EA_BASES_64BIT 529 REGS_XMM 530 REGS_YMM 531 REGS_ZMM 532 #undef ENTRY 533 } 534 } else { 535 indexReg = MCOperand::CreateReg(0); 536 } 537 538 scaleAmount = MCOperand::CreateImm(insn.sibScale); 539 } else { 540 switch (insn.eaBase) { 541 case EA_BASE_NONE: 542 if (insn.eaDisplacement == EA_DISP_NONE) { 543 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 544 return true; 545 } 546 if (insn.mode == MODE_64BIT){ 547 pcrel = insn.startLocation + 548 insn.displacementOffset + insn.displacementSize; 549 tryAddingPcLoadReferenceComment(insn.startLocation + 550 insn.displacementOffset, 551 insn.displacement + pcrel, Dis); 552 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 553 } 554 else 555 baseReg = MCOperand::CreateReg(0); 556 557 indexReg = MCOperand::CreateReg(0); 558 break; 559 case EA_BASE_BX_SI: 560 baseReg = MCOperand::CreateReg(X86::BX); 561 indexReg = MCOperand::CreateReg(X86::SI); 562 break; 563 case EA_BASE_BX_DI: 564 baseReg = MCOperand::CreateReg(X86::BX); 565 indexReg = MCOperand::CreateReg(X86::DI); 566 break; 567 case EA_BASE_BP_SI: 568 baseReg = MCOperand::CreateReg(X86::BP); 569 indexReg = MCOperand::CreateReg(X86::SI); 570 break; 571 case EA_BASE_BP_DI: 572 baseReg = MCOperand::CreateReg(X86::BP); 573 indexReg = MCOperand::CreateReg(X86::DI); 574 break; 575 default: 576 indexReg = MCOperand::CreateReg(0); 577 switch (insn.eaBase) { 578 default: 579 debug("Unexpected eaBase"); 580 return true; 581 // Here, we will use the fill-ins defined above. However, 582 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 583 // sib and sib64 were handled in the top-level if, so they're only 584 // placeholders to keep the compiler happy. 585 #define ENTRY(x) \ 586 case EA_BASE_##x: \ 587 baseReg = MCOperand::CreateReg(X86::x); break; 588 ALL_EA_BASES 589 #undef ENTRY 590 #define ENTRY(x) case EA_REG_##x: 591 ALL_REGS 592 #undef ENTRY 593 debug("A R/M memory operand may not be a register; " 594 "the base field must be a base."); 595 return true; 596 } 597 } 598 599 scaleAmount = MCOperand::CreateImm(1); 600 } 601 602 displacement = MCOperand::CreateImm(insn.displacement); 603 604 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 605 606 mcInst.addOperand(baseReg); 607 mcInst.addOperand(scaleAmount); 608 mcInst.addOperand(indexReg); 609 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, 610 insn.startLocation, insn.displacementOffset, 611 insn.displacementSize, mcInst, Dis)) 612 mcInst.addOperand(displacement); 613 mcInst.addOperand(segmentReg); 614 return false; 615 } 616 617 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 618 /// byte of an instruction to LLVM form, and appends it to an MCInst. 619 /// 620 /// @param mcInst - The MCInst to append to. 621 /// @param operand - The operand, as stored in the descriptor table. 622 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 623 /// from. 624 /// @return - 0 on success; nonzero otherwise 625 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 626 InternalInstruction &insn, const MCDisassembler *Dis) { 627 switch (operand.type) { 628 default: 629 debug("Unexpected type for a R/M operand"); 630 return true; 631 case TYPE_R8: 632 case TYPE_R16: 633 case TYPE_R32: 634 case TYPE_R64: 635 case TYPE_Rv: 636 case TYPE_MM: 637 case TYPE_MM32: 638 case TYPE_MM64: 639 case TYPE_XMM: 640 case TYPE_XMM32: 641 case TYPE_XMM64: 642 case TYPE_XMM128: 643 case TYPE_XMM256: 644 case TYPE_XMM512: 645 case TYPE_VK1: 646 case TYPE_VK8: 647 case TYPE_VK16: 648 case TYPE_DEBUGREG: 649 case TYPE_CONTROLREG: 650 return translateRMRegister(mcInst, insn); 651 case TYPE_M: 652 case TYPE_M8: 653 case TYPE_M16: 654 case TYPE_M32: 655 case TYPE_M64: 656 case TYPE_M128: 657 case TYPE_M256: 658 case TYPE_M512: 659 case TYPE_Mv: 660 case TYPE_M32FP: 661 case TYPE_M64FP: 662 case TYPE_M80FP: 663 case TYPE_M16INT: 664 case TYPE_M32INT: 665 case TYPE_M64INT: 666 case TYPE_M1616: 667 case TYPE_M1632: 668 case TYPE_M1664: 669 case TYPE_LEA: 670 return translateRMMemory(mcInst, insn, Dis); 671 } 672 } 673 674 /// translateFPRegister - Translates a stack position on the FPU stack to its 675 /// LLVM form, and appends it to an MCInst. 676 /// 677 /// @param mcInst - The MCInst to append to. 678 /// @param stackPos - The stack position to translate. 679 static void translateFPRegister(MCInst &mcInst, 680 uint8_t stackPos) { 681 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 682 } 683 684 /// translateMaskRegister - Translates a 3-bit mask register number to 685 /// LLVM form, and appends it to an MCInst. 686 /// 687 /// @param mcInst - The MCInst to append to. 688 /// @param maskRegNum - Number of mask register from 0 to 7. 689 /// @return - false on success; true otherwise. 690 static bool translateMaskRegister(MCInst &mcInst, 691 uint8_t maskRegNum) { 692 if (maskRegNum >= 8) { 693 debug("Invalid mask register number"); 694 return true; 695 } 696 697 mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum)); 698 return false; 699 } 700 701 /// translateOperand - Translates an operand stored in an internal instruction 702 /// to LLVM's format and appends it to an MCInst. 703 /// 704 /// @param mcInst - The MCInst to append to. 705 /// @param operand - The operand, as stored in the descriptor table. 706 /// @param insn - The internal instruction. 707 /// @return - false on success; true otherwise. 708 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 709 InternalInstruction &insn, 710 const MCDisassembler *Dis) { 711 switch (operand.encoding) { 712 default: 713 debug("Unhandled operand encoding during translation"); 714 return true; 715 case ENCODING_REG: 716 translateRegister(mcInst, insn.reg); 717 return false; 718 case ENCODING_WRITEMASK: 719 return translateMaskRegister(mcInst, insn.writemask); 720 case ENCODING_RM: 721 return translateRM(mcInst, operand, insn, Dis); 722 case ENCODING_CB: 723 case ENCODING_CW: 724 case ENCODING_CD: 725 case ENCODING_CP: 726 case ENCODING_CO: 727 case ENCODING_CT: 728 debug("Translation of code offsets isn't supported."); 729 return true; 730 case ENCODING_IB: 731 case ENCODING_IW: 732 case ENCODING_ID: 733 case ENCODING_IO: 734 case ENCODING_Iv: 735 case ENCODING_Ia: 736 translateImmediate(mcInst, 737 insn.immediates[insn.numImmediatesTranslated++], 738 operand, 739 insn, 740 Dis); 741 return false; 742 case ENCODING_SI: 743 return translateSrcIndex(mcInst, insn); 744 case ENCODING_DI: 745 return translateDstIndex(mcInst, insn); 746 case ENCODING_RB: 747 case ENCODING_RW: 748 case ENCODING_RD: 749 case ENCODING_RO: 750 case ENCODING_Rv: 751 translateRegister(mcInst, insn.opcodeRegister); 752 return false; 753 case ENCODING_FP: 754 translateFPRegister(mcInst, insn.modRM & 7); 755 return false; 756 case ENCODING_VVVV: 757 translateRegister(mcInst, insn.vvvv); 758 return false; 759 case ENCODING_DUP: 760 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 761 insn, Dis); 762 } 763 } 764 765 /// translateInstruction - Translates an internal instruction and all its 766 /// operands to an MCInst. 767 /// 768 /// @param mcInst - The MCInst to populate with the instruction's data. 769 /// @param insn - The internal instruction. 770 /// @return - false on success; true otherwise. 771 static bool translateInstruction(MCInst &mcInst, 772 InternalInstruction &insn, 773 const MCDisassembler *Dis) { 774 if (!insn.spec) { 775 debug("Instruction has no specification"); 776 return true; 777 } 778 779 mcInst.setOpcode(insn.instructionID); 780 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 781 // prefix bytes should be disassembled as xrelease and xacquire then set the 782 // opcode to those instead of the rep and repne opcodes. 783 if (insn.xAcquireRelease) { 784 if(mcInst.getOpcode() == X86::REP_PREFIX) 785 mcInst.setOpcode(X86::XRELEASE_PREFIX); 786 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 787 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 788 } 789 790 insn.numImmediatesTranslated = 0; 791 792 for (const auto &Op : insn.operands) { 793 if (Op.encoding != ENCODING_NONE) { 794 if (translateOperand(mcInst, Op, insn, Dis)) { 795 return true; 796 } 797 } 798 } 799 800 return false; 801 } 802 803 static MCDisassembler *createX86Disassembler(const Target &T, 804 const MCSubtargetInfo &STI, 805 MCContext &Ctx) { 806 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 807 return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); 808 } 809 810 extern "C" void LLVMInitializeX86Disassembler() { 811 // Register the disassembler. 812 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 813 createX86Disassembler); 814 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 815 createX86Disassembler); 816 } 817