1 //===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is part of the X86 Disassembler. 11 // It contains code to translate the data produced by the decoder into 12 // MCInsts. 13 // Documentation for the disassembler can be found in X86Disassembler.h. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "X86Disassembler.h" 18 #include "X86DisassemblerDecoder.h" 19 20 #include "llvm/MC/EDInstInfo.h" 21 #include "llvm/MC/MCDisassembler.h" 22 #include "llvm/MC/MCDisassembler.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/MemoryObject.h" 27 #include "llvm/Support/TargetRegistry.h" 28 #include "llvm/Support/raw_ostream.h" 29 30 #define GET_REGINFO_ENUM 31 #include "X86GenRegisterInfo.inc" 32 #define GET_INSTRINFO_ENUM 33 #include "X86GenInstrInfo.inc" 34 #include "X86GenEDInfo.inc" 35 36 using namespace llvm; 37 using namespace llvm::X86Disassembler; 38 39 void x86DisassemblerDebug(const char *file, 40 unsigned line, 41 const char *s) { 42 dbgs() << file << ":" << line << ": " << s; 43 } 44 45 #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); 46 47 namespace llvm { 48 49 // Fill-ins to make the compiler happy. These constants are never actually 50 // assigned; they are just filler to make an automatically-generated switch 51 // statement work. 52 namespace X86 { 53 enum { 54 BX_SI = 500, 55 BX_DI = 501, 56 BP_SI = 502, 57 BP_DI = 503, 58 sib = 504, 59 sib64 = 505 60 }; 61 } 62 63 extern Target TheX86_32Target, TheX86_64Target; 64 65 } 66 67 static bool translateInstruction(MCInst &target, 68 InternalInstruction &source); 69 70 X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) : 71 MCDisassembler(STI), 72 fMode(mode) { 73 } 74 75 X86GenericDisassembler::~X86GenericDisassembler() { 76 } 77 78 EDInstInfo *X86GenericDisassembler::getEDInfo() const { 79 return instInfoX86; 80 } 81 82 /// regionReader - a callback function that wraps the readByte method from 83 /// MemoryObject. 84 /// 85 /// @param arg - The generic callback parameter. In this case, this should 86 /// be a pointer to a MemoryObject. 87 /// @param byte - A pointer to the byte to be read. 88 /// @param address - The address to be read. 89 static int regionReader(void* arg, uint8_t* byte, uint64_t address) { 90 MemoryObject* region = static_cast<MemoryObject*>(arg); 91 return region->readByte(address, byte); 92 } 93 94 /// logger - a callback function that wraps the operator<< method from 95 /// raw_ostream. 96 /// 97 /// @param arg - The generic callback parameter. This should be a pointe 98 /// to a raw_ostream. 99 /// @param log - A string to be logged. logger() adds a newline. 100 static void logger(void* arg, const char* log) { 101 if (!arg) 102 return; 103 104 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); 105 vStream << log << "\n"; 106 } 107 108 // 109 // Public interface for the disassembler 110 // 111 112 MCDisassembler::DecodeStatus 113 X86GenericDisassembler::getInstruction(MCInst &instr, 114 uint64_t &size, 115 const MemoryObject ®ion, 116 uint64_t address, 117 raw_ostream &vStream, 118 raw_ostream &cStream) const { 119 InternalInstruction internalInstr; 120 121 dlog_t loggerFn = logger; 122 if (&vStream == &nulls()) 123 loggerFn = 0; // Disable logging completely if it's going to nulls(). 124 125 int ret = decodeInstruction(&internalInstr, 126 regionReader, 127 (void*)®ion, 128 loggerFn, 129 (void*)&vStream, 130 address, 131 fMode); 132 133 if (ret) { 134 size = internalInstr.readerCursor - address; 135 return Fail; 136 } 137 else { 138 size = internalInstr.length; 139 return (!translateInstruction(instr, internalInstr)) ? Success : Fail; 140 } 141 } 142 143 // 144 // Private code that translates from struct InternalInstructions to MCInsts. 145 // 146 147 /// translateRegister - Translates an internal register to the appropriate LLVM 148 /// register, and appends it as an operand to an MCInst. 149 /// 150 /// @param mcInst - The MCInst to append to. 151 /// @param reg - The Reg to append. 152 static void translateRegister(MCInst &mcInst, Reg reg) { 153 #define ENTRY(x) X86::x, 154 uint8_t llvmRegnums[] = { 155 ALL_REGS 156 0 157 }; 158 #undef ENTRY 159 160 uint8_t llvmRegnum = llvmRegnums[reg]; 161 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); 162 } 163 164 /// translateImmediate - Appends an immediate operand to an MCInst. 165 /// 166 /// @param mcInst - The MCInst to append to. 167 /// @param immediate - The immediate value to append. 168 /// @param operand - The operand, as stored in the descriptor table. 169 /// @param insn - The internal instruction. 170 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 171 const OperandSpecifier &operand, 172 InternalInstruction &insn) { 173 // Sign-extend the immediate if necessary. 174 175 OperandType type = operand.type; 176 177 if (type == TYPE_RELv) { 178 switch (insn.displacementSize) { 179 default: 180 break; 181 case 1: 182 type = TYPE_MOFFS8; 183 break; 184 case 2: 185 type = TYPE_MOFFS16; 186 break; 187 case 4: 188 type = TYPE_MOFFS32; 189 break; 190 case 8: 191 type = TYPE_MOFFS64; 192 break; 193 } 194 } 195 // By default sign-extend all X86 immediates based on their encoding. 196 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || 197 type == TYPE_IMM64) { 198 uint32_t Opcode = mcInst.getOpcode(); 199 switch (operand.encoding) { 200 default: 201 break; 202 case ENCODING_IB: 203 // Special case those X86 instructions that use the imm8 as a set of 204 // bits, bit count, etc. and are not sign-extend. 205 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && 206 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && 207 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && 208 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && 209 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && 210 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && 211 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && 212 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && 213 Opcode != X86::VINSERTPSrr) 214 type = TYPE_MOFFS8; 215 break; 216 case ENCODING_IW: 217 type = TYPE_MOFFS16; 218 break; 219 case ENCODING_ID: 220 type = TYPE_MOFFS32; 221 break; 222 case ENCODING_IO: 223 type = TYPE_MOFFS64; 224 break; 225 } 226 } 227 228 switch (type) { 229 case TYPE_XMM128: 230 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); 231 return; 232 case TYPE_XMM256: 233 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); 234 return; 235 case TYPE_MOFFS8: 236 case TYPE_REL8: 237 if(immediate & 0x80) 238 immediate |= ~(0xffull); 239 break; 240 case TYPE_MOFFS16: 241 if(immediate & 0x8000) 242 immediate |= ~(0xffffull); 243 break; 244 case TYPE_MOFFS32: 245 case TYPE_REL32: 246 case TYPE_REL64: 247 if(immediate & 0x80000000) 248 immediate |= ~(0xffffffffull); 249 break; 250 case TYPE_MOFFS64: 251 default: 252 // operand is 64 bits wide. Do nothing. 253 break; 254 } 255 256 mcInst.addOperand(MCOperand::CreateImm(immediate)); 257 } 258 259 /// translateRMRegister - Translates a register stored in the R/M field of the 260 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 261 /// @param mcInst - The MCInst to append to. 262 /// @param insn - The internal instruction to extract the R/M field 263 /// from. 264 /// @return - 0 on success; -1 otherwise 265 static bool translateRMRegister(MCInst &mcInst, 266 InternalInstruction &insn) { 267 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 268 debug("A R/M register operand may not have a SIB byte"); 269 return true; 270 } 271 272 switch (insn.eaBase) { 273 default: 274 debug("Unexpected EA base register"); 275 return true; 276 case EA_BASE_NONE: 277 debug("EA_BASE_NONE for ModR/M base"); 278 return true; 279 #define ENTRY(x) case EA_BASE_##x: 280 ALL_EA_BASES 281 #undef ENTRY 282 debug("A R/M register operand may not have a base; " 283 "the operand must be a register."); 284 return true; 285 #define ENTRY(x) \ 286 case EA_REG_##x: \ 287 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; 288 ALL_REGS 289 #undef ENTRY 290 } 291 292 return false; 293 } 294 295 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 296 /// fields of an internal instruction (and possibly its SIB byte) to a memory 297 /// operand in LLVM's format, and appends it to an MCInst. 298 /// 299 /// @param mcInst - The MCInst to append to. 300 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 301 /// from. 302 /// @return - 0 on success; nonzero otherwise 303 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { 304 // Addresses in an MCInst are represented as five operands: 305 // 1. basereg (register) The R/M base, or (if there is a SIB) the 306 // SIB base 307 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 308 // scale amount 309 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 310 // the index (which is multiplied by the 311 // scale amount) 312 // 4. displacement (immediate) 0, or the displacement if there is one 313 // 5. segmentreg (register) x86_registerNONE for now, but could be set 314 // if we have segment overrides 315 316 MCOperand baseReg; 317 MCOperand scaleAmount; 318 MCOperand indexReg; 319 MCOperand displacement; 320 MCOperand segmentReg; 321 322 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 323 if (insn.sibBase != SIB_BASE_NONE) { 324 switch (insn.sibBase) { 325 default: 326 debug("Unexpected sibBase"); 327 return true; 328 #define ENTRY(x) \ 329 case SIB_BASE_##x: \ 330 baseReg = MCOperand::CreateReg(X86::x); break; 331 ALL_SIB_BASES 332 #undef ENTRY 333 } 334 } else { 335 baseReg = MCOperand::CreateReg(0); 336 } 337 338 if (insn.sibIndex != SIB_INDEX_NONE) { 339 switch (insn.sibIndex) { 340 default: 341 debug("Unexpected sibIndex"); 342 return true; 343 #define ENTRY(x) \ 344 case SIB_INDEX_##x: \ 345 indexReg = MCOperand::CreateReg(X86::x); break; 346 EA_BASES_32BIT 347 EA_BASES_64BIT 348 #undef ENTRY 349 } 350 } else { 351 indexReg = MCOperand::CreateReg(0); 352 } 353 354 scaleAmount = MCOperand::CreateImm(insn.sibScale); 355 } else { 356 switch (insn.eaBase) { 357 case EA_BASE_NONE: 358 if (insn.eaDisplacement == EA_DISP_NONE) { 359 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 360 return true; 361 } 362 if (insn.mode == MODE_64BIT) 363 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 364 else 365 baseReg = MCOperand::CreateReg(0); 366 367 indexReg = MCOperand::CreateReg(0); 368 break; 369 case EA_BASE_BX_SI: 370 baseReg = MCOperand::CreateReg(X86::BX); 371 indexReg = MCOperand::CreateReg(X86::SI); 372 break; 373 case EA_BASE_BX_DI: 374 baseReg = MCOperand::CreateReg(X86::BX); 375 indexReg = MCOperand::CreateReg(X86::DI); 376 break; 377 case EA_BASE_BP_SI: 378 baseReg = MCOperand::CreateReg(X86::BP); 379 indexReg = MCOperand::CreateReg(X86::SI); 380 break; 381 case EA_BASE_BP_DI: 382 baseReg = MCOperand::CreateReg(X86::BP); 383 indexReg = MCOperand::CreateReg(X86::DI); 384 break; 385 default: 386 indexReg = MCOperand::CreateReg(0); 387 switch (insn.eaBase) { 388 default: 389 debug("Unexpected eaBase"); 390 return true; 391 // Here, we will use the fill-ins defined above. However, 392 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 393 // sib and sib64 were handled in the top-level if, so they're only 394 // placeholders to keep the compiler happy. 395 #define ENTRY(x) \ 396 case EA_BASE_##x: \ 397 baseReg = MCOperand::CreateReg(X86::x); break; 398 ALL_EA_BASES 399 #undef ENTRY 400 #define ENTRY(x) case EA_REG_##x: 401 ALL_REGS 402 #undef ENTRY 403 debug("A R/M memory operand may not be a register; " 404 "the base field must be a base."); 405 return true; 406 } 407 } 408 409 scaleAmount = MCOperand::CreateImm(1); 410 } 411 412 displacement = MCOperand::CreateImm(insn.displacement); 413 414 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 415 0, // SEG_OVERRIDE_NONE 416 X86::CS, 417 X86::SS, 418 X86::DS, 419 X86::ES, 420 X86::FS, 421 X86::GS 422 }; 423 424 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); 425 426 mcInst.addOperand(baseReg); 427 mcInst.addOperand(scaleAmount); 428 mcInst.addOperand(indexReg); 429 mcInst.addOperand(displacement); 430 mcInst.addOperand(segmentReg); 431 return false; 432 } 433 434 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 435 /// byte of an instruction to LLVM form, and appends it to an MCInst. 436 /// 437 /// @param mcInst - The MCInst to append to. 438 /// @param operand - The operand, as stored in the descriptor table. 439 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 440 /// from. 441 /// @return - 0 on success; nonzero otherwise 442 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 443 InternalInstruction &insn) { 444 switch (operand.type) { 445 default: 446 debug("Unexpected type for a R/M operand"); 447 return true; 448 case TYPE_R8: 449 case TYPE_R16: 450 case TYPE_R32: 451 case TYPE_R64: 452 case TYPE_Rv: 453 case TYPE_MM: 454 case TYPE_MM32: 455 case TYPE_MM64: 456 case TYPE_XMM: 457 case TYPE_XMM32: 458 case TYPE_XMM64: 459 case TYPE_XMM128: 460 case TYPE_XMM256: 461 case TYPE_DEBUGREG: 462 case TYPE_CONTROLREG: 463 return translateRMRegister(mcInst, insn); 464 case TYPE_M: 465 case TYPE_M8: 466 case TYPE_M16: 467 case TYPE_M32: 468 case TYPE_M64: 469 case TYPE_M128: 470 case TYPE_M256: 471 case TYPE_M512: 472 case TYPE_Mv: 473 case TYPE_M32FP: 474 case TYPE_M64FP: 475 case TYPE_M80FP: 476 case TYPE_M16INT: 477 case TYPE_M32INT: 478 case TYPE_M64INT: 479 case TYPE_M1616: 480 case TYPE_M1632: 481 case TYPE_M1664: 482 case TYPE_LEA: 483 return translateRMMemory(mcInst, insn); 484 } 485 } 486 487 /// translateFPRegister - Translates a stack position on the FPU stack to its 488 /// LLVM form, and appends it to an MCInst. 489 /// 490 /// @param mcInst - The MCInst to append to. 491 /// @param stackPos - The stack position to translate. 492 /// @return - 0 on success; nonzero otherwise. 493 static bool translateFPRegister(MCInst &mcInst, 494 uint8_t stackPos) { 495 if (stackPos >= 8) { 496 debug("Invalid FP stack position"); 497 return true; 498 } 499 500 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); 501 502 return false; 503 } 504 505 /// translateOperand - Translates an operand stored in an internal instruction 506 /// to LLVM's format and appends it to an MCInst. 507 /// 508 /// @param mcInst - The MCInst to append to. 509 /// @param operand - The operand, as stored in the descriptor table. 510 /// @param insn - The internal instruction. 511 /// @return - false on success; true otherwise. 512 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 513 InternalInstruction &insn) { 514 switch (operand.encoding) { 515 default: 516 debug("Unhandled operand encoding during translation"); 517 return true; 518 case ENCODING_REG: 519 translateRegister(mcInst, insn.reg); 520 return false; 521 case ENCODING_RM: 522 return translateRM(mcInst, operand, insn); 523 case ENCODING_CB: 524 case ENCODING_CW: 525 case ENCODING_CD: 526 case ENCODING_CP: 527 case ENCODING_CO: 528 case ENCODING_CT: 529 debug("Translation of code offsets isn't supported."); 530 return true; 531 case ENCODING_IB: 532 case ENCODING_IW: 533 case ENCODING_ID: 534 case ENCODING_IO: 535 case ENCODING_Iv: 536 case ENCODING_Ia: 537 translateImmediate(mcInst, 538 insn.immediates[insn.numImmediatesTranslated++], 539 operand, 540 insn); 541 return false; 542 case ENCODING_RB: 543 case ENCODING_RW: 544 case ENCODING_RD: 545 case ENCODING_RO: 546 translateRegister(mcInst, insn.opcodeRegister); 547 return false; 548 case ENCODING_I: 549 return translateFPRegister(mcInst, insn.opcodeModifier); 550 case ENCODING_Rv: 551 translateRegister(mcInst, insn.opcodeRegister); 552 return false; 553 case ENCODING_VVVV: 554 translateRegister(mcInst, insn.vvvv); 555 return false; 556 case ENCODING_DUP: 557 return translateOperand(mcInst, 558 insn.spec->operands[operand.type - TYPE_DUP0], 559 insn); 560 } 561 } 562 563 /// translateInstruction - Translates an internal instruction and all its 564 /// operands to an MCInst. 565 /// 566 /// @param mcInst - The MCInst to populate with the instruction's data. 567 /// @param insn - The internal instruction. 568 /// @return - false on success; true otherwise. 569 static bool translateInstruction(MCInst &mcInst, 570 InternalInstruction &insn) { 571 if (!insn.spec) { 572 debug("Instruction has no specification"); 573 return true; 574 } 575 576 mcInst.setOpcode(insn.instructionID); 577 578 int index; 579 580 insn.numImmediatesTranslated = 0; 581 582 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 583 if (insn.spec->operands[index].encoding != ENCODING_NONE) { 584 if (translateOperand(mcInst, insn.spec->operands[index], insn)) { 585 return true; 586 } 587 } 588 } 589 590 return false; 591 } 592 593 static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) { 594 return new X86Disassembler::X86_32Disassembler(STI); 595 } 596 597 static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) { 598 return new X86Disassembler::X86_64Disassembler(STI); 599 } 600 601 extern "C" void LLVMInitializeX86Disassembler() { 602 // Register the disassembler. 603 TargetRegistry::RegisterMCDisassembler(TheX86_32Target, 604 createX86_32Disassembler); 605 TargetRegistry::RegisterMCDisassembler(TheX86_64Target, 606 createX86_64Disassembler); 607 } 608