Home | History | Annotate | Download | only in Disassembler
      1 //===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is part of the X86 Disassembler.
     11 // It contains code to translate the data produced by the decoder into
     12 //  MCInsts.
     13 // Documentation for the disassembler can be found in X86Disassembler.h.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "X86Disassembler.h"
     18 #include "X86DisassemblerDecoder.h"
     19 
     20 #include "llvm/MC/EDInstInfo.h"
     21 #include "llvm/MC/MCDisassembler.h"
     22 #include "llvm/MC/MCDisassembler.h"
     23 #include "llvm/MC/MCInst.h"
     24 #include "llvm/MC/MCSubtargetInfo.h"
     25 #include "llvm/Support/Debug.h"
     26 #include "llvm/Support/MemoryObject.h"
     27 #include "llvm/Support/TargetRegistry.h"
     28 #include "llvm/Support/raw_ostream.h"
     29 
     30 #define GET_REGINFO_ENUM
     31 #include "X86GenRegisterInfo.inc"
     32 #define GET_INSTRINFO_ENUM
     33 #include "X86GenInstrInfo.inc"
     34 #include "X86GenEDInfo.inc"
     35 
     36 using namespace llvm;
     37 using namespace llvm::X86Disassembler;
     38 
     39 void x86DisassemblerDebug(const char *file,
     40                           unsigned line,
     41                           const char *s) {
     42   dbgs() << file << ":" << line << ": " << s;
     43 }
     44 
     45 #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
     46 
     47 namespace llvm {
     48 
     49 // Fill-ins to make the compiler happy.  These constants are never actually
     50 //   assigned; they are just filler to make an automatically-generated switch
     51 //   statement work.
     52 namespace X86 {
     53   enum {
     54     BX_SI = 500,
     55     BX_DI = 501,
     56     BP_SI = 502,
     57     BP_DI = 503,
     58     sib   = 504,
     59     sib64 = 505
     60   };
     61 }
     62 
     63 extern Target TheX86_32Target, TheX86_64Target;
     64 
     65 }
     66 
     67 static bool translateInstruction(MCInst &target,
     68                                 InternalInstruction &source);
     69 
     70 X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) :
     71     MCDisassembler(STI),
     72     fMode(mode) {
     73 }
     74 
     75 X86GenericDisassembler::~X86GenericDisassembler() {
     76 }
     77 
     78 EDInstInfo *X86GenericDisassembler::getEDInfo() const {
     79   return instInfoX86;
     80 }
     81 
     82 /// regionReader - a callback function that wraps the readByte method from
     83 ///   MemoryObject.
     84 ///
     85 /// @param arg      - The generic callback parameter.  In this case, this should
     86 ///                   be a pointer to a MemoryObject.
     87 /// @param byte     - A pointer to the byte to be read.
     88 /// @param address  - The address to be read.
     89 static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
     90   MemoryObject* region = static_cast<MemoryObject*>(arg);
     91   return region->readByte(address, byte);
     92 }
     93 
     94 /// logger - a callback function that wraps the operator<< method from
     95 ///   raw_ostream.
     96 ///
     97 /// @param arg      - The generic callback parameter.  This should be a pointe
     98 ///                   to a raw_ostream.
     99 /// @param log      - A string to be logged.  logger() adds a newline.
    100 static void logger(void* arg, const char* log) {
    101   if (!arg)
    102     return;
    103 
    104   raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
    105   vStream << log << "\n";
    106 }
    107 
    108 //
    109 // Public interface for the disassembler
    110 //
    111 
    112 MCDisassembler::DecodeStatus
    113 X86GenericDisassembler::getInstruction(MCInst &instr,
    114                                        uint64_t &size,
    115                                        const MemoryObject &region,
    116                                        uint64_t address,
    117                                        raw_ostream &vStream,
    118                                        raw_ostream &cStream) const {
    119   InternalInstruction internalInstr;
    120 
    121   dlog_t loggerFn = logger;
    122   if (&vStream == &nulls())
    123     loggerFn = 0; // Disable logging completely if it's going to nulls().
    124 
    125   int ret = decodeInstruction(&internalInstr,
    126                               regionReader,
    127                               (void*)&region,
    128                               loggerFn,
    129                               (void*)&vStream,
    130                               address,
    131                               fMode);
    132 
    133   if (ret) {
    134     size = internalInstr.readerCursor - address;
    135     return Fail;
    136   }
    137   else {
    138     size = internalInstr.length;
    139     return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
    140   }
    141 }
    142 
    143 //
    144 // Private code that translates from struct InternalInstructions to MCInsts.
    145 //
    146 
    147 /// translateRegister - Translates an internal register to the appropriate LLVM
    148 ///   register, and appends it as an operand to an MCInst.
    149 ///
    150 /// @param mcInst     - The MCInst to append to.
    151 /// @param reg        - The Reg to append.
    152 static void translateRegister(MCInst &mcInst, Reg reg) {
    153 #define ENTRY(x) X86::x,
    154   uint8_t llvmRegnums[] = {
    155     ALL_REGS
    156     0
    157   };
    158 #undef ENTRY
    159 
    160   uint8_t llvmRegnum = llvmRegnums[reg];
    161   mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
    162 }
    163 
    164 /// translateImmediate  - Appends an immediate operand to an MCInst.
    165 ///
    166 /// @param mcInst       - The MCInst to append to.
    167 /// @param immediate    - The immediate value to append.
    168 /// @param operand      - The operand, as stored in the descriptor table.
    169 /// @param insn         - The internal instruction.
    170 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
    171                                const OperandSpecifier &operand,
    172                                InternalInstruction &insn) {
    173   // Sign-extend the immediate if necessary.
    174 
    175   OperandType type = operand.type;
    176 
    177   if (type == TYPE_RELv) {
    178     switch (insn.displacementSize) {
    179     default:
    180       break;
    181     case 1:
    182       type = TYPE_MOFFS8;
    183       break;
    184     case 2:
    185       type = TYPE_MOFFS16;
    186       break;
    187     case 4:
    188       type = TYPE_MOFFS32;
    189       break;
    190     case 8:
    191       type = TYPE_MOFFS64;
    192       break;
    193     }
    194   }
    195   // By default sign-extend all X86 immediates based on their encoding.
    196   else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
    197            type == TYPE_IMM64) {
    198     uint32_t Opcode = mcInst.getOpcode();
    199     switch (operand.encoding) {
    200     default:
    201       break;
    202     case ENCODING_IB:
    203       // Special case those X86 instructions that use the imm8 as a set of
    204       // bits, bit count, etc. and are not sign-extend.
    205       if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
    206 	  Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
    207 	  Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
    208 	  Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
    209 	  Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
    210 	  Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
    211 	  Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
    212 	  Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
    213 	  Opcode != X86::VINSERTPSrr)
    214 	type = TYPE_MOFFS8;
    215       break;
    216     case ENCODING_IW:
    217       type = TYPE_MOFFS16;
    218       break;
    219     case ENCODING_ID:
    220       type = TYPE_MOFFS32;
    221       break;
    222     case ENCODING_IO:
    223       type = TYPE_MOFFS64;
    224       break;
    225     }
    226   }
    227 
    228   switch (type) {
    229   case TYPE_XMM128:
    230     mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
    231     return;
    232   case TYPE_XMM256:
    233     mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
    234     return;
    235   case TYPE_MOFFS8:
    236   case TYPE_REL8:
    237     if(immediate & 0x80)
    238       immediate |= ~(0xffull);
    239     break;
    240   case TYPE_MOFFS16:
    241     if(immediate & 0x8000)
    242       immediate |= ~(0xffffull);
    243     break;
    244   case TYPE_MOFFS32:
    245   case TYPE_REL32:
    246   case TYPE_REL64:
    247     if(immediate & 0x80000000)
    248       immediate |= ~(0xffffffffull);
    249     break;
    250   case TYPE_MOFFS64:
    251   default:
    252     // operand is 64 bits wide.  Do nothing.
    253     break;
    254   }
    255 
    256   mcInst.addOperand(MCOperand::CreateImm(immediate));
    257 }
    258 
    259 /// translateRMRegister - Translates a register stored in the R/M field of the
    260 ///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
    261 /// @param mcInst       - The MCInst to append to.
    262 /// @param insn         - The internal instruction to extract the R/M field
    263 ///                       from.
    264 /// @return             - 0 on success; -1 otherwise
    265 static bool translateRMRegister(MCInst &mcInst,
    266                                 InternalInstruction &insn) {
    267   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    268     debug("A R/M register operand may not have a SIB byte");
    269     return true;
    270   }
    271 
    272   switch (insn.eaBase) {
    273   default:
    274     debug("Unexpected EA base register");
    275     return true;
    276   case EA_BASE_NONE:
    277     debug("EA_BASE_NONE for ModR/M base");
    278     return true;
    279 #define ENTRY(x) case EA_BASE_##x:
    280   ALL_EA_BASES
    281 #undef ENTRY
    282     debug("A R/M register operand may not have a base; "
    283           "the operand must be a register.");
    284     return true;
    285 #define ENTRY(x)                                                      \
    286   case EA_REG_##x:                                                    \
    287     mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
    288   ALL_REGS
    289 #undef ENTRY
    290   }
    291 
    292   return false;
    293 }
    294 
    295 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
    296 ///   fields of an internal instruction (and possibly its SIB byte) to a memory
    297 ///   operand in LLVM's format, and appends it to an MCInst.
    298 ///
    299 /// @param mcInst       - The MCInst to append to.
    300 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
    301 ///                       from.
    302 /// @return             - 0 on success; nonzero otherwise
    303 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
    304   // Addresses in an MCInst are represented as five operands:
    305   //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
    306   //                                SIB base
    307   //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
    308   //                                scale amount
    309   //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
    310   //                                the index (which is multiplied by the
    311   //                                scale amount)
    312   //   4. displacement  (immediate) 0, or the displacement if there is one
    313   //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
    314   //                                if we have segment overrides
    315 
    316   MCOperand baseReg;
    317   MCOperand scaleAmount;
    318   MCOperand indexReg;
    319   MCOperand displacement;
    320   MCOperand segmentReg;
    321 
    322   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    323     if (insn.sibBase != SIB_BASE_NONE) {
    324       switch (insn.sibBase) {
    325       default:
    326         debug("Unexpected sibBase");
    327         return true;
    328 #define ENTRY(x)                                          \
    329       case SIB_BASE_##x:                                  \
    330         baseReg = MCOperand::CreateReg(X86::x); break;
    331       ALL_SIB_BASES
    332 #undef ENTRY
    333       }
    334     } else {
    335       baseReg = MCOperand::CreateReg(0);
    336     }
    337 
    338     if (insn.sibIndex != SIB_INDEX_NONE) {
    339       switch (insn.sibIndex) {
    340       default:
    341         debug("Unexpected sibIndex");
    342         return true;
    343 #define ENTRY(x)                                          \
    344       case SIB_INDEX_##x:                                 \
    345         indexReg = MCOperand::CreateReg(X86::x); break;
    346       EA_BASES_32BIT
    347       EA_BASES_64BIT
    348 #undef ENTRY
    349       }
    350     } else {
    351       indexReg = MCOperand::CreateReg(0);
    352     }
    353 
    354     scaleAmount = MCOperand::CreateImm(insn.sibScale);
    355   } else {
    356     switch (insn.eaBase) {
    357     case EA_BASE_NONE:
    358       if (insn.eaDisplacement == EA_DISP_NONE) {
    359         debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
    360         return true;
    361       }
    362       if (insn.mode == MODE_64BIT)
    363         baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
    364       else
    365         baseReg = MCOperand::CreateReg(0);
    366 
    367       indexReg = MCOperand::CreateReg(0);
    368       break;
    369     case EA_BASE_BX_SI:
    370       baseReg = MCOperand::CreateReg(X86::BX);
    371       indexReg = MCOperand::CreateReg(X86::SI);
    372       break;
    373     case EA_BASE_BX_DI:
    374       baseReg = MCOperand::CreateReg(X86::BX);
    375       indexReg = MCOperand::CreateReg(X86::DI);
    376       break;
    377     case EA_BASE_BP_SI:
    378       baseReg = MCOperand::CreateReg(X86::BP);
    379       indexReg = MCOperand::CreateReg(X86::SI);
    380       break;
    381     case EA_BASE_BP_DI:
    382       baseReg = MCOperand::CreateReg(X86::BP);
    383       indexReg = MCOperand::CreateReg(X86::DI);
    384       break;
    385     default:
    386       indexReg = MCOperand::CreateReg(0);
    387       switch (insn.eaBase) {
    388       default:
    389         debug("Unexpected eaBase");
    390         return true;
    391         // Here, we will use the fill-ins defined above.  However,
    392         //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
    393         //   sib and sib64 were handled in the top-level if, so they're only
    394         //   placeholders to keep the compiler happy.
    395 #define ENTRY(x)                                        \
    396       case EA_BASE_##x:                                 \
    397         baseReg = MCOperand::CreateReg(X86::x); break;
    398       ALL_EA_BASES
    399 #undef ENTRY
    400 #define ENTRY(x) case EA_REG_##x:
    401       ALL_REGS
    402 #undef ENTRY
    403         debug("A R/M memory operand may not be a register; "
    404               "the base field must be a base.");
    405         return true;
    406       }
    407     }
    408 
    409     scaleAmount = MCOperand::CreateImm(1);
    410   }
    411 
    412   displacement = MCOperand::CreateImm(insn.displacement);
    413 
    414   static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
    415     0,        // SEG_OVERRIDE_NONE
    416     X86::CS,
    417     X86::SS,
    418     X86::DS,
    419     X86::ES,
    420     X86::FS,
    421     X86::GS
    422   };
    423 
    424   segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
    425 
    426   mcInst.addOperand(baseReg);
    427   mcInst.addOperand(scaleAmount);
    428   mcInst.addOperand(indexReg);
    429   mcInst.addOperand(displacement);
    430   mcInst.addOperand(segmentReg);
    431   return false;
    432 }
    433 
    434 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
    435 ///   byte of an instruction to LLVM form, and appends it to an MCInst.
    436 ///
    437 /// @param mcInst       - The MCInst to append to.
    438 /// @param operand      - The operand, as stored in the descriptor table.
    439 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
    440 ///                       from.
    441 /// @return             - 0 on success; nonzero otherwise
    442 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
    443                         InternalInstruction &insn) {
    444   switch (operand.type) {
    445   default:
    446     debug("Unexpected type for a R/M operand");
    447     return true;
    448   case TYPE_R8:
    449   case TYPE_R16:
    450   case TYPE_R32:
    451   case TYPE_R64:
    452   case TYPE_Rv:
    453   case TYPE_MM:
    454   case TYPE_MM32:
    455   case TYPE_MM64:
    456   case TYPE_XMM:
    457   case TYPE_XMM32:
    458   case TYPE_XMM64:
    459   case TYPE_XMM128:
    460   case TYPE_XMM256:
    461   case TYPE_DEBUGREG:
    462   case TYPE_CONTROLREG:
    463     return translateRMRegister(mcInst, insn);
    464   case TYPE_M:
    465   case TYPE_M8:
    466   case TYPE_M16:
    467   case TYPE_M32:
    468   case TYPE_M64:
    469   case TYPE_M128:
    470   case TYPE_M256:
    471   case TYPE_M512:
    472   case TYPE_Mv:
    473   case TYPE_M32FP:
    474   case TYPE_M64FP:
    475   case TYPE_M80FP:
    476   case TYPE_M16INT:
    477   case TYPE_M32INT:
    478   case TYPE_M64INT:
    479   case TYPE_M1616:
    480   case TYPE_M1632:
    481   case TYPE_M1664:
    482   case TYPE_LEA:
    483     return translateRMMemory(mcInst, insn);
    484   }
    485 }
    486 
    487 /// translateFPRegister - Translates a stack position on the FPU stack to its
    488 ///   LLVM form, and appends it to an MCInst.
    489 ///
    490 /// @param mcInst       - The MCInst to append to.
    491 /// @param stackPos     - The stack position to translate.
    492 /// @return             - 0 on success; nonzero otherwise.
    493 static bool translateFPRegister(MCInst &mcInst,
    494                                uint8_t stackPos) {
    495   if (stackPos >= 8) {
    496     debug("Invalid FP stack position");
    497     return true;
    498   }
    499 
    500   mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
    501 
    502   return false;
    503 }
    504 
    505 /// translateOperand - Translates an operand stored in an internal instruction
    506 ///   to LLVM's format and appends it to an MCInst.
    507 ///
    508 /// @param mcInst       - The MCInst to append to.
    509 /// @param operand      - The operand, as stored in the descriptor table.
    510 /// @param insn         - The internal instruction.
    511 /// @return             - false on success; true otherwise.
    512 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
    513                              InternalInstruction &insn) {
    514   switch (operand.encoding) {
    515   default:
    516     debug("Unhandled operand encoding during translation");
    517     return true;
    518   case ENCODING_REG:
    519     translateRegister(mcInst, insn.reg);
    520     return false;
    521   case ENCODING_RM:
    522     return translateRM(mcInst, operand, insn);
    523   case ENCODING_CB:
    524   case ENCODING_CW:
    525   case ENCODING_CD:
    526   case ENCODING_CP:
    527   case ENCODING_CO:
    528   case ENCODING_CT:
    529     debug("Translation of code offsets isn't supported.");
    530     return true;
    531   case ENCODING_IB:
    532   case ENCODING_IW:
    533   case ENCODING_ID:
    534   case ENCODING_IO:
    535   case ENCODING_Iv:
    536   case ENCODING_Ia:
    537     translateImmediate(mcInst,
    538                        insn.immediates[insn.numImmediatesTranslated++],
    539                        operand,
    540                        insn);
    541     return false;
    542   case ENCODING_RB:
    543   case ENCODING_RW:
    544   case ENCODING_RD:
    545   case ENCODING_RO:
    546     translateRegister(mcInst, insn.opcodeRegister);
    547     return false;
    548   case ENCODING_I:
    549     return translateFPRegister(mcInst, insn.opcodeModifier);
    550   case ENCODING_Rv:
    551     translateRegister(mcInst, insn.opcodeRegister);
    552     return false;
    553   case ENCODING_VVVV:
    554     translateRegister(mcInst, insn.vvvv);
    555     return false;
    556   case ENCODING_DUP:
    557     return translateOperand(mcInst,
    558                             insn.spec->operands[operand.type - TYPE_DUP0],
    559                             insn);
    560   }
    561 }
    562 
    563 /// translateInstruction - Translates an internal instruction and all its
    564 ///   operands to an MCInst.
    565 ///
    566 /// @param mcInst       - The MCInst to populate with the instruction's data.
    567 /// @param insn         - The internal instruction.
    568 /// @return             - false on success; true otherwise.
    569 static bool translateInstruction(MCInst &mcInst,
    570                                 InternalInstruction &insn) {
    571   if (!insn.spec) {
    572     debug("Instruction has no specification");
    573     return true;
    574   }
    575 
    576   mcInst.setOpcode(insn.instructionID);
    577 
    578   int index;
    579 
    580   insn.numImmediatesTranslated = 0;
    581 
    582   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
    583     if (insn.spec->operands[index].encoding != ENCODING_NONE) {
    584       if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
    585         return true;
    586       }
    587     }
    588   }
    589 
    590   return false;
    591 }
    592 
    593 static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) {
    594   return new X86Disassembler::X86_32Disassembler(STI);
    595 }
    596 
    597 static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) {
    598   return new X86Disassembler::X86_64Disassembler(STI);
    599 }
    600 
    601 extern "C" void LLVMInitializeX86Disassembler() {
    602   // Register the disassembler.
    603   TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
    604                                          createX86_32Disassembler);
    605   TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
    606                                          createX86_64Disassembler);
    607 }
    608