Home | History | Annotate | Download | only in Disassembler
      1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is part of the X86 Disassembler.
     11 // It contains code to translate the data produced by the decoder into
     12 //  MCInsts.
     13 // Documentation for the disassembler can be found in X86Disassembler.h.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "X86Disassembler.h"
     18 #include "X86DisassemblerDecoder.h"
     19 
     20 #include "llvm/MC/EDInstInfo.h"
     21 #include "llvm/MC/MCExpr.h"
     22 #include "llvm/MC/MCContext.h"
     23 #include "llvm/MC/MCDisassembler.h"
     24 #include "llvm/MC/MCInst.h"
     25 #include "llvm/MC/MCInstrInfo.h"
     26 #include "llvm/MC/MCSubtargetInfo.h"
     27 #include "llvm/Support/Debug.h"
     28 #include "llvm/Support/MemoryObject.h"
     29 #include "llvm/Support/TargetRegistry.h"
     30 #include "llvm/Support/raw_ostream.h"
     31 
     32 #define GET_REGINFO_ENUM
     33 #include "X86GenRegisterInfo.inc"
     34 #define GET_INSTRINFO_ENUM
     35 #include "X86GenInstrInfo.inc"
     36 #include "X86GenEDInfo.inc"
     37 
     38 using namespace llvm;
     39 using namespace llvm::X86Disassembler;
     40 
     41 void x86DisassemblerDebug(const char *file,
     42                           unsigned line,
     43                           const char *s) {
     44   dbgs() << file << ":" << line << ": " << s;
     45 }
     46 
     47 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
     48   const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
     49   return MII->getName(Opcode);
     50 }
     51 
     52 #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
     53 
     54 namespace llvm {
     55 
     56 // Fill-ins to make the compiler happy.  These constants are never actually
     57 //   assigned; they are just filler to make an automatically-generated switch
     58 //   statement work.
     59 namespace X86 {
     60   enum {
     61     BX_SI = 500,
     62     BX_DI = 501,
     63     BP_SI = 502,
     64     BP_DI = 503,
     65     sib   = 504,
     66     sib64 = 505
     67   };
     68 }
     69 
     70 extern Target TheX86_32Target, TheX86_64Target;
     71 
     72 }
     73 
     74 static bool translateInstruction(MCInst &target,
     75                                 InternalInstruction &source,
     76                                 const MCDisassembler *Dis);
     77 
     78 X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
     79                                                DisassemblerMode mode,
     80                                                const MCInstrInfo *MII)
     81   : MCDisassembler(STI), MII(MII), fMode(mode) {}
     82 
     83 X86GenericDisassembler::~X86GenericDisassembler() {
     84   delete MII;
     85 }
     86 
     87 const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
     88   return instInfoX86;
     89 }
     90 
     91 /// regionReader - a callback function that wraps the readByte method from
     92 ///   MemoryObject.
     93 ///
     94 /// @param arg      - The generic callback parameter.  In this case, this should
     95 ///                   be a pointer to a MemoryObject.
     96 /// @param byte     - A pointer to the byte to be read.
     97 /// @param address  - The address to be read.
     98 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
     99   const MemoryObject* region = static_cast<const MemoryObject*>(arg);
    100   return region->readByte(address, byte);
    101 }
    102 
    103 /// logger - a callback function that wraps the operator<< method from
    104 ///   raw_ostream.
    105 ///
    106 /// @param arg      - The generic callback parameter.  This should be a pointe
    107 ///                   to a raw_ostream.
    108 /// @param log      - A string to be logged.  logger() adds a newline.
    109 static void logger(void* arg, const char* log) {
    110   if (!arg)
    111     return;
    112 
    113   raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
    114   vStream << log << "\n";
    115 }
    116 
    117 //
    118 // Public interface for the disassembler
    119 //
    120 
    121 MCDisassembler::DecodeStatus
    122 X86GenericDisassembler::getInstruction(MCInst &instr,
    123                                        uint64_t &size,
    124                                        const MemoryObject &region,
    125                                        uint64_t address,
    126                                        raw_ostream &vStream,
    127                                        raw_ostream &cStream) const {
    128   CommentStream = &cStream;
    129 
    130   InternalInstruction internalInstr;
    131 
    132   dlog_t loggerFn = logger;
    133   if (&vStream == &nulls())
    134     loggerFn = 0; // Disable logging completely if it's going to nulls().
    135 
    136   int ret = decodeInstruction(&internalInstr,
    137                               regionReader,
    138                               (const void*)&region,
    139                               loggerFn,
    140                               (void*)&vStream,
    141                               (const void*)MII,
    142                               address,
    143                               fMode);
    144 
    145   if (ret) {
    146     size = internalInstr.readerCursor - address;
    147     return Fail;
    148   }
    149   else {
    150     size = internalInstr.length;
    151     return (!translateInstruction(instr, internalInstr, this)) ?
    152             Success : Fail;
    153   }
    154 }
    155 
    156 //
    157 // Private code that translates from struct InternalInstructions to MCInsts.
    158 //
    159 
    160 /// translateRegister - Translates an internal register to the appropriate LLVM
    161 ///   register, and appends it as an operand to an MCInst.
    162 ///
    163 /// @param mcInst     - The MCInst to append to.
    164 /// @param reg        - The Reg to append.
    165 static void translateRegister(MCInst &mcInst, Reg reg) {
    166 #define ENTRY(x) X86::x,
    167   uint8_t llvmRegnums[] = {
    168     ALL_REGS
    169     0
    170   };
    171 #undef ENTRY
    172 
    173   uint8_t llvmRegnum = llvmRegnums[reg];
    174   mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
    175 }
    176 
    177 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
    178 /// immediate Value in the MCInst.
    179 ///
    180 /// @param Value      - The immediate Value, has had any PC adjustment made by
    181 ///                     the caller.
    182 /// @param isBranch   - If the instruction is a branch instruction
    183 /// @param Address    - The starting address of the instruction
    184 /// @param Offset     - The byte offset to this immediate in the instruction
    185 /// @param Width      - The byte width of this immediate in the instruction
    186 ///
    187 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
    188 /// called then that function is called to get any symbolic information for the
    189 /// immediate in the instruction using the Address, Offset and Width.  If that
    190 /// returns non-zero then the symbolic information it returns is used to create
    191 /// an MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
    192 /// returns zero and isBranch is true then a symbol look up for immediate Value
    193 /// is done and if a symbol is found an MCExpr is created with that, else
    194 /// an MCExpr with the immediate Value is created.  This function returns true
    195 /// if it adds an operand to the MCInst and false otherwise.
    196 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
    197                                      uint64_t Address, uint64_t Offset,
    198                                      uint64_t Width, MCInst &MI,
    199                                      const MCDisassembler *Dis) {
    200   LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
    201   struct LLVMOpInfo1 SymbolicOp;
    202   memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
    203   SymbolicOp.Value = Value;
    204   void *DisInfo = Dis->getDisInfoBlock();
    205 
    206   if (!getOpInfo ||
    207       !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
    208     // Clear SymbolicOp.Value from above and also all other fields.
    209     memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
    210     LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
    211     if (!SymbolLookUp)
    212       return false;
    213     uint64_t ReferenceType;
    214     if (isBranch)
    215        ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
    216     else
    217        ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
    218     const char *ReferenceName;
    219     const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
    220                                     &ReferenceName);
    221     if (Name) {
    222       SymbolicOp.AddSymbol.Name = Name;
    223       SymbolicOp.AddSymbol.Present = true;
    224     }
    225     // For branches always create an MCExpr so it gets printed as hex address.
    226     else if (isBranch) {
    227       SymbolicOp.Value = Value;
    228     }
    229     if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
    230       (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
    231     if (!Name && !isBranch)
    232       return false;
    233   }
    234 
    235   MCContext *Ctx = Dis->getMCContext();
    236   const MCExpr *Add = NULL;
    237   if (SymbolicOp.AddSymbol.Present) {
    238     if (SymbolicOp.AddSymbol.Name) {
    239       StringRef Name(SymbolicOp.AddSymbol.Name);
    240       MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
    241       Add = MCSymbolRefExpr::Create(Sym, *Ctx);
    242     } else {
    243       Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
    244     }
    245   }
    246 
    247   const MCExpr *Sub = NULL;
    248   if (SymbolicOp.SubtractSymbol.Present) {
    249       if (SymbolicOp.SubtractSymbol.Name) {
    250       StringRef Name(SymbolicOp.SubtractSymbol.Name);
    251       MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
    252       Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
    253     } else {
    254       Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
    255     }
    256   }
    257 
    258   const MCExpr *Off = NULL;
    259   if (SymbolicOp.Value != 0)
    260     Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
    261 
    262   const MCExpr *Expr;
    263   if (Sub) {
    264     const MCExpr *LHS;
    265     if (Add)
    266       LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
    267     else
    268       LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
    269     if (Off != 0)
    270       Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
    271     else
    272       Expr = LHS;
    273   } else if (Add) {
    274     if (Off != 0)
    275       Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
    276     else
    277       Expr = Add;
    278   } else {
    279     if (Off != 0)
    280       Expr = Off;
    281     else
    282       Expr = MCConstantExpr::Create(0, *Ctx);
    283   }
    284 
    285   MI.addOperand(MCOperand::CreateExpr(Expr));
    286 
    287   return true;
    288 }
    289 
    290 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
    291 /// referenced by a load instruction with the base register that is the rip.
    292 /// These can often be addresses in a literal pool.  The Address of the
    293 /// instruction and its immediate Value are used to determine the address
    294 /// being referenced in the literal pool entry.  The SymbolLookUp call back will
    295 /// return a pointer to a literal 'C' string if the referenced address is an
    296 /// address into a section with 'C' string literals.
    297 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
    298                                             const void *Decoder) {
    299   const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
    300   LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
    301   if (SymbolLookUp) {
    302     void *DisInfo = Dis->getDisInfoBlock();
    303     uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
    304     const char *ReferenceName;
    305     (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
    306     if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
    307       (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
    308   }
    309 }
    310 
    311 /// translateImmediate  - Appends an immediate operand to an MCInst.
    312 ///
    313 /// @param mcInst       - The MCInst to append to.
    314 /// @param immediate    - The immediate value to append.
    315 /// @param operand      - The operand, as stored in the descriptor table.
    316 /// @param insn         - The internal instruction.
    317 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
    318                                const OperandSpecifier &operand,
    319                                InternalInstruction &insn,
    320                                const MCDisassembler *Dis) {
    321   // Sign-extend the immediate if necessary.
    322 
    323   OperandType type = (OperandType)operand.type;
    324 
    325   bool isBranch = false;
    326   uint64_t pcrel = 0;
    327   if (type == TYPE_RELv) {
    328     isBranch = true;
    329     pcrel = insn.startLocation +
    330             insn.immediateOffset + insn.immediateSize;
    331     switch (insn.displacementSize) {
    332     default:
    333       break;
    334     case 1:
    335       type = TYPE_MOFFS8;
    336       break;
    337     case 2:
    338       type = TYPE_MOFFS16;
    339       break;
    340     case 4:
    341       type = TYPE_MOFFS32;
    342       break;
    343     case 8:
    344       type = TYPE_MOFFS64;
    345       break;
    346     }
    347   }
    348   // By default sign-extend all X86 immediates based on their encoding.
    349   else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
    350            type == TYPE_IMM64) {
    351     uint32_t Opcode = mcInst.getOpcode();
    352     switch (operand.encoding) {
    353     default:
    354       break;
    355     case ENCODING_IB:
    356       // Special case those X86 instructions that use the imm8 as a set of
    357       // bits, bit count, etc. and are not sign-extend.
    358       if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
    359           Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
    360           Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
    361           Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
    362           Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
    363           Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
    364           Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
    365           Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
    366           Opcode != X86::VINSERTPSrr)
    367         type = TYPE_MOFFS8;
    368       break;
    369     case ENCODING_IW:
    370       type = TYPE_MOFFS16;
    371       break;
    372     case ENCODING_ID:
    373       type = TYPE_MOFFS32;
    374       break;
    375     case ENCODING_IO:
    376       type = TYPE_MOFFS64;
    377       break;
    378     }
    379   }
    380 
    381   switch (type) {
    382   case TYPE_XMM32:
    383   case TYPE_XMM64:
    384   case TYPE_XMM128:
    385     mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
    386     return;
    387   case TYPE_XMM256:
    388     mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
    389     return;
    390   case TYPE_REL8:
    391     isBranch = true;
    392     pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
    393     // fall through to sign extend the immediate if needed.
    394   case TYPE_MOFFS8:
    395     if(immediate & 0x80)
    396       immediate |= ~(0xffull);
    397     break;
    398   case TYPE_MOFFS16:
    399     if(immediate & 0x8000)
    400       immediate |= ~(0xffffull);
    401     break;
    402   case TYPE_REL32:
    403   case TYPE_REL64:
    404     isBranch = true;
    405     pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
    406     // fall through to sign extend the immediate if needed.
    407   case TYPE_MOFFS32:
    408     if(immediate & 0x80000000)
    409       immediate |= ~(0xffffffffull);
    410     break;
    411   case TYPE_MOFFS64:
    412   default:
    413     // operand is 64 bits wide.  Do nothing.
    414     break;
    415   }
    416 
    417   if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
    418                                insn.immediateOffset, insn.immediateSize,
    419                                mcInst, Dis))
    420     mcInst.addOperand(MCOperand::CreateImm(immediate));
    421 }
    422 
    423 /// translateRMRegister - Translates a register stored in the R/M field of the
    424 ///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
    425 /// @param mcInst       - The MCInst to append to.
    426 /// @param insn         - The internal instruction to extract the R/M field
    427 ///                       from.
    428 /// @return             - 0 on success; -1 otherwise
    429 static bool translateRMRegister(MCInst &mcInst,
    430                                 InternalInstruction &insn) {
    431   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    432     debug("A R/M register operand may not have a SIB byte");
    433     return true;
    434   }
    435 
    436   switch (insn.eaBase) {
    437   default:
    438     debug("Unexpected EA base register");
    439     return true;
    440   case EA_BASE_NONE:
    441     debug("EA_BASE_NONE for ModR/M base");
    442     return true;
    443 #define ENTRY(x) case EA_BASE_##x:
    444   ALL_EA_BASES
    445 #undef ENTRY
    446     debug("A R/M register operand may not have a base; "
    447           "the operand must be a register.");
    448     return true;
    449 #define ENTRY(x)                                                      \
    450   case EA_REG_##x:                                                    \
    451     mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
    452   ALL_REGS
    453 #undef ENTRY
    454   }
    455 
    456   return false;
    457 }
    458 
    459 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
    460 ///   fields of an internal instruction (and possibly its SIB byte) to a memory
    461 ///   operand in LLVM's format, and appends it to an MCInst.
    462 ///
    463 /// @param mcInst       - The MCInst to append to.
    464 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
    465 ///                       from.
    466 /// @return             - 0 on success; nonzero otherwise
    467 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
    468                               const MCDisassembler *Dis) {
    469   // Addresses in an MCInst are represented as five operands:
    470   //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
    471   //                                SIB base
    472   //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
    473   //                                scale amount
    474   //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
    475   //                                the index (which is multiplied by the
    476   //                                scale amount)
    477   //   4. displacement  (immediate) 0, or the displacement if there is one
    478   //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
    479   //                                if we have segment overrides
    480 
    481   MCOperand baseReg;
    482   MCOperand scaleAmount;
    483   MCOperand indexReg;
    484   MCOperand displacement;
    485   MCOperand segmentReg;
    486   uint64_t pcrel = 0;
    487 
    488   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    489     if (insn.sibBase != SIB_BASE_NONE) {
    490       switch (insn.sibBase) {
    491       default:
    492         debug("Unexpected sibBase");
    493         return true;
    494 #define ENTRY(x)                                          \
    495       case SIB_BASE_##x:                                  \
    496         baseReg = MCOperand::CreateReg(X86::x); break;
    497       ALL_SIB_BASES
    498 #undef ENTRY
    499       }
    500     } else {
    501       baseReg = MCOperand::CreateReg(0);
    502     }
    503 
    504     // Check whether we are handling VSIB addressing mode for GATHER.
    505     // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
    506     // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
    507     // I don't see a way to get the correct IndexReg in readSIB:
    508     //   We can tell whether it is VSIB or SIB after instruction ID is decoded,
    509     //   but instruction ID may not be decoded yet when calling readSIB.
    510     uint32_t Opcode = mcInst.getOpcode();
    511     bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
    512                        Opcode == X86::VGATHERDPDYrm ||
    513                        Opcode == X86::VGATHERQPDrm ||
    514                        Opcode == X86::VGATHERDPSrm ||
    515                        Opcode == X86::VGATHERQPSrm ||
    516                        Opcode == X86::VPGATHERDQrm ||
    517                        Opcode == X86::VPGATHERDQYrm ||
    518                        Opcode == X86::VPGATHERQQrm ||
    519                        Opcode == X86::VPGATHERDDrm ||
    520                        Opcode == X86::VPGATHERQDrm);
    521     bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
    522                        Opcode == X86::VGATHERDPSYrm ||
    523                        Opcode == X86::VGATHERQPSYrm ||
    524                        Opcode == X86::VPGATHERQQYrm ||
    525                        Opcode == X86::VPGATHERDDYrm ||
    526                        Opcode == X86::VPGATHERQDYrm);
    527     if (IndexIs128 || IndexIs256) {
    528       unsigned IndexOffset = insn.sibIndex -
    529                          (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
    530       SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
    531       insn.sibIndex = (SIBIndex)(IndexBase +
    532                            (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
    533     }
    534 
    535     if (insn.sibIndex != SIB_INDEX_NONE) {
    536       switch (insn.sibIndex) {
    537       default:
    538         debug("Unexpected sibIndex");
    539         return true;
    540 #define ENTRY(x)                                          \
    541       case SIB_INDEX_##x:                                 \
    542         indexReg = MCOperand::CreateReg(X86::x); break;
    543       EA_BASES_32BIT
    544       EA_BASES_64BIT
    545       REGS_XMM
    546       REGS_YMM
    547 #undef ENTRY
    548       }
    549     } else {
    550       indexReg = MCOperand::CreateReg(0);
    551     }
    552 
    553     scaleAmount = MCOperand::CreateImm(insn.sibScale);
    554   } else {
    555     switch (insn.eaBase) {
    556     case EA_BASE_NONE:
    557       if (insn.eaDisplacement == EA_DISP_NONE) {
    558         debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
    559         return true;
    560       }
    561       if (insn.mode == MODE_64BIT){
    562         pcrel = insn.startLocation +
    563                 insn.displacementOffset + insn.displacementSize;
    564         tryAddingPcLoadReferenceComment(insn.startLocation +
    565                                         insn.displacementOffset,
    566                                         insn.displacement + pcrel, Dis);
    567         baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
    568       }
    569       else
    570         baseReg = MCOperand::CreateReg(0);
    571 
    572       indexReg = MCOperand::CreateReg(0);
    573       break;
    574     case EA_BASE_BX_SI:
    575       baseReg = MCOperand::CreateReg(X86::BX);
    576       indexReg = MCOperand::CreateReg(X86::SI);
    577       break;
    578     case EA_BASE_BX_DI:
    579       baseReg = MCOperand::CreateReg(X86::BX);
    580       indexReg = MCOperand::CreateReg(X86::DI);
    581       break;
    582     case EA_BASE_BP_SI:
    583       baseReg = MCOperand::CreateReg(X86::BP);
    584       indexReg = MCOperand::CreateReg(X86::SI);
    585       break;
    586     case EA_BASE_BP_DI:
    587       baseReg = MCOperand::CreateReg(X86::BP);
    588       indexReg = MCOperand::CreateReg(X86::DI);
    589       break;
    590     default:
    591       indexReg = MCOperand::CreateReg(0);
    592       switch (insn.eaBase) {
    593       default:
    594         debug("Unexpected eaBase");
    595         return true;
    596         // Here, we will use the fill-ins defined above.  However,
    597         //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
    598         //   sib and sib64 were handled in the top-level if, so they're only
    599         //   placeholders to keep the compiler happy.
    600 #define ENTRY(x)                                        \
    601       case EA_BASE_##x:                                 \
    602         baseReg = MCOperand::CreateReg(X86::x); break;
    603       ALL_EA_BASES
    604 #undef ENTRY
    605 #define ENTRY(x) case EA_REG_##x:
    606       ALL_REGS
    607 #undef ENTRY
    608         debug("A R/M memory operand may not be a register; "
    609               "the base field must be a base.");
    610         return true;
    611       }
    612     }
    613 
    614     scaleAmount = MCOperand::CreateImm(1);
    615   }
    616 
    617   displacement = MCOperand::CreateImm(insn.displacement);
    618 
    619   static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
    620     0,        // SEG_OVERRIDE_NONE
    621     X86::CS,
    622     X86::SS,
    623     X86::DS,
    624     X86::ES,
    625     X86::FS,
    626     X86::GS
    627   };
    628 
    629   segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
    630 
    631   mcInst.addOperand(baseReg);
    632   mcInst.addOperand(scaleAmount);
    633   mcInst.addOperand(indexReg);
    634   if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
    635                                insn.startLocation, insn.displacementOffset,
    636                                insn.displacementSize, mcInst, Dis))
    637     mcInst.addOperand(displacement);
    638   mcInst.addOperand(segmentReg);
    639   return false;
    640 }
    641 
    642 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
    643 ///   byte of an instruction to LLVM form, and appends it to an MCInst.
    644 ///
    645 /// @param mcInst       - The MCInst to append to.
    646 /// @param operand      - The operand, as stored in the descriptor table.
    647 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
    648 ///                       from.
    649 /// @return             - 0 on success; nonzero otherwise
    650 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
    651                         InternalInstruction &insn, const MCDisassembler *Dis) {
    652   switch (operand.type) {
    653   default:
    654     debug("Unexpected type for a R/M operand");
    655     return true;
    656   case TYPE_R8:
    657   case TYPE_R16:
    658   case TYPE_R32:
    659   case TYPE_R64:
    660   case TYPE_Rv:
    661   case TYPE_MM:
    662   case TYPE_MM32:
    663   case TYPE_MM64:
    664   case TYPE_XMM:
    665   case TYPE_XMM32:
    666   case TYPE_XMM64:
    667   case TYPE_XMM128:
    668   case TYPE_XMM256:
    669   case TYPE_DEBUGREG:
    670   case TYPE_CONTROLREG:
    671     return translateRMRegister(mcInst, insn);
    672   case TYPE_M:
    673   case TYPE_M8:
    674   case TYPE_M16:
    675   case TYPE_M32:
    676   case TYPE_M64:
    677   case TYPE_M128:
    678   case TYPE_M256:
    679   case TYPE_M512:
    680   case TYPE_Mv:
    681   case TYPE_M32FP:
    682   case TYPE_M64FP:
    683   case TYPE_M80FP:
    684   case TYPE_M16INT:
    685   case TYPE_M32INT:
    686   case TYPE_M64INT:
    687   case TYPE_M1616:
    688   case TYPE_M1632:
    689   case TYPE_M1664:
    690   case TYPE_LEA:
    691     return translateRMMemory(mcInst, insn, Dis);
    692   }
    693 }
    694 
    695 /// translateFPRegister - Translates a stack position on the FPU stack to its
    696 ///   LLVM form, and appends it to an MCInst.
    697 ///
    698 /// @param mcInst       - The MCInst to append to.
    699 /// @param stackPos     - The stack position to translate.
    700 /// @return             - 0 on success; nonzero otherwise.
    701 static bool translateFPRegister(MCInst &mcInst,
    702                                uint8_t stackPos) {
    703   if (stackPos >= 8) {
    704     debug("Invalid FP stack position");
    705     return true;
    706   }
    707 
    708   mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
    709 
    710   return false;
    711 }
    712 
    713 /// translateOperand - Translates an operand stored in an internal instruction
    714 ///   to LLVM's format and appends it to an MCInst.
    715 ///
    716 /// @param mcInst       - The MCInst to append to.
    717 /// @param operand      - The operand, as stored in the descriptor table.
    718 /// @param insn         - The internal instruction.
    719 /// @return             - false on success; true otherwise.
    720 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
    721                              InternalInstruction &insn,
    722                              const MCDisassembler *Dis) {
    723   switch (operand.encoding) {
    724   default:
    725     debug("Unhandled operand encoding during translation");
    726     return true;
    727   case ENCODING_REG:
    728     translateRegister(mcInst, insn.reg);
    729     return false;
    730   case ENCODING_RM:
    731     return translateRM(mcInst, operand, insn, Dis);
    732   case ENCODING_CB:
    733   case ENCODING_CW:
    734   case ENCODING_CD:
    735   case ENCODING_CP:
    736   case ENCODING_CO:
    737   case ENCODING_CT:
    738     debug("Translation of code offsets isn't supported.");
    739     return true;
    740   case ENCODING_IB:
    741   case ENCODING_IW:
    742   case ENCODING_ID:
    743   case ENCODING_IO:
    744   case ENCODING_Iv:
    745   case ENCODING_Ia:
    746     translateImmediate(mcInst,
    747                        insn.immediates[insn.numImmediatesTranslated++],
    748                        operand,
    749                        insn,
    750                        Dis);
    751     return false;
    752   case ENCODING_RB:
    753   case ENCODING_RW:
    754   case ENCODING_RD:
    755   case ENCODING_RO:
    756     translateRegister(mcInst, insn.opcodeRegister);
    757     return false;
    758   case ENCODING_I:
    759     return translateFPRegister(mcInst, insn.opcodeModifier);
    760   case ENCODING_Rv:
    761     translateRegister(mcInst, insn.opcodeRegister);
    762     return false;
    763   case ENCODING_VVVV:
    764     translateRegister(mcInst, insn.vvvv);
    765     return false;
    766   case ENCODING_DUP:
    767     return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
    768                             insn, Dis);
    769   }
    770 }
    771 
    772 /// translateInstruction - Translates an internal instruction and all its
    773 ///   operands to an MCInst.
    774 ///
    775 /// @param mcInst       - The MCInst to populate with the instruction's data.
    776 /// @param insn         - The internal instruction.
    777 /// @return             - false on success; true otherwise.
    778 static bool translateInstruction(MCInst &mcInst,
    779                                 InternalInstruction &insn,
    780                                 const MCDisassembler *Dis) {
    781   if (!insn.spec) {
    782     debug("Instruction has no specification");
    783     return true;
    784   }
    785 
    786   mcInst.setOpcode(insn.instructionID);
    787 
    788   int index;
    789 
    790   insn.numImmediatesTranslated = 0;
    791 
    792   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
    793     if (insn.operands[index].encoding != ENCODING_NONE) {
    794       if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
    795         return true;
    796       }
    797     }
    798   }
    799 
    800   return false;
    801 }
    802 
    803 static MCDisassembler *createX86_32Disassembler(const Target &T,
    804                                                 const MCSubtargetInfo &STI) {
    805   return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT,
    806                                                      T.createMCInstrInfo());
    807 }
    808 
    809 static MCDisassembler *createX86_64Disassembler(const Target &T,
    810                                                 const MCSubtargetInfo &STI) {
    811   return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT,
    812                                                      T.createMCInstrInfo());
    813 }
    814 
    815 extern "C" void LLVMInitializeX86Disassembler() {
    816   // Register the disassembler.
    817   TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
    818                                          createX86_32Disassembler);
    819   TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
    820                                          createX86_64Disassembler);
    821 }
    822