Home | History | Annotate | Download | only in Disassembler
      1 //===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is part of the X86 Disassembler.
     11 // It contains code to translate the data produced by the decoder into
     12 //  MCInsts.
     13 // Documentation for the disassembler can be found in X86Disassembler.h.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "X86Disassembler.h"
     18 #include "X86DisassemblerDecoder.h"
     19 
     20 #include "llvm/MC/EDInstInfo.h"
     21 #include "llvm/MC/MCDisassembler.h"
     22 #include "llvm/MC/MCDisassembler.h"
     23 #include "llvm/MC/MCInst.h"
     24 #include "llvm/Target/TargetRegistry.h"
     25 #include "llvm/Support/Debug.h"
     26 #include "llvm/Support/MemoryObject.h"
     27 #include "llvm/Support/raw_ostream.h"
     28 
     29 #define GET_REGINFO_ENUM
     30 #include "X86GenRegisterInfo.inc"
     31 #include "X86GenEDInfo.inc"
     32 
     33 using namespace llvm;
     34 using namespace llvm::X86Disassembler;
     35 
     36 void x86DisassemblerDebug(const char *file,
     37                           unsigned line,
     38                           const char *s) {
     39   dbgs() << file << ":" << line << ": " << s;
     40 }
     41 
// debug - Reports the message s through x86DisassemblerDebug(), tagged with
//   the file and line of the call site.  The call is wrapped in DEBUG().
//   Note that the expansion already ends in a semicolon, so "debug(...);"
//   yields an extra empty statement; keep uses inside braces.
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
     43 
namespace llvm {

// Fill-ins to make the compiler happy.  These constants are never actually
//   assigned; they are just filler to make an automatically-generated switch
//   statement work.  The switch in translateRMMemory() expands every EA base
//   name x into a use of X86::x, so each base name that is not a real X86
//   register needs a placeholder with the same name here.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
}

// The target objects are defined elsewhere; this file only registers the
//   disassembler factories against them in LLVMInitializeX86Disassembler().
extern Target TheX86_32Target, TheX86_64Target;

}
     63 
// Forward declaration: getInstruction() calls translateInstruction() before
//   its definition appears later in this file.  It returns false on success
//   and true on failure.
static bool translateInstruction(MCInst &target,
                                InternalInstruction &source);
     66 
     67 X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
     68     MCDisassembler(),
     69     fMode(mode) {
     70 }
     71 
     72 X86GenericDisassembler::~X86GenericDisassembler() {
     73 }
     74 
     75 EDInstInfo *X86GenericDisassembler::getEDInfo() const {
     76   return instInfoX86;
     77 }
     78 
     79 /// regionReader - a callback function that wraps the readByte method from
     80 ///   MemoryObject.
     81 ///
     82 /// @param arg      - The generic callback parameter.  In this case, this should
     83 ///                   be a pointer to a MemoryObject.
     84 /// @param byte     - A pointer to the byte to be read.
     85 /// @param address  - The address to be read.
     86 static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
     87   MemoryObject* region = static_cast<MemoryObject*>(arg);
     88   return region->readByte(address, byte);
     89 }
     90 
     91 /// logger - a callback function that wraps the operator<< method from
     92 ///   raw_ostream.
     93 ///
     94 /// @param arg      - The generic callback parameter.  This should be a pointe
     95 ///                   to a raw_ostream.
     96 /// @param log      - A string to be logged.  logger() adds a newline.
     97 static void logger(void* arg, const char* log) {
     98   if (!arg)
     99     return;
    100 
    101   raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
    102   vStream << log << "\n";
    103 }
    104 
    105 //
    106 // Public interface for the disassembler
    107 //
    108 
    109 bool X86GenericDisassembler::getInstruction(MCInst &instr,
    110                                             uint64_t &size,
    111                                             const MemoryObject &region,
    112                                             uint64_t address,
    113                                             raw_ostream &vStream) const {
    114   InternalInstruction internalInstr;
    115 
    116   int ret = decodeInstruction(&internalInstr,
    117                               regionReader,
    118                               (void*)&region,
    119                               logger,
    120                               (void*)&vStream,
    121                               address,
    122                               fMode);
    123 
    124   if (ret) {
    125     size = internalInstr.readerCursor - address;
    126     return false;
    127   }
    128   else {
    129     size = internalInstr.length;
    130     return !translateInstruction(instr, internalInstr);
    131   }
    132 }
    133 
    134 //
    135 // Private code that translates from struct InternalInstructions to MCInsts.
    136 //
    137 
    138 /// translateRegister - Translates an internal register to the appropriate LLVM
    139 ///   register, and appends it as an operand to an MCInst.
    140 ///
    141 /// @param mcInst     - The MCInst to append to.
    142 /// @param reg        - The Reg to append.
    143 static void translateRegister(MCInst &mcInst, Reg reg) {
    144 #define ENTRY(x) X86::x,
    145   uint8_t llvmRegnums[] = {
    146     ALL_REGS
    147     0
    148   };
    149 #undef ENTRY
    150 
    151   uint8_t llvmRegnum = llvmRegnums[reg];
    152   mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
    153 }
    154 
    155 /// translateImmediate  - Appends an immediate operand to an MCInst.
    156 ///
    157 /// @param mcInst       - The MCInst to append to.
    158 /// @param immediate    - The immediate value to append.
    159 /// @param operand      - The operand, as stored in the descriptor table.
    160 /// @param insn         - The internal instruction.
    161 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
    162                                const OperandSpecifier &operand,
    163                                InternalInstruction &insn) {
    164   // Sign-extend the immediate if necessary.
    165 
    166   OperandType type = operand.type;
    167 
    168   if (type == TYPE_RELv) {
    169     switch (insn.displacementSize) {
    170     default:
    171       break;
    172     case 1:
    173       type = TYPE_MOFFS8;
    174       break;
    175     case 2:
    176       type = TYPE_MOFFS16;
    177       break;
    178     case 4:
    179       type = TYPE_MOFFS32;
    180       break;
    181     case 8:
    182       type = TYPE_MOFFS64;
    183       break;
    184     }
    185   }
    186 
    187   switch (type) {
    188   case TYPE_MOFFS8:
    189   case TYPE_REL8:
    190     if(immediate & 0x80)
    191       immediate |= ~(0xffull);
    192     break;
    193   case TYPE_MOFFS16:
    194     if(immediate & 0x8000)
    195       immediate |= ~(0xffffull);
    196     break;
    197   case TYPE_MOFFS32:
    198   case TYPE_REL32:
    199   case TYPE_REL64:
    200     if(immediate & 0x80000000)
    201       immediate |= ~(0xffffffffull);
    202     break;
    203   case TYPE_MOFFS64:
    204   default:
    205     // operand is 64 bits wide.  Do nothing.
    206     break;
    207   }
    208 
    209   mcInst.addOperand(MCOperand::CreateImm(immediate));
    210 }
    211 
/// translateRMRegister - Translates a register stored in the R/M field of the
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction to extract the R/M field
///                       from.
/// @return             - false on success; true otherwise (no operand is
///                       appended in that case).
static bool translateRMRegister(MCInst &mcInst,
                                InternalInstruction &insn) {
  // A SIB byte implies a memory operand, so reject it up front with its own
  // diagnostic before the general switch below.
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    debug("A R/M register operand may not have a SIB byte");
    return true;
  }

  switch (insn.eaBase) {
  default:
    debug("Unexpected EA base register");
    return true;
  case EA_BASE_NONE:
    debug("EA_BASE_NONE for ModR/M base");
    return true;
  // Every EA_BASE_* value denotes a memory base, which is invalid for a
  // register operand; all of these labels share the error path below.
#define ENTRY(x) case EA_BASE_##x:
  ALL_EA_BASES
#undef ENTRY
    debug("A R/M register operand may not have a base; "
          "the operand must be a register.");
    return true;
  // Every EA_REG_* value maps to the LLVM register of the same name.
#define ENTRY(x)                                                      \
  case EA_REG_##x:                                                    \
    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
  ALL_REGS
#undef ENTRY
  }

  return false;
}
    247 
/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
///   fields of an internal instruction (and possibly its SIB byte) to a memory
///   operand in LLVM's format, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.
/// @return             - false on success; true otherwise.  On failure no
///                       operands are appended to mcInst.
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
  // Addresses in an MCInst are represented as five operands:
  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
  //                                SIB base
  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
  //                                scale amount
  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
  //                                the index (which is multiplied by the
  //                                scale amount)
  //   4. displacement  (immediate) 0, or the displacement if there is one
  //   5. segmentreg    (register)  the segment override register, or register
  //                                0 if the instruction has no override
  // "No register" is encoded below as register number 0.

  MCOperand baseReg;
  MCOperand scaleAmount;
  MCOperand indexReg;
  MCOperand displacement;
  MCOperand segmentReg;

  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    // SIB form: base, index and scale all come from the SIB byte.
    if (insn.sibBase != SIB_BASE_NONE) {
      switch (insn.sibBase) {
      default:
        debug("Unexpected sibBase");
        return true;
#define ENTRY(x)                                          \
      case SIB_BASE_##x:                                  \
        baseReg = MCOperand::CreateReg(X86::x); break;
      ALL_SIB_BASES
#undef ENTRY
      }
    } else {
      baseReg = MCOperand::CreateReg(0);
    }

    if (insn.sibIndex != SIB_INDEX_NONE) {
      switch (insn.sibIndex) {
      default:
        debug("Unexpected sibIndex");
        return true;
      // The SIB_INDEX_* cases are generated from the 32-bit and 64-bit EA
      // base register lists.
#define ENTRY(x)                                          \
      case SIB_INDEX_##x:                                 \
        indexReg = MCOperand::CreateReg(X86::x); break;
      EA_BASES_32BIT
      EA_BASES_64BIT
#undef ENTRY
      }
    } else {
      indexReg = MCOperand::CreateReg(0);
    }

    scaleAmount = MCOperand::CreateImm(insn.sibScale);
  } else {
    // Non-SIB form: the base (and for the 16-bit pair forms, the index) is
    // determined by eaBase alone, and the scale is always 1.
    switch (insn.eaBase) {
    case EA_BASE_NONE:
      // No base register is only meaningful if there is a displacement.
      if (insn.eaDisplacement == EA_DISP_NONE) {
        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
        return true;
      }
      // In 64-bit mode a base-less displacement is taken relative to RIP.
      if (insn.mode == MODE_64BIT)
        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
      else
        baseReg = MCOperand::CreateReg(0);

      indexReg = MCOperand::CreateReg(0);
      break;
    // The four register-pair bases each expand to a base and an index.
    case EA_BASE_BX_SI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BX_DI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    case EA_BASE_BP_SI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BP_DI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    default:
      // Any remaining base is a single register with no index.
      indexReg = MCOperand::CreateReg(0);
      switch (insn.eaBase) {
      default:
        debug("Unexpected eaBase");
        return true;
        // Here, we will use the fill-ins defined above.  However,
        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
        //   sib and sib64 were handled in the top-level if, so they're only
        //   placeholders to keep the compiler happy.
#define ENTRY(x)                                        \
      case EA_BASE_##x:                                 \
        baseReg = MCOperand::CreateReg(X86::x); break;
      ALL_EA_BASES
#undef ENTRY
      // An EA_REG_* value means the R/M field names a register rather than a
      // memory base; all of these labels share the error path below.
#define ENTRY(x) case EA_REG_##x:
      ALL_REGS
#undef ENTRY
        debug("A R/M memory operand may not be a register; "
              "the base field must be a base.");
        return true;
      }
    }

    scaleAmount = MCOperand::CreateImm(1);
  }

  displacement = MCOperand::CreateImm(insn.displacement);

  // Maps insn.segmentOverride to an LLVM segment register.  The override value
  // is used directly as the index and is not range-checked here.
  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
    0,        // SEG_OVERRIDE_NONE
    X86::CS,
    X86::SS,
    X86::DS,
    X86::ES,
    X86::FS,
    X86::GS
  };

  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);

  // Append in the fixed order described at the top of the function.
  mcInst.addOperand(baseReg);
  mcInst.addOperand(scaleAmount);
  mcInst.addOperand(indexReg);
  mcInst.addOperand(displacement);
  mcInst.addOperand(segmentReg);
  return false;
}
    386 
    387 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
    388 ///   byte of an instruction to LLVM form, and appends it to an MCInst.
    389 ///
    390 /// @param mcInst       - The MCInst to append to.
    391 /// @param operand      - The operand, as stored in the descriptor table.
    392 /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
    393 ///                       from.
    394 /// @return             - 0 on success; nonzero otherwise
    395 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
    396                         InternalInstruction &insn) {
    397   switch (operand.type) {
    398   default:
    399     debug("Unexpected type for a R/M operand");
    400     return true;
    401   case TYPE_R8:
    402   case TYPE_R16:
    403   case TYPE_R32:
    404   case TYPE_R64:
    405   case TYPE_Rv:
    406   case TYPE_MM:
    407   case TYPE_MM32:
    408   case TYPE_MM64:
    409   case TYPE_XMM:
    410   case TYPE_XMM32:
    411   case TYPE_XMM64:
    412   case TYPE_XMM128:
    413   case TYPE_XMM256:
    414   case TYPE_DEBUGREG:
    415   case TYPE_CONTROLREG:
    416     return translateRMRegister(mcInst, insn);
    417   case TYPE_M:
    418   case TYPE_M8:
    419   case TYPE_M16:
    420   case TYPE_M32:
    421   case TYPE_M64:
    422   case TYPE_M128:
    423   case TYPE_M256:
    424   case TYPE_M512:
    425   case TYPE_Mv:
    426   case TYPE_M32FP:
    427   case TYPE_M64FP:
    428   case TYPE_M80FP:
    429   case TYPE_M16INT:
    430   case TYPE_M32INT:
    431   case TYPE_M64INT:
    432   case TYPE_M1616:
    433   case TYPE_M1632:
    434   case TYPE_M1664:
    435   case TYPE_LEA:
    436     return translateRMMemory(mcInst, insn);
    437   }
    438 }
    439 
    440 /// translateFPRegister - Translates a stack position on the FPU stack to its
    441 ///   LLVM form, and appends it to an MCInst.
    442 ///
    443 /// @param mcInst       - The MCInst to append to.
    444 /// @param stackPos     - The stack position to translate.
    445 /// @return             - 0 on success; nonzero otherwise.
    446 static bool translateFPRegister(MCInst &mcInst,
    447                                uint8_t stackPos) {
    448   if (stackPos >= 8) {
    449     debug("Invalid FP stack position");
    450     return true;
    451   }
    452 
    453   mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
    454 
    455   return false;
    456 }
    457 
    458 /// translateOperand - Translates an operand stored in an internal instruction
    459 ///   to LLVM's format and appends it to an MCInst.
    460 ///
    461 /// @param mcInst       - The MCInst to append to.
    462 /// @param operand      - The operand, as stored in the descriptor table.
    463 /// @param insn         - The internal instruction.
    464 /// @return             - false on success; true otherwise.
    465 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
    466                              InternalInstruction &insn) {
    467   switch (operand.encoding) {
    468   default:
    469     debug("Unhandled operand encoding during translation");
    470     return true;
    471   case ENCODING_REG:
    472     translateRegister(mcInst, insn.reg);
    473     return false;
    474   case ENCODING_RM:
    475     return translateRM(mcInst, operand, insn);
    476   case ENCODING_CB:
    477   case ENCODING_CW:
    478   case ENCODING_CD:
    479   case ENCODING_CP:
    480   case ENCODING_CO:
    481   case ENCODING_CT:
    482     debug("Translation of code offsets isn't supported.");
    483     return true;
    484   case ENCODING_IB:
    485   case ENCODING_IW:
    486   case ENCODING_ID:
    487   case ENCODING_IO:
    488   case ENCODING_Iv:
    489   case ENCODING_Ia:
    490     translateImmediate(mcInst,
    491                        insn.immediates[insn.numImmediatesTranslated++],
    492                        operand,
    493                        insn);
    494     return false;
    495   case ENCODING_RB:
    496   case ENCODING_RW:
    497   case ENCODING_RD:
    498   case ENCODING_RO:
    499     translateRegister(mcInst, insn.opcodeRegister);
    500     return false;
    501   case ENCODING_I:
    502     return translateFPRegister(mcInst, insn.opcodeModifier);
    503   case ENCODING_Rv:
    504     translateRegister(mcInst, insn.opcodeRegister);
    505     return false;
    506   case ENCODING_VVVV:
    507     translateRegister(mcInst, insn.vvvv);
    508     return false;
    509   case ENCODING_DUP:
    510     return translateOperand(mcInst,
    511                             insn.spec->operands[operand.type - TYPE_DUP0],
    512                             insn);
    513   }
    514 }
    515 
    516 /// translateInstruction - Translates an internal instruction and all its
    517 ///   operands to an MCInst.
    518 ///
    519 /// @param mcInst       - The MCInst to populate with the instruction's data.
    520 /// @param insn         - The internal instruction.
    521 /// @return             - false on success; true otherwise.
    522 static bool translateInstruction(MCInst &mcInst,
    523                                 InternalInstruction &insn) {
    524   if (!insn.spec) {
    525     debug("Instruction has no specification");
    526     return true;
    527   }
    528 
    529   mcInst.setOpcode(insn.instructionID);
    530 
    531   int index;
    532 
    533   insn.numImmediatesTranslated = 0;
    534 
    535   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
    536     if (insn.spec->operands[index].encoding != ENCODING_NONE) {
    537       if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
    538         return true;
    539       }
    540     }
    541   }
    542 
    543   return false;
    544 }
    545 
    546 static MCDisassembler *createX86_32Disassembler(const Target &T) {
    547   return new X86Disassembler::X86_32Disassembler;
    548 }
    549 
    550 static MCDisassembler *createX86_64Disassembler(const Target &T) {
    551   return new X86Disassembler::X86_64Disassembler;
    552 }
    553 
    554 extern "C" void LLVMInitializeX86Disassembler() {
    555   // Register the disassembler.
    556   TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
    557                                          createX86_32Disassembler);
    558   TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
    559                                          createX86_64Disassembler);
    560 }
    561