/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the public interface of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/
     15 
     16 #ifndef X86DISASSEMBLERDECODER_H
     17 #define X86DISASSEMBLERDECODER_H
     18 
     19 #ifdef __cplusplus
     20 extern "C" {
     21 #endif
     22 
     23 #define INSTRUCTION_SPECIFIER_FIELDS \
     24   uint16_t operands;
     25 
     26 #define INSTRUCTION_IDS     \
     27   unsigned instructionIDs;
     28 
     29 #include "X86DisassemblerDecoderCommon.h"
     30 
     31 #undef INSTRUCTION_SPECIFIER_FIELDS
     32 #undef INSTRUCTION_IDS
     33 
     34 /*
     35  * Accessor functions for various fields of an Intel instruction
     36  */
     37 #define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
     38 #define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
     39 #define rmFromModRM(modRM)   ((modRM) & 0x7)
     40 #define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
     41 #define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
     42 #define baseFromSIB(sib)     ((sib) & 0x7)
     43 #define wFromREX(rex)        (((rex) & 0x8) >> 3)
     44 #define rFromREX(rex)        (((rex) & 0x4) >> 2)
     45 #define xFromREX(rex)        (((rex) & 0x2) >> 1)
     46 #define bFromREX(rex)        ((rex) & 0x1)
     47 
     48 #define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)
     49 #define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)
     50 #define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5)
     51 #define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
     52 #define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7)
     53 #define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3)
     54 #define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2)
     55 #define ppFromVEX3of3(vex)      ((vex) & 0x3)
     56 
     57 #define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7)
     58 #define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3)
     59 #define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2)
     60 #define ppFromVEX2of2(vex)      ((vex) & 0x3)
     61 
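/*
 * The following lists name the Intel registers used by the decoder.  Each
 * list is a sequence of ENTRY(x) invocations; the enums further down define
 * ENTRY and expand the lists, so the order of the entries fixes the
 * enumerator values.
 */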
     65 
     66 #define REGS_8BIT     \
     67   ENTRY(AL)           \
     68   ENTRY(CL)           \
     69   ENTRY(DL)           \
     70   ENTRY(BL)           \
     71   ENTRY(AH)           \
     72   ENTRY(CH)           \
     73   ENTRY(DH)           \
     74   ENTRY(BH)           \
     75   ENTRY(R8B)          \
     76   ENTRY(R9B)          \
     77   ENTRY(R10B)         \
     78   ENTRY(R11B)         \
     79   ENTRY(R12B)         \
     80   ENTRY(R13B)         \
     81   ENTRY(R14B)         \
     82   ENTRY(R15B)         \
     83   ENTRY(SPL)          \
     84   ENTRY(BPL)          \
     85   ENTRY(SIL)          \
     86   ENTRY(DIL)
     87 
     88 #define EA_BASES_16BIT  \
     89   ENTRY(BX_SI)          \
     90   ENTRY(BX_DI)          \
     91   ENTRY(BP_SI)          \
     92   ENTRY(BP_DI)          \
     93   ENTRY(SI)             \
     94   ENTRY(DI)             \
     95   ENTRY(BP)             \
     96   ENTRY(BX)             \
     97   ENTRY(R8W)            \
     98   ENTRY(R9W)            \
     99   ENTRY(R10W)           \
    100   ENTRY(R11W)           \
    101   ENTRY(R12W)           \
    102   ENTRY(R13W)           \
    103   ENTRY(R14W)           \
    104   ENTRY(R15W)
    105 
    106 #define REGS_16BIT    \
    107   ENTRY(AX)           \
    108   ENTRY(CX)           \
    109   ENTRY(DX)           \
    110   ENTRY(BX)           \
    111   ENTRY(SP)           \
    112   ENTRY(BP)           \
    113   ENTRY(SI)           \
    114   ENTRY(DI)           \
    115   ENTRY(R8W)          \
    116   ENTRY(R9W)          \
    117   ENTRY(R10W)         \
    118   ENTRY(R11W)         \
    119   ENTRY(R12W)         \
    120   ENTRY(R13W)         \
    121   ENTRY(R14W)         \
    122   ENTRY(R15W)
    123 
    124 #define EA_BASES_32BIT  \
    125   ENTRY(EAX)            \
    126   ENTRY(ECX)            \
    127   ENTRY(EDX)            \
    128   ENTRY(EBX)            \
    129   ENTRY(sib)            \
    130   ENTRY(EBP)            \
    131   ENTRY(ESI)            \
    132   ENTRY(EDI)            \
    133   ENTRY(R8D)            \
    134   ENTRY(R9D)            \
    135   ENTRY(R10D)           \
    136   ENTRY(R11D)           \
    137   ENTRY(R12D)           \
    138   ENTRY(R13D)           \
    139   ENTRY(R14D)           \
    140   ENTRY(R15D)
    141 
    142 #define REGS_32BIT  \
    143   ENTRY(EAX)        \
    144   ENTRY(ECX)        \
    145   ENTRY(EDX)        \
    146   ENTRY(EBX)        \
    147   ENTRY(ESP)        \
    148   ENTRY(EBP)        \
    149   ENTRY(ESI)        \
    150   ENTRY(EDI)        \
    151   ENTRY(R8D)        \
    152   ENTRY(R9D)        \
    153   ENTRY(R10D)       \
    154   ENTRY(R11D)       \
    155   ENTRY(R12D)       \
    156   ENTRY(R13D)       \
    157   ENTRY(R14D)       \
    158   ENTRY(R15D)
    159 
    160 #define EA_BASES_64BIT  \
    161   ENTRY(RAX)            \
    162   ENTRY(RCX)            \
    163   ENTRY(RDX)            \
    164   ENTRY(RBX)            \
    165   ENTRY(sib64)          \
    166   ENTRY(RBP)            \
    167   ENTRY(RSI)            \
    168   ENTRY(RDI)            \
    169   ENTRY(R8)             \
    170   ENTRY(R9)             \
    171   ENTRY(R10)            \
    172   ENTRY(R11)            \
    173   ENTRY(R12)            \
    174   ENTRY(R13)            \
    175   ENTRY(R14)            \
    176   ENTRY(R15)
    177 
    178 #define REGS_64BIT  \
    179   ENTRY(RAX)        \
    180   ENTRY(RCX)        \
    181   ENTRY(RDX)        \
    182   ENTRY(RBX)        \
    183   ENTRY(RSP)        \
    184   ENTRY(RBP)        \
    185   ENTRY(RSI)        \
    186   ENTRY(RDI)        \
    187   ENTRY(R8)         \
    188   ENTRY(R9)         \
    189   ENTRY(R10)        \
    190   ENTRY(R11)        \
    191   ENTRY(R12)        \
    192   ENTRY(R13)        \
    193   ENTRY(R14)        \
    194   ENTRY(R15)
    195 
    196 #define REGS_MMX  \
    197   ENTRY(MM0)      \
    198   ENTRY(MM1)      \
    199   ENTRY(MM2)      \
    200   ENTRY(MM3)      \
    201   ENTRY(MM4)      \
    202   ENTRY(MM5)      \
    203   ENTRY(MM6)      \
    204   ENTRY(MM7)
    205 
    206 #define REGS_XMM  \
    207   ENTRY(XMM0)     \
    208   ENTRY(XMM1)     \
    209   ENTRY(XMM2)     \
    210   ENTRY(XMM3)     \
    211   ENTRY(XMM4)     \
    212   ENTRY(XMM5)     \
    213   ENTRY(XMM6)     \
    214   ENTRY(XMM7)     \
    215   ENTRY(XMM8)     \
    216   ENTRY(XMM9)     \
    217   ENTRY(XMM10)    \
    218   ENTRY(XMM11)    \
    219   ENTRY(XMM12)    \
    220   ENTRY(XMM13)    \
    221   ENTRY(XMM14)    \
    222   ENTRY(XMM15)
    223 
    224 #define REGS_YMM  \
    225   ENTRY(YMM0)     \
    226   ENTRY(YMM1)     \
    227   ENTRY(YMM2)     \
    228   ENTRY(YMM3)     \
    229   ENTRY(YMM4)     \
    230   ENTRY(YMM5)     \
    231   ENTRY(YMM6)     \
    232   ENTRY(YMM7)     \
    233   ENTRY(YMM8)     \
    234   ENTRY(YMM9)     \
    235   ENTRY(YMM10)    \
    236   ENTRY(YMM11)    \
    237   ENTRY(YMM12)    \
    238   ENTRY(YMM13)    \
    239   ENTRY(YMM14)    \
    240   ENTRY(YMM15)
    241 
    242 #define REGS_SEGMENT \
    243   ENTRY(ES)          \
    244   ENTRY(CS)          \
    245   ENTRY(SS)          \
    246   ENTRY(DS)          \
    247   ENTRY(FS)          \
    248   ENTRY(GS)
    249 
    250 #define REGS_DEBUG  \
    251   ENTRY(DR0)        \
    252   ENTRY(DR1)        \
    253   ENTRY(DR2)        \
    254   ENTRY(DR3)        \
    255   ENTRY(DR4)        \
    256   ENTRY(DR5)        \
    257   ENTRY(DR6)        \
    258   ENTRY(DR7)
    259 
    260 #define REGS_CONTROL  \
    261   ENTRY(CR0)          \
    262   ENTRY(CR1)          \
    263   ENTRY(CR2)          \
    264   ENTRY(CR3)          \
    265   ENTRY(CR4)          \
    266   ENTRY(CR5)          \
    267   ENTRY(CR6)          \
    268   ENTRY(CR7)          \
    269   ENTRY(CR8)
    270 
    271 #define ALL_EA_BASES  \
    272   EA_BASES_16BIT      \
    273   EA_BASES_32BIT      \
    274   EA_BASES_64BIT
    275 
    276 #define ALL_SIB_BASES \
    277   REGS_32BIT          \
    278   REGS_64BIT
    279 
    280 #define ALL_REGS      \
    281   REGS_8BIT           \
    282   REGS_16BIT          \
    283   REGS_32BIT          \
    284   REGS_64BIT          \
    285   REGS_MMX            \
    286   REGS_XMM            \
    287   REGS_YMM            \
    288   REGS_SEGMENT        \
    289   REGS_DEBUG          \
    290   REGS_CONTROL        \
    291   ENTRY(RIP)
    292 
    293 /*
    294  * EABase - All possible values of the base field for effective-address
    295  *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
    296  *   distinguish between bases (EA_BASE_*) and registers that just happen to be
    297  *   referred to when Mod == 0b11 (EA_REG_*).
    298  */
    299 typedef enum {
    300   EA_BASE_NONE,
    301 #define ENTRY(x) EA_BASE_##x,
    302   ALL_EA_BASES
    303 #undef ENTRY
    304 #define ENTRY(x) EA_REG_##x,
    305   ALL_REGS
    306 #undef ENTRY
    307   EA_max
    308 } EABase;
    309 
    310 /*
    311  * SIBIndex - All possible values of the SIB index field.
    312  *   Borrows entries from ALL_EA_BASES with the special case that
    313  *   sib is synonymous with NONE.
    314  * Vector SIB: index can be XMM or YMM.
    315  */
    316 typedef enum {
    317   SIB_INDEX_NONE,
    318 #define ENTRY(x) SIB_INDEX_##x,
    319   ALL_EA_BASES
    320   REGS_XMM
    321   REGS_YMM
    322 #undef ENTRY
    323   SIB_INDEX_max
    324 } SIBIndex;
    325 
    326 /*
    327  * SIBBase - All possible values of the SIB base field.
    328  */
    329 typedef enum {
    330   SIB_BASE_NONE,
    331 #define ENTRY(x) SIB_BASE_##x,
    332   ALL_SIB_BASES
    333 #undef ENTRY
    334   SIB_BASE_max
    335 } SIBBase;
    336 
    337 /*
    338  * EADisplacement - Possible displacement types for effective-address
    339  *   computations.
    340  */
    341 typedef enum {
    342   EA_DISP_NONE,
    343   EA_DISP_8,
    344   EA_DISP_16,
    345   EA_DISP_32
    346 } EADisplacement;
    347 
    348 /*
    349  * Reg - All possible values of the reg field in the ModR/M byte.
    350  */
    351 typedef enum {
    352 #define ENTRY(x) MODRM_REG_##x,
    353   ALL_REGS
    354 #undef ENTRY
    355   MODRM_REG_max
    356 } Reg;
    357 
    358 /*
    359  * SegmentOverride - All possible segment overrides.
    360  */
    361 typedef enum {
    362   SEG_OVERRIDE_NONE,
    363   SEG_OVERRIDE_CS,
    364   SEG_OVERRIDE_SS,
    365   SEG_OVERRIDE_DS,
    366   SEG_OVERRIDE_ES,
    367   SEG_OVERRIDE_FS,
    368   SEG_OVERRIDE_GS,
    369   SEG_OVERRIDE_max
    370 } SegmentOverride;
    371 
    372 /*
    373  * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
    374  */
    375 
    376 typedef enum {
    377   VEX_LOB_0F = 0x1,
    378   VEX_LOB_0F38 = 0x2,
    379   VEX_LOB_0F3A = 0x3
    380 } VEXLeadingOpcodeByte;
    381 
    382 /*
    383  * VEXPrefixCode - Possible values for the VEX.pp field
    384  */
    385 
    386 typedef enum {
    387   VEX_PREFIX_NONE = 0x0,
    388   VEX_PREFIX_66 = 0x1,
    389   VEX_PREFIX_F3 = 0x2,
    390   VEX_PREFIX_F2 = 0x3
    391 } VEXPrefixCode;
    392 
    393 typedef uint8_t BOOL;
    394 
    395 /*
    396  * byteReader_t - Type for the byte reader that the consumer must provide to
    397  *   the decoder.  Reads a single byte from the instruction's address space.
    398  * @param arg     - A baton that the consumer can associate with any internal
    399  *                  state that it needs.
    400  * @param byte    - A pointer to a single byte in memory that should be set to
    401  *                  contain the value at address.
    402  * @param address - The address in the instruction's address space that should
    403  *                  be read from.
    404  * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
    405  */
    406 typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);
    407 
    408 /*
    409  * dlog_t - Type for the logging function that the consumer can provide to
    410  *   get debugging output from the decoder.
    411  * @param arg     - A baton that the consumer can associate with any internal
    412  *                  state that it needs.
    413  * @param log     - A string that contains the message.  Will be reused after
    414  *                  the logger returns.
    415  */
    416 typedef void (*dlog_t)(void* arg, const char *log);
    417 
    418 /*
    419  * The x86 internal instruction, which is produced by the decoder.
    420  */
    421 struct InternalInstruction {
    422   /* Reader interface (C) */
    423   byteReader_t reader;
    424   /* Opaque value passed to the reader */
    425   const void* readerArg;
    426   /* The address of the next byte to read via the reader */
    427   uint64_t readerCursor;
    428 
    429   /* Logger interface (C) */
    430   dlog_t dlog;
    431   /* Opaque value passed to the logger */
    432   void* dlogArg;
    433 
    434   /* General instruction information */
    435 
    436   /* The mode to disassemble for (64-bit, protected, real) */
    437   DisassemblerMode mode;
    438   /* The start of the instruction, usable with the reader */
    439   uint64_t startLocation;
    440   /* The length of the instruction, in bytes */
    441   size_t length;
    442 
    443   /* Prefix state */
    444 
    445   /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
    446   uint8_t prefixPresent[0x100];
    447   /* contains the location (for use with the reader) of the prefix byte */
    448   uint64_t prefixLocations[0x100];
    449   /* The value of the VEX prefix, if present */
    450   uint8_t vexPrefix[3];
    451   /* The length of the VEX prefix (0 if not present) */
    452   uint8_t vexSize;
    453   /* The value of the REX prefix, if present */
    454   uint8_t rexPrefix;
    455   /* The location where a mandatory prefix would have to be (i.e., right before
    456      the opcode, or right before the REX prefix if one is present) */
    457   uint64_t necessaryPrefixLocation;
    458   /* The segment override type */
    459   SegmentOverride segmentOverride;
    460 
    461   /* Sizes of various critical pieces of data, in bytes */
    462   uint8_t registerSize;
    463   uint8_t addressSize;
    464   uint8_t displacementSize;
    465   uint8_t immediateSize;
    466 
    467   /* Offsets from the start of the instruction to the pieces of data, which is
    468      needed to find relocation entries for adding symbolic operands */
    469   uint8_t displacementOffset;
    470   uint8_t immediateOffset;
    471 
    472   /* opcode state */
    473 
    474   /* The value of the two-byte escape prefix (usually 0x0f) */
    475   uint8_t twoByteEscape;
    476   /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
    477   uint8_t threeByteEscape;
    478   /* The last byte of the opcode, not counting any ModR/M extension */
    479   uint8_t opcode;
    480   /* The ModR/M byte of the instruction, if it is an opcode extension */
    481   uint8_t modRMExtension;
    482 
    483   /* decode state */
    484 
    485   /* The type of opcode, used for indexing into the array of decode tables */
    486   OpcodeType opcodeType;
    487   /* The instruction ID, extracted from the decode table */
    488   uint16_t instructionID;
    489   /* The specifier for the instruction, from the instruction info table */
    490   const struct InstructionSpecifier *spec;
    491 
    492   /* state for additional bytes, consumed during operand decode.  Pattern:
    493      consumed___ indicates that the byte was already consumed and does not
    494      need to be consumed again */
    495 
    496   /* The VEX.vvvv field, which contains a third register operand for some AVX
    497      instructions */
    498   Reg                           vvvv;
    499 
    500   /* The ModR/M byte, which contains most register operands and some portion of
    501      all memory operands */
    502   BOOL                          consumedModRM;
    503   uint8_t                       modRM;
    504 
    505   /* The SIB byte, used for more complex 32- or 64-bit memory operands */
    506   BOOL                          consumedSIB;
    507   uint8_t                       sib;
    508 
    509   /* The displacement, used for memory operands */
    510   BOOL                          consumedDisplacement;
    511   int32_t                       displacement;
    512 
    513   /* Immediates.  There can be two in some cases */
    514   uint8_t                       numImmediatesConsumed;
    515   uint8_t                       numImmediatesTranslated;
    516   uint64_t                      immediates[2];
    517 
    518   /* A register or immediate operand encoded into the opcode */
    519   BOOL                          consumedOpcodeModifier;
    520   uint8_t                       opcodeModifier;
    521   Reg                           opcodeRegister;
    522 
    523   /* Portions of the ModR/M byte */
    524 
    525   /* These fields determine the allowable values for the ModR/M fields, which
    526      depend on operand and address widths */
    527   EABase                        eaBaseBase;
    528   EABase                        eaRegBase;
    529   Reg                           regBase;
    530 
    531   /* The Mod and R/M fields can encode a base for an effective address, or a
    532      register.  These are separated into two fields here */
    533   EABase                        eaBase;
    534   EADisplacement                eaDisplacement;
    535   /* The reg field always encodes a register */
    536   Reg                           reg;
    537 
    538   /* SIB state */
    539   SIBIndex                      sibIndex;
    540   uint8_t                       sibScale;
    541   SIBBase                       sibBase;
    542 
    543   const struct OperandSpecifier *operands;
    544 };
    545 
    546 /* decodeInstruction - Decode one instruction and store the decoding results in
    547  *   a buffer provided by the consumer.
    548  * @param insn      - The buffer to store the instruction in.  Allocated by the
    549  *                    consumer.
    550  * @param reader    - The byteReader_t for the bytes to be read.
    551  * @param readerArg - An argument to pass to the reader for storing context
    552  *                    specific to the consumer.  May be NULL.
    553  * @param logger    - The dlog_t to be used in printing status messages from the
    554  *                    disassembler.  May be NULL.
    555  * @param loggerArg - An argument to pass to the logger for storing context
    556  *                    specific to the logger.  May be NULL.
    557  * @param startLoc  - The address (in the reader's address space) of the first
    558  *                    byte in the instruction.
    559  * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
    560  * @return          - Nonzero if there was an error during decode, 0 otherwise.
    561  */
    562 int decodeInstruction(struct InternalInstruction* insn,
    563                       byteReader_t reader,
    564                       const void* readerArg,
    565                       dlog_t logger,
    566                       void* loggerArg,
    567                       const void* miiArg,
    568                       uint64_t startLoc,
    569                       DisassemblerMode mode);
    570 
    571 /* x86DisassemblerDebug - C-accessible function for printing a message to
    572  *   debugs()
    573  * @param file  - The name of the file printing the debug message.
    574  * @param line  - The line number that printed the debug message.
    575  * @param s     - The message to print.
    576  */
    577 
    578 void x86DisassemblerDebug(const char *file,
    579                           unsigned line,
    580                           const char *s);
    581 
    582 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
    583 
    584 #ifdef __cplusplus
    585 }
    586 #endif
    587 
    588 #endif
    589