Home | History | Annotate | Download | only in Disassembler
      1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
      2  *
      3  *                     The LLVM Compiler Infrastructure
      4  *
      5  * This file is distributed under the University of Illinois Open Source
      6  * License. See LICENSE.TXT for details.
      7  *
      8  *===----------------------------------------------------------------------===*
      9  *
     10  * This file is part of the X86 Disassembler.
     11  * It contains the implementation of the instruction decoder.
     12  * Documentation for the disassembler can be found in X86Disassembler.h.
     13  *
     14  *===----------------------------------------------------------------------===*/
     15 
     16 #include <stdarg.h>   /* for va_*()       */
     17 #include <stdio.h>    /* for vsnprintf()  */
     18 #include <stdlib.h>   /* for exit()       */
     19 #include <string.h>   /* for memset()     */
     20 
     21 #include "X86DisassemblerDecoder.h"
     22 
     23 #include "X86GenDisassemblerTables.inc"
     24 
     25 #define TRUE  1
     26 #define FALSE 0
        /* TRUE and FALSE are the values returned by the BOOL-typed helpers below. */
     27 
        /*
         * NOTE(review): `bool` is not referenced in the visible part of this file
         * (BOOL is used instead) and would clash with <stdbool.h> -- confirm whether
         * the typedef is still needed.
         */
     28 typedef int8_t bool;
     29 
        /*
         * debug(s) - Forwards the message s, together with the current file and
         *   line, to x86DisassemblerDebug().  Expands to an empty statement when
         *   NDEBUG is defined.
         */
     30 #ifndef NDEBUG
     31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
     32 #else
     33 #define debug(s) do { } while (0)
     34 #endif
     35 
     36 
     37 /*
     38  * contextForAttrs - Client for the instruction context table.  Takes a set of
     39  *   attributes and returns the appropriate decode context.
     40  *
     41  * @param attrMask  - Attributes, from the enumeration attributeBits.
     42  * @return          - The InstructionContext to use when looking up an
     43  *                    an instruction with these attributes.
     44  */
     45 static InstructionContext contextForAttrs(uint8_t attrMask) {
     46   return CONTEXTS_SYM[attrMask];
     47 }
     48 
     49 /*
     50  * modRMRequired - Reads the appropriate instruction table to determine whether
     51  *   the ModR/M byte is required to decode a particular instruction.
     52  *
     53  * @param type        - The opcode type (i.e., how many bytes it has).
     54  * @param insnContext - The context for the instruction, as returned by
     55  *                      contextForAttrs.
     56  * @param opcode      - The last byte of the instruction's opcode, not counting
     57  *                      ModR/M extensions and escapes.
     58  * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
     59  */
     60 static int modRMRequired(OpcodeType type,
     61                          InstructionContext insnContext,
     62                          uint8_t opcode) {
     63   const struct ContextDecision* decision = 0;
     64 
     65   switch (type) {
     66   case ONEBYTE:
     67     decision = &ONEBYTE_SYM;
     68     break;
     69   case TWOBYTE:
     70     decision = &TWOBYTE_SYM;
     71     break;
     72   case THREEBYTE_38:
     73     decision = &THREEBYTE38_SYM;
     74     break;
     75   case THREEBYTE_3A:
     76     decision = &THREEBYTE3A_SYM;
     77     break;
     78   case THREEBYTE_A6:
     79     decision = &THREEBYTEA6_SYM;
     80     break;
     81   case THREEBYTE_A7:
     82     decision = &THREEBYTEA7_SYM;
     83     break;
     84   }
     85 
     86   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
     87     modrm_type != MODRM_ONEENTRY;
     88 }
     89 
     90 /*
     91  * decode - Reads the appropriate instruction table to obtain the unique ID of
     92  *   an instruction.
     93  *
     94  * @param type        - See modRMRequired().
     95  * @param insnContext - See modRMRequired().
     96  * @param opcode      - See modRMRequired().
     97  * @param modRM       - The ModR/M byte if required, or any value if not.
     98  * @return            - The UID of the instruction, or 0 on failure.
     99  */
    100 static InstrUID decode(OpcodeType type,
    101                        InstructionContext insnContext,
    102                        uint8_t opcode,
    103                        uint8_t modRM) {
    104   const struct ModRMDecision* dec = 0;
    105 
    106   switch (type) {
    107   case ONEBYTE:
    108     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    109     break;
    110   case TWOBYTE:
    111     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    112     break;
    113   case THREEBYTE_38:
    114     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    115     break;
    116   case THREEBYTE_3A:
    117     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    118     break;
    119   case THREEBYTE_A6:
    120     dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    121     break;
    122   case THREEBYTE_A7:
    123     dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    124     break;
    125   }
    126 
    127   switch (dec->modrm_type) {
    128   default:
    129     debug("Corrupt table!  Unknown modrm_type");
    130     return 0;
    131   case MODRM_ONEENTRY:
    132     return modRMTable[dec->instructionIDs];
    133   case MODRM_SPLITRM:
    134     if (modFromModRM(modRM) == 0x3)
    135       return modRMTable[dec->instructionIDs+1];
    136     return modRMTable[dec->instructionIDs];
    137   case MODRM_SPLITREG:
    138     if (modFromModRM(modRM) == 0x3)
    139       return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
    140     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
    141   case MODRM_FULL:
    142     return modRMTable[dec->instructionIDs+modRM];
    143   }
    144 }
    145 
    146 /*
    147  * specifierForUID - Given a UID, returns the name and operand specification for
    148  *   that instruction.
    149  *
    150  * @param uid - The unique ID for the instruction.  This should be returned by
    151  *              decode(); specifierForUID will not check bounds.
    152  * @return    - A pointer to the specification for that instruction.
    153  */
    154 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
    155   return &INSTRUCTIONS_SYM[uid];
    156 }
    157 
    158 /*
    159  * consumeByte - Uses the reader function provided by the user to consume one
    160  *   byte from the instruction's memory and advance the cursor.
    161  *
    162  * @param insn  - The instruction with the reader function to use.  The cursor
    163  *                for this instruction is advanced.
    164  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
    165  *                with the data read.
    166  * @return      - 0 if the read was successful; nonzero otherwise.
    167  */
    168 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
    169   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
    170 
    171   if (!ret)
    172     ++(insn->readerCursor);
    173 
    174   return ret;
    175 }
    176 
    177 /*
    178  * lookAtByte - Like consumeByte, but does not advance the cursor.
    179  *
    180  * @param insn  - See consumeByte().
    181  * @param byte  - See consumeByte().
    182  * @return      - See consumeByte().
    183  */
    184 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
    185   return insn->reader(insn->readerArg, byte, insn->readerCursor);
    186 }
    187 
    188 static void unconsumeByte(struct InternalInstruction* insn) {
    189   insn->readerCursor--;
    190 }
    191 
    192 #define CONSUME_FUNC(name, type)                                  \
    193   static int name(struct InternalInstruction* insn, type* ptr) {  \
    194     type combined = 0;                                            \
    195     unsigned offset;                                              \
    196     for (offset = 0; offset < sizeof(type); ++offset) {           \
    197       uint8_t byte;                                               \
    198       int ret = insn->reader(insn->readerArg,                     \
    199                              &byte,                               \
    200                              insn->readerCursor + offset);        \
    201       if (ret)                                                    \
    202         return ret;                                               \
    203       combined = combined | ((type)byte << ((type)offset * 8));   \
    204     }                                                             \
    205     *ptr = combined;                                              \
    206     insn->readerCursor += sizeof(type);                           \
    207     return 0;                                                     \
    208   }
    209 
    210 /*
    211  * consume* - Use the reader function provided by the user to consume data
    212  *   values of various sizes from the instruction's memory and advance the
    213  *   cursor appropriately.  These readers perform endian conversion.
    214  *
    215  * @param insn    - See consumeByte().
    216  * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
    217  *                  be populated with the data read.
    218  * @return        - See consumeByte().
    219  */
    220 CONSUME_FUNC(consumeInt8, int8_t)
    221 CONSUME_FUNC(consumeInt16, int16_t)
    222 CONSUME_FUNC(consumeInt32, int32_t)
    223 CONSUME_FUNC(consumeUInt16, uint16_t)
    224 CONSUME_FUNC(consumeUInt32, uint32_t)
    225 CONSUME_FUNC(consumeUInt64, uint64_t)
    226 
    227 /*
    228  * dbgprintf - Uses the logging function provided by the user to log a single
    229  *   message, typically without a carriage-return.
    230  *
    231  * @param insn    - The instruction containing the logging function.
    232  * @param format  - See printf().
    233  * @param ...     - See printf().
    234  */
    235 static void dbgprintf(struct InternalInstruction* insn,
    236                       const char* format,
    237                       ...) {
    238   char buffer[256];
    239   va_list ap;
    240 
    241   if (!insn->dlog)
    242     return;
    243 
    244   va_start(ap, format);
    245   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
    246   va_end(ap);
    247 
    248   insn->dlog(insn->dlogArg, buffer);
    249 
    250   return;
    251 }
    252 
    253 /*
    254  * setPrefixPresent - Marks that a particular prefix is present at a particular
    255  *   location.
    256  *
    257  * @param insn      - The instruction to be marked as having the prefix.
    258  * @param prefix    - The prefix that is present.
    259  * @param location  - The location where the prefix is located (in the address
    260  *                    space of the instruction's reader).
    261  */
    262 static void setPrefixPresent(struct InternalInstruction* insn,
    263                                     uint8_t prefix,
    264                                     uint64_t location)
    265 {
    266   insn->prefixPresent[prefix] = 1;
    267   insn->prefixLocations[prefix] = location;
    268 }
    269 
    270 /*
    271  * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
    272  *   present at a given location.
    273  *
    274  * @param insn      - The instruction to be queried.
    275  * @param prefix    - The prefix.
    276  * @param location  - The location to query.
    277  * @return          - Whether the prefix is at that location.
    278  */
    279 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
    280                                uint8_t prefix,
    281                                uint64_t location)
    282 {
    283   if (insn->prefixPresent[prefix] == 1 &&
    284      insn->prefixLocations[prefix] == location)
    285     return TRUE;
    286   else
    287     return FALSE;
    288 }
    289 
    290 /*
    291  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
    292  *   instruction as having them.  Also sets the instruction's default operand,
    293  *   address, and other relevant data sizes to report operands correctly.
    294  *
    295  * @param insn  - The instruction whose prefixes are to be read.
    296  * @return      - 0 if the instruction could be read until the end of the prefix
    297  *                bytes, and no prefixes conflicted; nonzero otherwise.
    298  */
    299 static int readPrefixes(struct InternalInstruction* insn) {
    300   BOOL isPrefix = TRUE;
    301   BOOL prefixGroups[4] = { FALSE };
    302   uint64_t prefixLocation;
    303   uint8_t byte = 0;
    304 
    305   BOOL hasAdSize = FALSE;
    306   BOOL hasOpSize = FALSE;
    307 
    308   dbgprintf(insn, "readPrefixes()");
    309 
    310   while (isPrefix) {
    311     prefixLocation = insn->readerCursor;
    312 
    313     if (consumeByte(insn, &byte))
    314       return -1;
    315 
    316     /*
    317      * If the first byte is a LOCK prefix break and let it be disassembled
    318      * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
    319      * FIXME there is currently no way to get the disassembler to print the
    320      * lock prefix if it is not the first byte.
    321      */
    322     if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
    323       break;
    324 
    325     switch (byte) {
    326     case 0xf0:  /* LOCK */
    327     case 0xf2:  /* REPNE/REPNZ */
    328     case 0xf3:  /* REP or REPE/REPZ */
    329       if (prefixGroups[0])
    330         dbgprintf(insn, "Redundant Group 1 prefix");
    331       prefixGroups[0] = TRUE;
    332       setPrefixPresent(insn, byte, prefixLocation);
    333       break;
    334     case 0x2e:  /* CS segment override -OR- Branch not taken */
    335     case 0x36:  /* SS segment override -OR- Branch taken */
    336     case 0x3e:  /* DS segment override */
    337     case 0x26:  /* ES segment override */
    338     case 0x64:  /* FS segment override */
    339     case 0x65:  /* GS segment override */
    340       switch (byte) {
    341       case 0x2e:
    342         insn->segmentOverride = SEG_OVERRIDE_CS;
    343         break;
    344       case 0x36:
    345         insn->segmentOverride = SEG_OVERRIDE_SS;
    346         break;
    347       case 0x3e:
    348         insn->segmentOverride = SEG_OVERRIDE_DS;
    349         break;
    350       case 0x26:
    351         insn->segmentOverride = SEG_OVERRIDE_ES;
    352         break;
    353       case 0x64:
    354         insn->segmentOverride = SEG_OVERRIDE_FS;
    355         break;
    356       case 0x65:
    357         insn->segmentOverride = SEG_OVERRIDE_GS;
    358         break;
    359       default:
    360         debug("Unhandled override");
    361         return -1;
    362       }
    363       if (prefixGroups[1])
    364         dbgprintf(insn, "Redundant Group 2 prefix");
    365       prefixGroups[1] = TRUE;
    366       setPrefixPresent(insn, byte, prefixLocation);
    367       break;
    368     case 0x66:  /* Operand-size override */
    369       if (prefixGroups[2])
    370         dbgprintf(insn, "Redundant Group 3 prefix");
    371       prefixGroups[2] = TRUE;
    372       hasOpSize = TRUE;
    373       setPrefixPresent(insn, byte, prefixLocation);
    374       break;
    375     case 0x67:  /* Address-size override */
    376       if (prefixGroups[3])
    377         dbgprintf(insn, "Redundant Group 4 prefix");
    378       prefixGroups[3] = TRUE;
    379       hasAdSize = TRUE;
    380       setPrefixPresent(insn, byte, prefixLocation);
    381       break;
    382     default:    /* Not a prefix byte */
    383       isPrefix = FALSE;
    384       break;
    385     }
    386 
    387     if (isPrefix)
    388       dbgprintf(insn, "Found prefix 0x%hhx", byte);
    389   }
    390 
    391   insn->vexSize = 0;
    392 
    393   if (byte == 0xc4) {
    394     uint8_t byte1;
    395 
    396     if (lookAtByte(insn, &byte1)) {
    397       dbgprintf(insn, "Couldn't read second byte of VEX");
    398       return -1;
    399     }
    400 
    401     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
    402       insn->vexSize = 3;
    403       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    404     }
    405     else {
    406       unconsumeByte(insn);
    407       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    408     }
    409 
    410     if (insn->vexSize == 3) {
    411       insn->vexPrefix[0] = byte;
    412       consumeByte(insn, &insn->vexPrefix[1]);
    413       consumeByte(insn, &insn->vexPrefix[2]);
    414 
    415       /* We simulate the REX prefix for simplicity's sake */
    416 
    417       if (insn->mode == MODE_64BIT) {
    418         insn->rexPrefix = 0x40
    419                         | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
    420                         | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
    421                         | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
    422                         | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
    423       }
    424 
    425       switch (ppFromVEX3of3(insn->vexPrefix[2]))
    426       {
    427       default:
    428         break;
    429       case VEX_PREFIX_66:
    430         hasOpSize = TRUE;
    431         break;
    432       }
    433 
    434       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
    435     }
    436   }
    437   else if (byte == 0xc5) {
    438     uint8_t byte1;
    439 
    440     if (lookAtByte(insn, &byte1)) {
    441       dbgprintf(insn, "Couldn't read second byte of VEX");
    442       return -1;
    443     }
    444 
    445     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
    446       insn->vexSize = 2;
    447     }
    448     else {
    449       unconsumeByte(insn);
    450     }
    451 
    452     if (insn->vexSize == 2) {
    453       insn->vexPrefix[0] = byte;
    454       consumeByte(insn, &insn->vexPrefix[1]);
    455 
    456       if (insn->mode == MODE_64BIT) {
    457         insn->rexPrefix = 0x40
    458                         | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
    459       }
    460 
    461       switch (ppFromVEX2of2(insn->vexPrefix[1]))
    462       {
    463       default:
    464         break;
    465       case VEX_PREFIX_66:
    466         hasOpSize = TRUE;
    467         break;
    468       }
    469 
    470       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
    471     }
    472   }
    473   else {
    474     if (insn->mode == MODE_64BIT) {
    475       if ((byte & 0xf0) == 0x40) {
    476         uint8_t opcodeByte;
    477 
    478         if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
    479           dbgprintf(insn, "Redundant REX prefix");
    480           return -1;
    481         }
    482 
    483         insn->rexPrefix = byte;
    484         insn->necessaryPrefixLocation = insn->readerCursor - 2;
    485 
    486         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
    487       } else {
    488         unconsumeByte(insn);
    489         insn->necessaryPrefixLocation = insn->readerCursor - 1;
    490       }
    491     } else {
    492       unconsumeByte(insn);
    493       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    494     }
    495   }
    496 
    497   if (insn->mode == MODE_16BIT) {
    498     insn->registerSize       = (hasOpSize ? 4 : 2);
    499     insn->addressSize        = (hasAdSize ? 4 : 2);
    500     insn->displacementSize   = (hasAdSize ? 4 : 2);
    501     insn->immediateSize      = (hasOpSize ? 4 : 2);
    502   } else if (insn->mode == MODE_32BIT) {
    503     insn->registerSize       = (hasOpSize ? 2 : 4);
    504     insn->addressSize        = (hasAdSize ? 2 : 4);
    505     insn->displacementSize   = (hasAdSize ? 2 : 4);
    506     insn->immediateSize      = (hasOpSize ? 2 : 4);
    507   } else if (insn->mode == MODE_64BIT) {
    508     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
    509       insn->registerSize       = 8;
    510       insn->addressSize        = (hasAdSize ? 4 : 8);
    511       insn->displacementSize   = 4;
    512       insn->immediateSize      = 4;
    513     } else if (insn->rexPrefix) {
    514       insn->registerSize       = (hasOpSize ? 2 : 4);
    515       insn->addressSize        = (hasAdSize ? 4 : 8);
    516       insn->displacementSize   = (hasOpSize ? 2 : 4);
    517       insn->immediateSize      = (hasOpSize ? 2 : 4);
    518     } else {
    519       insn->registerSize       = (hasOpSize ? 2 : 4);
    520       insn->addressSize        = (hasAdSize ? 4 : 8);
    521       insn->displacementSize   = (hasOpSize ? 2 : 4);
    522       insn->immediateSize      = (hasOpSize ? 2 : 4);
    523     }
    524   }
    525 
    526   return 0;
    527 }
    528 
    529 /*
    530  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
    531  *   extended or escape opcodes).
    532  *
    533  * @param insn  - The instruction whose opcode is to be read.
    534  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
    535  */
    536 static int readOpcode(struct InternalInstruction* insn) {
    537   /* Determine the length of the primary opcode */
    538 
    539   uint8_t current;
    540 
    541   dbgprintf(insn, "readOpcode()");
    542 
    543   insn->opcodeType = ONEBYTE;
    544 
    545   if (insn->vexSize == 3)
    546   {
    547     switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
    548     {
    549     default:
    550       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
    551       return -1;
    552     case 0:
    553       break;
    554     case VEX_LOB_0F:
    555       insn->twoByteEscape = 0x0f;
    556       insn->opcodeType = TWOBYTE;
    557       return consumeByte(insn, &insn->opcode);
    558     case VEX_LOB_0F38:
    559       insn->twoByteEscape = 0x0f;
    560       insn->threeByteEscape = 0x38;
    561       insn->opcodeType = THREEBYTE_38;
    562       return consumeByte(insn, &insn->opcode);
    563     case VEX_LOB_0F3A:
    564       insn->twoByteEscape = 0x0f;
    565       insn->threeByteEscape = 0x3a;
    566       insn->opcodeType = THREEBYTE_3A;
    567       return consumeByte(insn, &insn->opcode);
    568     }
    569   }
    570   else if (insn->vexSize == 2)
    571   {
    572     insn->twoByteEscape = 0x0f;
    573     insn->opcodeType = TWOBYTE;
    574     return consumeByte(insn, &insn->opcode);
    575   }
    576 
    577   if (consumeByte(insn, &current))
    578     return -1;
    579 
    580   if (current == 0x0f) {
    581     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
    582 
    583     insn->twoByteEscape = current;
    584 
    585     if (consumeByte(insn, &current))
    586       return -1;
    587 
    588     if (current == 0x38) {
    589       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    590 
    591       insn->threeByteEscape = current;
    592 
    593       if (consumeByte(insn, &current))
    594         return -1;
    595 
    596       insn->opcodeType = THREEBYTE_38;
    597     } else if (current == 0x3a) {
    598       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    599 
    600       insn->threeByteEscape = current;
    601 
    602       if (consumeByte(insn, &current))
    603         return -1;
    604 
    605       insn->opcodeType = THREEBYTE_3A;
    606     } else if (current == 0xa6) {
    607       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    608 
    609       insn->threeByteEscape = current;
    610 
    611       if (consumeByte(insn, &current))
    612         return -1;
    613 
    614       insn->opcodeType = THREEBYTE_A6;
    615     } else if (current == 0xa7) {
    616       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    617 
    618       insn->threeByteEscape = current;
    619 
    620       if (consumeByte(insn, &current))
    621         return -1;
    622 
    623       insn->opcodeType = THREEBYTE_A7;
    624     } else {
    625       dbgprintf(insn, "Didn't find a three-byte escape prefix");
    626 
    627       insn->opcodeType = TWOBYTE;
    628     }
    629   }
    630 
    631   /*
    632    * At this point we have consumed the full opcode.
    633    * Anything we consume from here on must be unconsumed.
    634    */
    635 
    636   insn->opcode = current;
    637 
    638   return 0;
    639 }
    640 
    641 static int readModRM(struct InternalInstruction* insn);
    642 
    643 /*
    644  * getIDWithAttrMask - Determines the ID of an instruction, consuming
    645  *   the ModR/M byte as appropriate for extended and escape opcodes,
    646  *   and using a supplied attribute mask.
    647  *
    648  * @param instructionID - A pointer whose target is filled in with the ID of the
    649  *                        instruction.
    650  * @param insn          - The instruction whose ID is to be determined.
    651  * @param attrMask      - The attribute mask to search.
    652  * @return              - 0 if the ModR/M could be read when needed or was not
    653  *                        needed; nonzero otherwise.
    654  */
    655 static int getIDWithAttrMask(uint16_t* instructionID,
    656                              struct InternalInstruction* insn,
    657                              uint8_t attrMask) {
    658   BOOL hasModRMExtension;
    659 
    660   uint8_t instructionClass;
    661 
    662   instructionClass = contextForAttrs(attrMask);
    663 
    664   hasModRMExtension = modRMRequired(insn->opcodeType,
    665                                     instructionClass,
    666                                     insn->opcode);
    667 
    668   if (hasModRMExtension) {
    669     if (readModRM(insn))
    670       return -1;
    671 
    672     *instructionID = decode(insn->opcodeType,
    673                             instructionClass,
    674                             insn->opcode,
    675                             insn->modRM);
    676   } else {
    677     *instructionID = decode(insn->opcodeType,
    678                             instructionClass,
    679                             insn->opcode,
    680                             0);
    681   }
    682 
    683   return 0;
    684 }
    685 
    686 /*
    687  * is16BitEquivalent - Determines whether two instruction names refer to
    688  * equivalent instructions but one is 16-bit whereas the other is not.
    689  *
    690  * @param orig  - The instruction that is not 16-bit
    691  * @param equiv - The instruction that is 16-bit
    692  */
    693 static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
    694   off_t i;
    695 
    696   for (i = 0;; i++) {
    697     if (orig[i] == '\0' && equiv[i] == '\0')
    698       return TRUE;
    699     if (orig[i] == '\0' || equiv[i] == '\0')
    700       return FALSE;
    701     if (orig[i] != equiv[i]) {
    702       if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
    703         continue;
    704       if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
    705         continue;
    706       if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
    707         continue;
    708       return FALSE;
    709     }
    710   }
    711 }
    712 
    713 /*
    714  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
    715  *   appropriate for extended and escape opcodes.  Determines the attributes and
    716  *   context for the instruction before doing so.
    717  *
    718  * @param insn  - The instruction whose ID is to be determined.
    719  * @return      - 0 if the ModR/M could be read when needed or was not needed;
    720  *                nonzero otherwise.
    721  */
    722 static int getID(struct InternalInstruction* insn, void *miiArg) {
    723   uint8_t attrMask;
    724   uint16_t instructionID;
    725 
    726   dbgprintf(insn, "getID()");
    727 
    728   attrMask = ATTR_NONE;
    729 
    730   if (insn->mode == MODE_64BIT)
    731     attrMask |= ATTR_64BIT;
    732 
    733   if (insn->vexSize) {
    734     attrMask |= ATTR_VEX;
    735 
    736     if (insn->vexSize == 3) {
    737       switch (ppFromVEX3of3(insn->vexPrefix[2])) {
    738       case VEX_PREFIX_66:
    739         attrMask |= ATTR_OPSIZE;
    740         break;
    741       case VEX_PREFIX_F3:
    742         attrMask |= ATTR_XS;
    743         break;
    744       case VEX_PREFIX_F2:
    745         attrMask |= ATTR_XD;
    746         break;
    747       }
    748 
    749       if (lFromVEX3of3(insn->vexPrefix[2]))
    750         attrMask |= ATTR_VEXL;
    751     }
    752     else if (insn->vexSize == 2) {
    753       switch (ppFromVEX2of2(insn->vexPrefix[1])) {
    754       case VEX_PREFIX_66:
    755         attrMask |= ATTR_OPSIZE;
    756         break;
    757       case VEX_PREFIX_F3:
    758         attrMask |= ATTR_XS;
    759         break;
    760       case VEX_PREFIX_F2:
    761         attrMask |= ATTR_XD;
    762         break;
    763       }
    764 
    765       if (lFromVEX2of2(insn->vexPrefix[1]))
    766         attrMask |= ATTR_VEXL;
    767     }
    768     else {
    769       return -1;
    770     }
    771   }
    772   else {
    773     if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
    774       attrMask |= ATTR_OPSIZE;
    775     else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
    776       attrMask |= ATTR_ADSIZE;
    777     else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
    778       attrMask |= ATTR_XS;
    779     else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
    780       attrMask |= ATTR_XD;
    781   }
    782 
    783   if (insn->rexPrefix & 0x08)
    784     attrMask |= ATTR_REXW;
    785 
    786   if (getIDWithAttrMask(&instructionID, insn, attrMask))
    787     return -1;
    788 
    789   /* The following clauses compensate for limitations of the tables. */
    790 
    791   if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
    792       !(attrMask & ATTR_OPSIZE)) {
    793     /*
    794      * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
    795      * has precedence since there are no L-bit with W-bit entries in the tables.
    796      * So if the L-bit isn't significant we should use the W-bit instead.
    797      * We only need to do this if the instruction doesn't specify OpSize since
    798      * there is a VEX_L_W_OPSIZE table.
    799      */
    800 
    801     const struct InstructionSpecifier *spec;
    802     uint16_t instructionIDWithWBit;
    803     const struct InstructionSpecifier *specWithWBit;
    804 
    805     spec = specifierForUID(instructionID);
    806 
    807     if (getIDWithAttrMask(&instructionIDWithWBit,
    808                           insn,
    809                           (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
    810       insn->instructionID = instructionID;
    811       insn->spec = spec;
    812       return 0;
    813     }
    814 
    815     specWithWBit = specifierForUID(instructionIDWithWBit);
    816 
    817     if (instructionID != instructionIDWithWBit) {
    818       insn->instructionID = instructionIDWithWBit;
    819       insn->spec = specWithWBit;
    820     } else {
    821       insn->instructionID = instructionID;
    822       insn->spec = spec;
    823     }
    824     return 0;
    825   }
    826 
    827   if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
    828     /*
    829      * The instruction tables make no distinction between instructions that
    830      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    831      * particular spot (i.e., many MMX operations).  In general we're
    832      * conservative, but in the specific case where OpSize is present but not
    833      * in the right place we check if there's a 16-bit operation.
    834      */
    835 
    836     const struct InstructionSpecifier *spec;
    837     uint16_t instructionIDWithOpsize;
    838     const char *specName, *specWithOpSizeName;
    839 
    840     spec = specifierForUID(instructionID);
    841 
    842     if (getIDWithAttrMask(&instructionIDWithOpsize,
    843                           insn,
    844                           attrMask | ATTR_OPSIZE)) {
    845       /*
    846        * ModRM required with OpSize but not present; give up and return version
    847        * without OpSize set
    848        */
    849 
    850       insn->instructionID = instructionID;
    851       insn->spec = spec;
    852       return 0;
    853     }
    854 
    855     specName = x86DisassemblerGetInstrName(instructionID, miiArg);
    856     specWithOpSizeName =
    857       x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
    858 
    859     if (is16BitEquvalent(specName, specWithOpSizeName)) {
    860       insn->instructionID = instructionIDWithOpsize;
    861       insn->spec = specifierForUID(instructionIDWithOpsize);
    862     } else {
    863       insn->instructionID = instructionID;
    864       insn->spec = spec;
    865     }
    866     return 0;
    867   }
    868 
    869   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
    870       insn->rexPrefix & 0x01) {
    871     /*
    872      * NOOP shouldn't decode as NOOP if REX.b is set. Instead
    873      * it should decode as XCHG %r8, %eax.
    874      */
    875 
    876     const struct InstructionSpecifier *spec;
    877     uint16_t instructionIDWithNewOpcode;
    878     const struct InstructionSpecifier *specWithNewOpcode;
    879 
    880     spec = specifierForUID(instructionID);
    881 
    882     /* Borrow opcode from one of the other XCHGar opcodes */
    883     insn->opcode = 0x91;
    884 
    885     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
    886                           insn,
    887                           attrMask)) {
    888       insn->opcode = 0x90;
    889 
    890       insn->instructionID = instructionID;
    891       insn->spec = spec;
    892       return 0;
    893     }
    894 
    895     specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
    896 
    897     /* Change back */
    898     insn->opcode = 0x90;
    899 
    900     insn->instructionID = instructionIDWithNewOpcode;
    901     insn->spec = specWithNewOpcode;
    902 
    903     return 0;
    904   }
    905 
    906   insn->instructionID = instructionID;
    907   insn->spec = specifierForUID(insn->instructionID);
    908 
    909   return 0;
    910 }
    911 
    912 /*
    913  * readSIB - Consumes the SIB byte to determine addressing information for an
    914  *   instruction.
    915  *
    916  * @param insn  - The instruction whose SIB byte is to be read.
    917  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
    918  */
    919 static int readSIB(struct InternalInstruction* insn) {
    920   SIBIndex sibIndexBase = 0;
    921   SIBBase sibBaseBase = 0;
    922   uint8_t index, base;
    923 
    924   dbgprintf(insn, "readSIB()");
    925 
    926   if (insn->consumedSIB)
    927     return 0;
    928 
    929   insn->consumedSIB = TRUE;
    930 
    931   switch (insn->addressSize) {
    932   case 2:
    933     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
    934     return -1;
    935     break;
    936   case 4:
    937     sibIndexBase = SIB_INDEX_EAX;
    938     sibBaseBase = SIB_BASE_EAX;
    939     break;
    940   case 8:
    941     sibIndexBase = SIB_INDEX_RAX;
    942     sibBaseBase = SIB_BASE_RAX;
    943     break;
    944   }
    945 
    946   if (consumeByte(insn, &insn->sib))
    947     return -1;
    948 
    949   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
    950 
    951   switch (index) {
    952   case 0x4:
    953     insn->sibIndex = SIB_INDEX_NONE;
    954     break;
    955   default:
    956     insn->sibIndex = (SIBIndex)(sibIndexBase + index);
    957     if (insn->sibIndex == SIB_INDEX_sib ||
    958         insn->sibIndex == SIB_INDEX_sib64)
    959       insn->sibIndex = SIB_INDEX_NONE;
    960     break;
    961   }
    962 
    963   switch (scaleFromSIB(insn->sib)) {
    964   case 0:
    965     insn->sibScale = 1;
    966     break;
    967   case 1:
    968     insn->sibScale = 2;
    969     break;
    970   case 2:
    971     insn->sibScale = 4;
    972     break;
    973   case 3:
    974     insn->sibScale = 8;
    975     break;
    976   }
    977 
    978   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
    979 
    980   switch (base) {
    981   case 0x5:
    982     switch (modFromModRM(insn->modRM)) {
    983     case 0x0:
    984       insn->eaDisplacement = EA_DISP_32;
    985       insn->sibBase = SIB_BASE_NONE;
    986       break;
    987     case 0x1:
    988       insn->eaDisplacement = EA_DISP_8;
    989       insn->sibBase = (insn->addressSize == 4 ?
    990                        SIB_BASE_EBP : SIB_BASE_RBP);
    991       break;
    992     case 0x2:
    993       insn->eaDisplacement = EA_DISP_32;
    994       insn->sibBase = (insn->addressSize == 4 ?
    995                        SIB_BASE_EBP : SIB_BASE_RBP);
    996       break;
    997     case 0x3:
    998       debug("Cannot have Mod = 0b11 and a SIB byte");
    999       return -1;
   1000     }
   1001     break;
   1002   default:
   1003     insn->sibBase = (SIBBase)(sibBaseBase + base);
   1004     break;
   1005   }
   1006 
   1007   return 0;
   1008 }
   1009 
   1010 /*
   1011  * readDisplacement - Consumes the displacement of an instruction.
   1012  *
   1013  * @param insn  - The instruction whose displacement is to be read.
   1014  * @return      - 0 if the displacement byte was successfully read; nonzero
   1015  *                otherwise.
   1016  */
   1017 static int readDisplacement(struct InternalInstruction* insn) {
   1018   int8_t d8;
   1019   int16_t d16;
   1020   int32_t d32;
   1021 
   1022   dbgprintf(insn, "readDisplacement()");
   1023 
   1024   if (insn->consumedDisplacement)
   1025     return 0;
   1026 
   1027   insn->consumedDisplacement = TRUE;
   1028   insn->displacementOffset = insn->readerCursor - insn->startLocation;
   1029 
   1030   switch (insn->eaDisplacement) {
   1031   case EA_DISP_NONE:
   1032     insn->consumedDisplacement = FALSE;
   1033     break;
   1034   case EA_DISP_8:
   1035     if (consumeInt8(insn, &d8))
   1036       return -1;
   1037     insn->displacement = d8;
   1038     break;
   1039   case EA_DISP_16:
   1040     if (consumeInt16(insn, &d16))
   1041       return -1;
   1042     insn->displacement = d16;
   1043     break;
   1044   case EA_DISP_32:
   1045     if (consumeInt32(insn, &d32))
   1046       return -1;
   1047     insn->displacement = d32;
   1048     break;
   1049   }
   1050 
   1051   insn->consumedDisplacement = TRUE;
   1052   return 0;
   1053 }
   1054 
   1055 /*
   1056  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
   1057  *   displacement) for an instruction and interprets it.
   1058  *
   1059  * @param insn  - The instruction whose addressing information is to be read.
   1060  * @return      - 0 if the information was successfully read; nonzero otherwise.
   1061  */
   1062 static int readModRM(struct InternalInstruction* insn) {
   1063   uint8_t mod, rm, reg;
   1064 
   1065   dbgprintf(insn, "readModRM()");
   1066 
   1067   if (insn->consumedModRM)
   1068     return 0;
   1069 
   1070   if (consumeByte(insn, &insn->modRM))
   1071     return -1;
   1072   insn->consumedModRM = TRUE;
   1073 
   1074   mod     = modFromModRM(insn->modRM);
   1075   rm      = rmFromModRM(insn->modRM);
   1076   reg     = regFromModRM(insn->modRM);
   1077 
   1078   /*
   1079    * This goes by insn->registerSize to pick the correct register, which messes
   1080    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
   1081    * fixupReg().
   1082    */
   1083   switch (insn->registerSize) {
   1084   case 2:
   1085     insn->regBase = MODRM_REG_AX;
   1086     insn->eaRegBase = EA_REG_AX;
   1087     break;
   1088   case 4:
   1089     insn->regBase = MODRM_REG_EAX;
   1090     insn->eaRegBase = EA_REG_EAX;
   1091     break;
   1092   case 8:
   1093     insn->regBase = MODRM_REG_RAX;
   1094     insn->eaRegBase = EA_REG_RAX;
   1095     break;
   1096   }
   1097 
   1098   reg |= rFromREX(insn->rexPrefix) << 3;
   1099   rm  |= bFromREX(insn->rexPrefix) << 3;
   1100 
   1101   insn->reg = (Reg)(insn->regBase + reg);
   1102 
   1103   switch (insn->addressSize) {
   1104   case 2:
   1105     insn->eaBaseBase = EA_BASE_BX_SI;
   1106 
   1107     switch (mod) {
   1108     case 0x0:
   1109       if (rm == 0x6) {
   1110         insn->eaBase = EA_BASE_NONE;
   1111         insn->eaDisplacement = EA_DISP_16;
   1112         if (readDisplacement(insn))
   1113           return -1;
   1114       } else {
   1115         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1116         insn->eaDisplacement = EA_DISP_NONE;
   1117       }
   1118       break;
   1119     case 0x1:
   1120       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1121       insn->eaDisplacement = EA_DISP_8;
   1122       if (readDisplacement(insn))
   1123         return -1;
   1124       break;
   1125     case 0x2:
   1126       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1127       insn->eaDisplacement = EA_DISP_16;
   1128       if (readDisplacement(insn))
   1129         return -1;
   1130       break;
   1131     case 0x3:
   1132       insn->eaBase = (EABase)(insn->eaRegBase + rm);
   1133       if (readDisplacement(insn))
   1134         return -1;
   1135       break;
   1136     }
   1137     break;
   1138   case 4:
   1139   case 8:
   1140     insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
   1141 
   1142     switch (mod) {
   1143     case 0x0:
   1144       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
   1145       switch (rm) {
   1146       case 0x4:
   1147       case 0xc:   /* in case REXW.b is set */
   1148         insn->eaBase = (insn->addressSize == 4 ?
   1149                         EA_BASE_sib : EA_BASE_sib64);
   1150         readSIB(insn);
   1151         if (readDisplacement(insn))
   1152           return -1;
   1153         break;
   1154       case 0x5:
   1155         insn->eaBase = EA_BASE_NONE;
   1156         insn->eaDisplacement = EA_DISP_32;
   1157         if (readDisplacement(insn))
   1158           return -1;
   1159         break;
   1160       default:
   1161         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1162         break;
   1163       }
   1164       break;
   1165     case 0x1:
   1166     case 0x2:
   1167       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
   1168       switch (rm) {
   1169       case 0x4:
   1170       case 0xc:   /* in case REXW.b is set */
   1171         insn->eaBase = EA_BASE_sib;
   1172         readSIB(insn);
   1173         if (readDisplacement(insn))
   1174           return -1;
   1175         break;
   1176       default:
   1177         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1178         if (readDisplacement(insn))
   1179           return -1;
   1180         break;
   1181       }
   1182       break;
   1183     case 0x3:
   1184       insn->eaDisplacement = EA_DISP_NONE;
   1185       insn->eaBase = (EABase)(insn->eaRegBase + rm);
   1186       break;
   1187     }
   1188     break;
   1189   } /* switch (insn->addressSize) */
   1190 
   1191   return 0;
   1192 }
   1193 
   1194 #define GENERIC_FIXUP_FUNC(name, base, prefix)            \
   1195   static uint8_t name(struct InternalInstruction *insn,   \
   1196                       OperandType type,                   \
   1197                       uint8_t index,                      \
   1198                       uint8_t *valid) {                   \
   1199     *valid = 1;                                           \
   1200     switch (type) {                                       \
   1201     default:                                              \
   1202       debug("Unhandled register type");                   \
   1203       *valid = 0;                                         \
   1204       return 0;                                           \
   1205     case TYPE_Rv:                                         \
   1206       return base + index;                                \
   1207     case TYPE_R8:                                         \
   1208       if (insn->rexPrefix &&                              \
   1209          index >= 4 && index <= 7) {                      \
   1210         return prefix##_SPL + (index - 4);                \
   1211       } else {                                            \
   1212         return prefix##_AL + index;                       \
   1213       }                                                   \
   1214     case TYPE_R16:                                        \
   1215       return prefix##_AX + index;                         \
   1216     case TYPE_R32:                                        \
   1217       return prefix##_EAX + index;                        \
   1218     case TYPE_R64:                                        \
   1219       return prefix##_RAX + index;                        \
   1220     case TYPE_XMM256:                                     \
   1221       return prefix##_YMM0 + index;                       \
   1222     case TYPE_XMM128:                                     \
   1223     case TYPE_XMM64:                                      \
   1224     case TYPE_XMM32:                                      \
   1225     case TYPE_XMM:                                        \
   1226       return prefix##_XMM0 + index;                       \
   1227     case TYPE_MM64:                                       \
   1228     case TYPE_MM32:                                       \
   1229     case TYPE_MM:                                         \
   1230       if (index > 7)                                      \
   1231         *valid = 0;                                       \
   1232       return prefix##_MM0 + index;                        \
   1233     case TYPE_SEGMENTREG:                                 \
   1234       if (index > 5)                                      \
   1235         *valid = 0;                                       \
   1236       return prefix##_ES + index;                         \
   1237     case TYPE_DEBUGREG:                                   \
   1238       if (index > 7)                                      \
   1239         *valid = 0;                                       \
   1240       return prefix##_DR0 + index;                        \
   1241     case TYPE_CONTROLREG:                                 \
   1242       if (index > 8)                                      \
   1243         *valid = 0;                                       \
   1244       return prefix##_CR0 + index;                        \
   1245     }                                                     \
   1246   }
   1247 
   1248 /*
   1249  * fixup*Value - Consults an operand type to determine the meaning of the
   1250  *   reg or R/M field.  If the operand is an XMM operand, for example, an
   1251  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
   1252  *   misinterpret it as.
   1253  *
   1254  * @param insn  - The instruction containing the operand.
   1255  * @param type  - The operand type.
   1256  * @param index - The existing value of the field as reported by readModRM().
   1257  * @param valid - The address of a uint8_t.  The target is set to 1 if the
   1258  *                field is valid for the register class; 0 if not.
   1259  * @return      - The proper value.
   1260  */
   1261 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
   1262 GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
   1263 
   1264 /*
   1265  * fixupReg - Consults an operand specifier to determine which of the
   1266  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
   1267  *
   1268  * @param insn  - See fixup*Value().
   1269  * @param op    - The operand specifier.
   1270  * @return      - 0 if fixup was successful; -1 if the register returned was
   1271  *                invalid for its class.
   1272  */
   1273 static int fixupReg(struct InternalInstruction *insn,
   1274                     const struct OperandSpecifier *op) {
   1275   uint8_t valid;
   1276 
   1277   dbgprintf(insn, "fixupReg()");
   1278 
   1279   switch ((OperandEncoding)op->encoding) {
   1280   default:
   1281     debug("Expected a REG or R/M encoding in fixupReg");
   1282     return -1;
   1283   case ENCODING_VVVV:
   1284     insn->vvvv = (Reg)fixupRegValue(insn,
   1285                                     (OperandType)op->type,
   1286                                     insn->vvvv,
   1287                                     &valid);
   1288     if (!valid)
   1289       return -1;
   1290     break;
   1291   case ENCODING_REG:
   1292     insn->reg = (Reg)fixupRegValue(insn,
   1293                                    (OperandType)op->type,
   1294                                    insn->reg - insn->regBase,
   1295                                    &valid);
   1296     if (!valid)
   1297       return -1;
   1298     break;
   1299   case ENCODING_RM:
   1300     if (insn->eaBase >= insn->eaRegBase) {
   1301       insn->eaBase = (EABase)fixupRMValue(insn,
   1302                                           (OperandType)op->type,
   1303                                           insn->eaBase - insn->eaRegBase,
   1304                                           &valid);
   1305       if (!valid)
   1306         return -1;
   1307     }
   1308     break;
   1309   }
   1310 
   1311   return 0;
   1312 }
   1313 
   1314 /*
   1315  * readOpcodeModifier - Reads an operand from the opcode field of an
   1316  *   instruction.  Handles AddRegFrm instructions.
   1317  *
   1318  * @param insn    - The instruction whose opcode field is to be read.
   1319  * @param inModRM - Indicates that the opcode field is to be read from the
   1320  *                  ModR/M extension; useful for escape opcodes
   1321  * @return        - 0 on success; nonzero otherwise.
   1322  */
   1323 static int readOpcodeModifier(struct InternalInstruction* insn) {
   1324   dbgprintf(insn, "readOpcodeModifier()");
   1325 
   1326   if (insn->consumedOpcodeModifier)
   1327     return 0;
   1328 
   1329   insn->consumedOpcodeModifier = TRUE;
   1330 
   1331   switch (insn->spec->modifierType) {
   1332   default:
   1333     debug("Unknown modifier type.");
   1334     return -1;
   1335   case MODIFIER_NONE:
   1336     debug("No modifier but an operand expects one.");
   1337     return -1;
   1338   case MODIFIER_OPCODE:
   1339     insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
   1340     return 0;
   1341   case MODIFIER_MODRM:
   1342     insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
   1343     return 0;
   1344   }
   1345 }
   1346 
   1347 /*
   1348  * readOpcodeRegister - Reads an operand from the opcode field of an
   1349  *   instruction and interprets it appropriately given the operand width.
   1350  *   Handles AddRegFrm instructions.
   1351  *
   1352  * @param insn  - See readOpcodeModifier().
   1353  * @param size  - The width (in bytes) of the register being specified.
   1354  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
   1355  *                RAX.
   1356  * @return      - 0 on success; nonzero otherwise.
   1357  */
   1358 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
   1359   dbgprintf(insn, "readOpcodeRegister()");
   1360 
   1361   if (readOpcodeModifier(insn))
   1362     return -1;
   1363 
   1364   if (size == 0)
   1365     size = insn->registerSize;
   1366 
   1367   switch (size) {
   1368   case 1:
   1369     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
   1370                                                   | insn->opcodeModifier));
   1371     if (insn->rexPrefix &&
   1372         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
   1373         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
   1374       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
   1375                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
   1376     }
   1377 
   1378     break;
   1379   case 2:
   1380     insn->opcodeRegister = (Reg)(MODRM_REG_AX
   1381                                  + ((bFromREX(insn->rexPrefix) << 3)
   1382                                     | insn->opcodeModifier));
   1383     break;
   1384   case 4:
   1385     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
   1386                                  + ((bFromREX(insn->rexPrefix) << 3)
   1387                                     | insn->opcodeModifier));
   1388     break;
   1389   case 8:
   1390     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
   1391                                  + ((bFromREX(insn->rexPrefix) << 3)
   1392                                     | insn->opcodeModifier));
   1393     break;
   1394   }
   1395 
   1396   return 0;
   1397 }
   1398 
   1399 /*
   1400  * readImmediate - Consumes an immediate operand from an instruction, given the
   1401  *   desired operand size.
   1402  *
   1403  * @param insn  - The instruction whose operand is to be read.
   1404  * @param size  - The width (in bytes) of the operand.
   1405  * @return      - 0 if the immediate was successfully consumed; nonzero
   1406  *                otherwise.
   1407  */
   1408 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
   1409   uint8_t imm8;
   1410   uint16_t imm16;
   1411   uint32_t imm32;
   1412   uint64_t imm64;
   1413 
   1414   dbgprintf(insn, "readImmediate()");
   1415 
   1416   if (insn->numImmediatesConsumed == 2) {
   1417     debug("Already consumed two immediates");
   1418     return -1;
   1419   }
   1420 
   1421   if (size == 0)
   1422     size = insn->immediateSize;
   1423   else
   1424     insn->immediateSize = size;
   1425   insn->immediateOffset = insn->readerCursor - insn->startLocation;
   1426 
   1427   switch (size) {
   1428   case 1:
   1429     if (consumeByte(insn, &imm8))
   1430       return -1;
   1431     insn->immediates[insn->numImmediatesConsumed] = imm8;
   1432     break;
   1433   case 2:
   1434     if (consumeUInt16(insn, &imm16))
   1435       return -1;
   1436     insn->immediates[insn->numImmediatesConsumed] = imm16;
   1437     break;
   1438   case 4:
   1439     if (consumeUInt32(insn, &imm32))
   1440       return -1;
   1441     insn->immediates[insn->numImmediatesConsumed] = imm32;
   1442     break;
   1443   case 8:
   1444     if (consumeUInt64(insn, &imm64))
   1445       return -1;
   1446     insn->immediates[insn->numImmediatesConsumed] = imm64;
   1447     break;
   1448   }
   1449 
   1450   insn->numImmediatesConsumed++;
   1451 
   1452   return 0;
   1453 }
   1454 
   1455 /*
   1456  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
   1457  *
   1458  * @param insn  - The instruction whose operand is to be read.
   1459  * @return      - 0 if the vvvv was successfully consumed; nonzero
   1460  *                otherwise.
   1461  */
   1462 static int readVVVV(struct InternalInstruction* insn) {
   1463   dbgprintf(insn, "readVVVV()");
   1464 
   1465   if (insn->vexSize == 3)
   1466     insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
   1467   else if (insn->vexSize == 2)
   1468     insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
   1469   else
   1470     return -1;
   1471 
   1472   if (insn->mode != MODE_64BIT)
   1473     insn->vvvv &= 0x7;
   1474 
   1475   return 0;
   1476 }
   1477 
   1478 /*
   1479  * readOperands - Consults the specifier for an instruction and consumes all
   1480  *   operands for that instruction, interpreting them as it goes.
   1481  *
   1482  * @param insn  - The instruction whose operands are to be read and interpreted.
   1483  * @return      - 0 if all operands could be read; nonzero otherwise.
   1484  */
   1485 static int readOperands(struct InternalInstruction* insn) {
   1486   int index;
   1487   int hasVVVV, needVVVV;
   1488   int sawRegImm = 0;
   1489 
   1490   dbgprintf(insn, "readOperands()");
   1491 
   1492   /* If non-zero vvvv specified, need to make sure one of the operands
   1493      uses it. */
   1494   hasVVVV = !readVVVV(insn);
   1495   needVVVV = hasVVVV && (insn->vvvv != 0);
   1496 
   1497   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
   1498     switch (insn->spec->operands[index].encoding) {
   1499     case ENCODING_NONE:
   1500       break;
   1501     case ENCODING_REG:
   1502     case ENCODING_RM:
   1503       if (readModRM(insn))
   1504         return -1;
   1505       if (fixupReg(insn, &insn->spec->operands[index]))
   1506         return -1;
   1507       break;
   1508     case ENCODING_CB:
   1509     case ENCODING_CW:
   1510     case ENCODING_CD:
   1511     case ENCODING_CP:
   1512     case ENCODING_CO:
   1513     case ENCODING_CT:
   1514       dbgprintf(insn, "We currently don't hande code-offset encodings");
   1515       return -1;
   1516     case ENCODING_IB:
   1517       if (sawRegImm) {
   1518         /* Saw a register immediate so don't read again and instead split the
   1519            previous immediate.  FIXME: This is a hack. */
   1520         insn->immediates[insn->numImmediatesConsumed] =
   1521           insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
   1522         ++insn->numImmediatesConsumed;
   1523         break;
   1524       }
   1525       if (readImmediate(insn, 1))
   1526         return -1;
   1527       if (insn->spec->operands[index].type == TYPE_IMM3 &&
   1528           insn->immediates[insn->numImmediatesConsumed - 1] > 7)
   1529         return -1;
   1530       if (insn->spec->operands[index].type == TYPE_IMM5 &&
   1531           insn->immediates[insn->numImmediatesConsumed - 1] > 31)
   1532         return -1;
   1533       if (insn->spec->operands[index].type == TYPE_XMM128 ||
   1534           insn->spec->operands[index].type == TYPE_XMM256)
   1535         sawRegImm = 1;
   1536       break;
   1537     case ENCODING_IW:
   1538       if (readImmediate(insn, 2))
   1539         return -1;
   1540       break;
   1541     case ENCODING_ID:
   1542       if (readImmediate(insn, 4))
   1543         return -1;
   1544       break;
   1545     case ENCODING_IO:
   1546       if (readImmediate(insn, 8))
   1547         return -1;
   1548       break;
   1549     case ENCODING_Iv:
   1550       if (readImmediate(insn, insn->immediateSize))
   1551         return -1;
   1552       break;
   1553     case ENCODING_Ia:
   1554       if (readImmediate(insn, insn->addressSize))
   1555         return -1;
   1556       break;
   1557     case ENCODING_RB:
   1558       if (readOpcodeRegister(insn, 1))
   1559         return -1;
   1560       break;
   1561     case ENCODING_RW:
   1562       if (readOpcodeRegister(insn, 2))
   1563         return -1;
   1564       break;
   1565     case ENCODING_RD:
   1566       if (readOpcodeRegister(insn, 4))
   1567         return -1;
   1568       break;
   1569     case ENCODING_RO:
   1570       if (readOpcodeRegister(insn, 8))
   1571         return -1;
   1572       break;
   1573     case ENCODING_Rv:
   1574       if (readOpcodeRegister(insn, 0))
   1575         return -1;
   1576       break;
   1577     case ENCODING_I:
   1578       if (readOpcodeModifier(insn))
   1579         return -1;
   1580       break;
   1581     case ENCODING_VVVV:
   1582       needVVVV = 0; /* Mark that we have found a VVVV operand. */
   1583       if (!hasVVVV)
   1584         return -1;
   1585       if (fixupReg(insn, &insn->spec->operands[index]))
   1586         return -1;
   1587       break;
   1588     case ENCODING_DUP:
   1589       break;
   1590     default:
   1591       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
   1592       return -1;
   1593     }
   1594   }
   1595 
   1596   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
   1597   if (needVVVV) return -1;
   1598 
   1599   return 0;
   1600 }
   1601 
   1602 /*
   1603  * decodeInstruction - Reads and interprets a full instruction provided by the
   1604  *   user.
   1605  *
   1606  * @param insn      - A pointer to the instruction to be populated.  Must be
   1607  *                    pre-allocated.
   1608  * @param reader    - The function to be used to read the instruction's bytes.
   1609  * @param readerArg - A generic argument to be passed to the reader to store
   1610  *                    any internal state.
   1611  * @param logger    - If non-NULL, the function to be used to write log messages
   1612  *                    and warnings.
   1613  * @param loggerArg - A generic argument to be passed to the logger to store
   1614  *                    any internal state.
   1615  * @param startLoc  - The address (in the reader's address space) of the first
   1616  *                    byte in the instruction.
   1617  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
   1618  *                    decode the instruction in.
   1619  * @return          - 0 if the instruction's memory could be read; nonzero if
   1620  *                    not.
   1621  */
   1622 int decodeInstruction(struct InternalInstruction* insn,
   1623                       byteReader_t reader,
   1624                       void* readerArg,
   1625                       dlog_t logger,
   1626                       void* loggerArg,
   1627                       void* miiArg,
   1628                       uint64_t startLoc,
   1629                       DisassemblerMode mode) {
   1630   memset(insn, 0, sizeof(struct InternalInstruction));
   1631 
   1632   insn->reader = reader;
   1633   insn->readerArg = readerArg;
   1634   insn->dlog = logger;
   1635   insn->dlogArg = loggerArg;
   1636   insn->startLocation = startLoc;
   1637   insn->readerCursor = startLoc;
   1638   insn->mode = mode;
   1639   insn->numImmediatesConsumed = 0;
   1640 
   1641   if (readPrefixes(insn)       ||
   1642       readOpcode(insn)         ||
   1643       getID(insn, miiArg)      ||
   1644       insn->instructionID == 0 ||
   1645       readOperands(insn))
   1646     return -1;
   1647 
   1648   insn->length = insn->readerCursor - insn->startLocation;
   1649 
   1650   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
   1651             startLoc, insn->readerCursor, insn->length);
   1652 
   1653   if (insn->length > 15)
   1654     dbgprintf(insn, "Instruction exceeds 15-byte limit");
   1655 
   1656   return 0;
   1657 }
   1658