Home | History | Annotate | Download | only in Disassembler
      1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
      2  *
      3  *                     The LLVM Compiler Infrastructure
      4  *
      5  * This file is distributed under the University of Illinois Open Source
      6  * License. See LICENSE.TXT for details.
      7  *
      8  *===----------------------------------------------------------------------===*
      9  *
     10  * This file is part of the X86 Disassembler.
     11  * It contains the implementation of the instruction decoder.
     12  * Documentation for the disassembler can be found in X86Disassembler.h.
     13  *
     14  *===----------------------------------------------------------------------===*/
     15 
     16 #include <stdarg.h>   /* for va_*()       */
     17 #include <stdio.h>    /* for vsnprintf()  */
     18 #include <stdlib.h>   /* for exit()       */
     19 #include <string.h>   /* for memset()     */
     20 
     21 #include "X86DisassemblerDecoder.h"
     22 
     23 #include "X86GenDisassemblerTables.inc"
     24 
/* Truth values used for the boolean-valued variables and returns below. */
#define TRUE  1
#define FALSE 0

/*
 * File-local boolean type backed by a signed 8-bit integer.
 * NOTE(review): the functions visible in this part of the file use BOOL, not
 * bool -- confirm whether this typedef is still needed.
 */
typedef int8_t bool;

/*
 * debug - Reports the string s, together with the current source file and
 *   line, through x86DisassemblerDebug().  Expands to an empty statement when
 *   NDEBUG is defined.
 */
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
     35 
     36 
     37 /*
     38  * contextForAttrs - Client for the instruction context table.  Takes a set of
     39  *   attributes and returns the appropriate decode context.
     40  *
     41  * @param attrMask  - Attributes, from the enumeration attributeBits.
     42  * @return          - The InstructionContext to use when looking up an
     43  *                    an instruction with these attributes.
     44  */
     45 static InstructionContext contextForAttrs(uint8_t attrMask) {
     46   return CONTEXTS_SYM[attrMask];
     47 }
     48 
     49 /*
     50  * modRMRequired - Reads the appropriate instruction table to determine whether
     51  *   the ModR/M byte is required to decode a particular instruction.
     52  *
     53  * @param type        - The opcode type (i.e., how many bytes it has).
     54  * @param insnContext - The context for the instruction, as returned by
     55  *                      contextForAttrs.
     56  * @param opcode      - The last byte of the instruction's opcode, not counting
     57  *                      ModR/M extensions and escapes.
     58  * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
     59  */
     60 static int modRMRequired(OpcodeType type,
     61                          InstructionContext insnContext,
     62                          uint8_t opcode) {
     63   const struct ContextDecision* decision = 0;
     64 
     65   switch (type) {
     66   case ONEBYTE:
     67     decision = &ONEBYTE_SYM;
     68     break;
     69   case TWOBYTE:
     70     decision = &TWOBYTE_SYM;
     71     break;
     72   case THREEBYTE_38:
     73     decision = &THREEBYTE38_SYM;
     74     break;
     75   case THREEBYTE_3A:
     76     decision = &THREEBYTE3A_SYM;
     77     break;
     78   case THREEBYTE_A6:
     79     decision = &THREEBYTEA6_SYM;
     80     break;
     81   case THREEBYTE_A7:
     82     decision = &THREEBYTEA7_SYM;
     83     break;
     84   }
     85 
     86   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
     87     modrm_type != MODRM_ONEENTRY;
     88 }
     89 
     90 /*
     91  * decode - Reads the appropriate instruction table to obtain the unique ID of
     92  *   an instruction.
     93  *
     94  * @param type        - See modRMRequired().
     95  * @param insnContext - See modRMRequired().
     96  * @param opcode      - See modRMRequired().
     97  * @param modRM       - The ModR/M byte if required, or any value if not.
     98  * @return            - The UID of the instruction, or 0 on failure.
     99  */
    100 static InstrUID decode(OpcodeType type,
    101                        InstructionContext insnContext,
    102                        uint8_t opcode,
    103                        uint8_t modRM) {
    104   const struct ModRMDecision* dec = 0;
    105 
    106   switch (type) {
    107   case ONEBYTE:
    108     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    109     break;
    110   case TWOBYTE:
    111     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    112     break;
    113   case THREEBYTE_38:
    114     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    115     break;
    116   case THREEBYTE_3A:
    117     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    118     break;
    119   case THREEBYTE_A6:
    120     dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    121     break;
    122   case THREEBYTE_A7:
    123     dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    124     break;
    125   }
    126 
    127   switch (dec->modrm_type) {
    128   default:
    129     debug("Corrupt table!  Unknown modrm_type");
    130     return 0;
    131   case MODRM_ONEENTRY:
    132     return modRMTable[dec->instructionIDs];
    133   case MODRM_SPLITRM:
    134     if (modFromModRM(modRM) == 0x3)
    135       return modRMTable[dec->instructionIDs+1];
    136     return modRMTable[dec->instructionIDs];
    137   case MODRM_SPLITREG:
    138     if (modFromModRM(modRM) == 0x3)
    139       return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
    140     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
    141   case MODRM_SPLITMISC:
    142     if (modFromModRM(modRM) == 0x3)
    143       return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
    144     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
    145   case MODRM_FULL:
    146     return modRMTable[dec->instructionIDs+modRM];
    147   }
    148 }
    149 
    150 /*
    151  * specifierForUID - Given a UID, returns the name and operand specification for
    152  *   that instruction.
    153  *
    154  * @param uid - The unique ID for the instruction.  This should be returned by
    155  *              decode(); specifierForUID will not check bounds.
    156  * @return    - A pointer to the specification for that instruction.
    157  */
    158 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
    159   return &INSTRUCTIONS_SYM[uid];
    160 }
    161 
    162 /*
    163  * consumeByte - Uses the reader function provided by the user to consume one
    164  *   byte from the instruction's memory and advance the cursor.
    165  *
    166  * @param insn  - The instruction with the reader function to use.  The cursor
    167  *                for this instruction is advanced.
    168  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
    169  *                with the data read.
    170  * @return      - 0 if the read was successful; nonzero otherwise.
    171  */
    172 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
    173   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
    174 
    175   if (!ret)
    176     ++(insn->readerCursor);
    177 
    178   return ret;
    179 }
    180 
    181 /*
    182  * lookAtByte - Like consumeByte, but does not advance the cursor.
    183  *
    184  * @param insn  - See consumeByte().
    185  * @param byte  - See consumeByte().
    186  * @return      - See consumeByte().
    187  */
    188 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
    189   return insn->reader(insn->readerArg, byte, insn->readerCursor);
    190 }
    191 
    192 static void unconsumeByte(struct InternalInstruction* insn) {
    193   insn->readerCursor--;
    194 }
    195 
    196 #define CONSUME_FUNC(name, type)                                  \
    197   static int name(struct InternalInstruction* insn, type* ptr) {  \
    198     type combined = 0;                                            \
    199     unsigned offset;                                              \
    200     for (offset = 0; offset < sizeof(type); ++offset) {           \
    201       uint8_t byte;                                               \
    202       int ret = insn->reader(insn->readerArg,                     \
    203                              &byte,                               \
    204                              insn->readerCursor + offset);        \
    205       if (ret)                                                    \
    206         return ret;                                               \
    207       combined = combined | ((uint64_t)byte << (offset * 8));     \
    208     }                                                             \
    209     *ptr = combined;                                              \
    210     insn->readerCursor += sizeof(type);                           \
    211     return 0;                                                     \
    212   }
    213 
    214 /*
    215  * consume* - Use the reader function provided by the user to consume data
    216  *   values of various sizes from the instruction's memory and advance the
    217  *   cursor appropriately.  These readers perform endian conversion.
    218  *
    219  * @param insn    - See consumeByte().
    220  * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
    221  *                  be populated with the data read.
    222  * @return        - See consumeByte().
    223  */
    224 CONSUME_FUNC(consumeInt8, int8_t)
    225 CONSUME_FUNC(consumeInt16, int16_t)
    226 CONSUME_FUNC(consumeInt32, int32_t)
    227 CONSUME_FUNC(consumeUInt16, uint16_t)
    228 CONSUME_FUNC(consumeUInt32, uint32_t)
    229 CONSUME_FUNC(consumeUInt64, uint64_t)
    230 
    231 /*
    232  * dbgprintf - Uses the logging function provided by the user to log a single
    233  *   message, typically without a carriage-return.
    234  *
    235  * @param insn    - The instruction containing the logging function.
    236  * @param format  - See printf().
    237  * @param ...     - See printf().
    238  */
    239 static void dbgprintf(struct InternalInstruction* insn,
    240                       const char* format,
    241                       ...) {
    242   char buffer[256];
    243   va_list ap;
    244 
    245   if (!insn->dlog)
    246     return;
    247 
    248   va_start(ap, format);
    249   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
    250   va_end(ap);
    251 
    252   insn->dlog(insn->dlogArg, buffer);
    253 
    254   return;
    255 }
    256 
    257 /*
    258  * setPrefixPresent - Marks that a particular prefix is present at a particular
    259  *   location.
    260  *
    261  * @param insn      - The instruction to be marked as having the prefix.
    262  * @param prefix    - The prefix that is present.
    263  * @param location  - The location where the prefix is located (in the address
    264  *                    space of the instruction's reader).
    265  */
    266 static void setPrefixPresent(struct InternalInstruction* insn,
    267                                     uint8_t prefix,
    268                                     uint64_t location)
    269 {
    270   insn->prefixPresent[prefix] = 1;
    271   insn->prefixLocations[prefix] = location;
    272 }
    273 
    274 /*
    275  * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
    276  *   present at a given location.
    277  *
    278  * @param insn      - The instruction to be queried.
    279  * @param prefix    - The prefix.
    280  * @param location  - The location to query.
    281  * @return          - Whether the prefix is at that location.
    282  */
    283 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
    284                                uint8_t prefix,
    285                                uint64_t location)
    286 {
    287   if (insn->prefixPresent[prefix] == 1 &&
    288      insn->prefixLocations[prefix] == location)
    289     return TRUE;
    290   else
    291     return FALSE;
    292 }
    293 
    294 /*
    295  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
    296  *   instruction as having them.  Also sets the instruction's default operand,
    297  *   address, and other relevant data sizes to report operands correctly.
    298  *
    299  * @param insn  - The instruction whose prefixes are to be read.
    300  * @return      - 0 if the instruction could be read until the end of the prefix
    301  *                bytes, and no prefixes conflicted; nonzero otherwise.
    302  */
    303 static int readPrefixes(struct InternalInstruction* insn) {
    304   BOOL isPrefix = TRUE;
    305   BOOL prefixGroups[4] = { FALSE };
    306   uint64_t prefixLocation;
    307   uint8_t byte = 0;
    308 
    309   BOOL hasAdSize = FALSE;
    310   BOOL hasOpSize = FALSE;
    311 
    312   dbgprintf(insn, "readPrefixes()");
    313 
    314   while (isPrefix) {
    315     prefixLocation = insn->readerCursor;
    316 
    317     if (consumeByte(insn, &byte))
    318       return -1;
    319 
    320     /*
    321      * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
    322      * break and let it be disassembled as a normal "instruction".
    323      */
    324     if (insn->readerCursor - 1 == insn->startLocation
    325         && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
    326       uint8_t nextByte;
    327       if (byte == 0xf0)
    328         break;
    329       if (lookAtByte(insn, &nextByte))
    330         return -1;
    331       /*
    332        * If the byte is 0xf2 or 0xf3, and any of the following conditions are
    333        * met:
    334        * - it is followed by a LOCK (0xf0) prefix
    335        * - it is followed by an xchg instruction
    336        * then it should be disassembled as a xacquire/xrelease not repne/rep.
    337        */
    338       if ((byte == 0xf2 || byte == 0xf3) &&
    339           ((nextByte == 0xf0) |
    340           ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
    341         insn->xAcquireRelease = TRUE;
    342       /*
    343        * Also if the byte is 0xf3, and the following condition is met:
    344        * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
    345        *                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
    346        * then it should be disassembled as an xrelease not rep.
    347        */
    348       if (byte == 0xf3 &&
    349           (nextByte == 0x88 || nextByte == 0x89 ||
    350            nextByte == 0xc6 || nextByte == 0xc7))
    351         insn->xAcquireRelease = TRUE;
    352       if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
    353         if (consumeByte(insn, &nextByte))
    354           return -1;
    355         if (lookAtByte(insn, &nextByte))
    356           return -1;
    357         unconsumeByte(insn);
    358       }
    359       if (nextByte != 0x0f && nextByte != 0x90)
    360         break;
    361     }
    362 
    363     switch (byte) {
    364     case 0xf0:  /* LOCK */
    365     case 0xf2:  /* REPNE/REPNZ */
    366     case 0xf3:  /* REP or REPE/REPZ */
    367       if (prefixGroups[0])
    368         dbgprintf(insn, "Redundant Group 1 prefix");
    369       prefixGroups[0] = TRUE;
    370       setPrefixPresent(insn, byte, prefixLocation);
    371       break;
    372     case 0x2e:  /* CS segment override -OR- Branch not taken */
    373     case 0x36:  /* SS segment override -OR- Branch taken */
    374     case 0x3e:  /* DS segment override */
    375     case 0x26:  /* ES segment override */
    376     case 0x64:  /* FS segment override */
    377     case 0x65:  /* GS segment override */
    378       switch (byte) {
    379       case 0x2e:
    380         insn->segmentOverride = SEG_OVERRIDE_CS;
    381         break;
    382       case 0x36:
    383         insn->segmentOverride = SEG_OVERRIDE_SS;
    384         break;
    385       case 0x3e:
    386         insn->segmentOverride = SEG_OVERRIDE_DS;
    387         break;
    388       case 0x26:
    389         insn->segmentOverride = SEG_OVERRIDE_ES;
    390         break;
    391       case 0x64:
    392         insn->segmentOverride = SEG_OVERRIDE_FS;
    393         break;
    394       case 0x65:
    395         insn->segmentOverride = SEG_OVERRIDE_GS;
    396         break;
    397       default:
    398         debug("Unhandled override");
    399         return -1;
    400       }
    401       if (prefixGroups[1])
    402         dbgprintf(insn, "Redundant Group 2 prefix");
    403       prefixGroups[1] = TRUE;
    404       setPrefixPresent(insn, byte, prefixLocation);
    405       break;
    406     case 0x66:  /* Operand-size override */
    407       if (prefixGroups[2])
    408         dbgprintf(insn, "Redundant Group 3 prefix");
    409       prefixGroups[2] = TRUE;
    410       hasOpSize = TRUE;
    411       setPrefixPresent(insn, byte, prefixLocation);
    412       break;
    413     case 0x67:  /* Address-size override */
    414       if (prefixGroups[3])
    415         dbgprintf(insn, "Redundant Group 4 prefix");
    416       prefixGroups[3] = TRUE;
    417       hasAdSize = TRUE;
    418       setPrefixPresent(insn, byte, prefixLocation);
    419       break;
    420     default:    /* Not a prefix byte */
    421       isPrefix = FALSE;
    422       break;
    423     }
    424 
    425     if (isPrefix)
    426       dbgprintf(insn, "Found prefix 0x%hhx", byte);
    427   }
    428 
    429   insn->vexSize = 0;
    430 
    431   if (byte == 0xc4) {
    432     uint8_t byte1;
    433 
    434     if (lookAtByte(insn, &byte1)) {
    435       dbgprintf(insn, "Couldn't read second byte of VEX");
    436       return -1;
    437     }
    438 
    439     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
    440       insn->vexSize = 3;
    441       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    442     }
    443     else {
    444       unconsumeByte(insn);
    445       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    446     }
    447 
    448     if (insn->vexSize == 3) {
    449       insn->vexPrefix[0] = byte;
    450       consumeByte(insn, &insn->vexPrefix[1]);
    451       consumeByte(insn, &insn->vexPrefix[2]);
    452 
    453       /* We simulate the REX prefix for simplicity's sake */
    454 
    455       if (insn->mode == MODE_64BIT) {
    456         insn->rexPrefix = 0x40
    457                         | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
    458                         | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
    459                         | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
    460                         | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
    461       }
    462 
    463       switch (ppFromVEX3of3(insn->vexPrefix[2]))
    464       {
    465       default:
    466         break;
    467       case VEX_PREFIX_66:
    468         hasOpSize = TRUE;
    469         break;
    470       }
    471 
    472       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
    473     }
    474   }
    475   else if (byte == 0xc5) {
    476     uint8_t byte1;
    477 
    478     if (lookAtByte(insn, &byte1)) {
    479       dbgprintf(insn, "Couldn't read second byte of VEX");
    480       return -1;
    481     }
    482 
    483     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
    484       insn->vexSize = 2;
    485     }
    486     else {
    487       unconsumeByte(insn);
    488     }
    489 
    490     if (insn->vexSize == 2) {
    491       insn->vexPrefix[0] = byte;
    492       consumeByte(insn, &insn->vexPrefix[1]);
    493 
    494       if (insn->mode == MODE_64BIT) {
    495         insn->rexPrefix = 0x40
    496                         | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
    497       }
    498 
    499       switch (ppFromVEX2of2(insn->vexPrefix[1]))
    500       {
    501       default:
    502         break;
    503       case VEX_PREFIX_66:
    504         hasOpSize = TRUE;
    505         break;
    506       }
    507 
    508       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
    509     }
    510   }
    511   else {
    512     if (insn->mode == MODE_64BIT) {
    513       if ((byte & 0xf0) == 0x40) {
    514         uint8_t opcodeByte;
    515 
    516         if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
    517           dbgprintf(insn, "Redundant REX prefix");
    518           return -1;
    519         }
    520 
    521         insn->rexPrefix = byte;
    522         insn->necessaryPrefixLocation = insn->readerCursor - 2;
    523 
    524         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
    525       } else {
    526         unconsumeByte(insn);
    527         insn->necessaryPrefixLocation = insn->readerCursor - 1;
    528       }
    529     } else {
    530       unconsumeByte(insn);
    531       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    532     }
    533   }
    534 
    535   if (insn->mode == MODE_16BIT) {
    536     insn->registerSize       = (hasOpSize ? 4 : 2);
    537     insn->addressSize        = (hasAdSize ? 4 : 2);
    538     insn->displacementSize   = (hasAdSize ? 4 : 2);
    539     insn->immediateSize      = (hasOpSize ? 4 : 2);
    540   } else if (insn->mode == MODE_32BIT) {
    541     insn->registerSize       = (hasOpSize ? 2 : 4);
    542     insn->addressSize        = (hasAdSize ? 2 : 4);
    543     insn->displacementSize   = (hasAdSize ? 2 : 4);
    544     insn->immediateSize      = (hasOpSize ? 2 : 4);
    545   } else if (insn->mode == MODE_64BIT) {
    546     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
    547       insn->registerSize       = 8;
    548       insn->addressSize        = (hasAdSize ? 4 : 8);
    549       insn->displacementSize   = 4;
    550       insn->immediateSize      = 4;
    551     } else if (insn->rexPrefix) {
    552       insn->registerSize       = (hasOpSize ? 2 : 4);
    553       insn->addressSize        = (hasAdSize ? 4 : 8);
    554       insn->displacementSize   = (hasOpSize ? 2 : 4);
    555       insn->immediateSize      = (hasOpSize ? 2 : 4);
    556     } else {
    557       insn->registerSize       = (hasOpSize ? 2 : 4);
    558       insn->addressSize        = (hasAdSize ? 4 : 8);
    559       insn->displacementSize   = (hasOpSize ? 2 : 4);
    560       insn->immediateSize      = (hasOpSize ? 2 : 4);
    561     }
    562   }
    563 
    564   return 0;
    565 }
    566 
    567 /*
    568  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
    569  *   extended or escape opcodes).
    570  *
    571  * @param insn  - The instruction whose opcode is to be read.
    572  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
    573  */
    574 static int readOpcode(struct InternalInstruction* insn) {
    575   /* Determine the length of the primary opcode */
    576 
    577   uint8_t current;
    578 
    579   dbgprintf(insn, "readOpcode()");
    580 
    581   insn->opcodeType = ONEBYTE;
    582 
    583   if (insn->vexSize == 3)
    584   {
    585     switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
    586     {
    587     default:
    588       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
    589       return -1;
    590     case 0:
    591       break;
    592     case VEX_LOB_0F:
    593       insn->twoByteEscape = 0x0f;
    594       insn->opcodeType = TWOBYTE;
    595       return consumeByte(insn, &insn->opcode);
    596     case VEX_LOB_0F38:
    597       insn->twoByteEscape = 0x0f;
    598       insn->threeByteEscape = 0x38;
    599       insn->opcodeType = THREEBYTE_38;
    600       return consumeByte(insn, &insn->opcode);
    601     case VEX_LOB_0F3A:
    602       insn->twoByteEscape = 0x0f;
    603       insn->threeByteEscape = 0x3a;
    604       insn->opcodeType = THREEBYTE_3A;
    605       return consumeByte(insn, &insn->opcode);
    606     }
    607   }
    608   else if (insn->vexSize == 2)
    609   {
    610     insn->twoByteEscape = 0x0f;
    611     insn->opcodeType = TWOBYTE;
    612     return consumeByte(insn, &insn->opcode);
    613   }
    614 
    615   if (consumeByte(insn, &current))
    616     return -1;
    617 
    618   if (current == 0x0f) {
    619     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
    620 
    621     insn->twoByteEscape = current;
    622 
    623     if (consumeByte(insn, &current))
    624       return -1;
    625 
    626     if (current == 0x38) {
    627       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    628 
    629       insn->threeByteEscape = current;
    630 
    631       if (consumeByte(insn, &current))
    632         return -1;
    633 
    634       insn->opcodeType = THREEBYTE_38;
    635     } else if (current == 0x3a) {
    636       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    637 
    638       insn->threeByteEscape = current;
    639 
    640       if (consumeByte(insn, &current))
    641         return -1;
    642 
    643       insn->opcodeType = THREEBYTE_3A;
    644     } else if (current == 0xa6) {
    645       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    646 
    647       insn->threeByteEscape = current;
    648 
    649       if (consumeByte(insn, &current))
    650         return -1;
    651 
    652       insn->opcodeType = THREEBYTE_A6;
    653     } else if (current == 0xa7) {
    654       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    655 
    656       insn->threeByteEscape = current;
    657 
    658       if (consumeByte(insn, &current))
    659         return -1;
    660 
    661       insn->opcodeType = THREEBYTE_A7;
    662     } else {
    663       dbgprintf(insn, "Didn't find a three-byte escape prefix");
    664 
    665       insn->opcodeType = TWOBYTE;
    666     }
    667   }
    668 
    669   /*
    670    * At this point we have consumed the full opcode.
    671    * Anything we consume from here on must be unconsumed.
    672    */
    673 
    674   insn->opcode = current;
    675 
    676   return 0;
    677 }
    678 
    679 static int readModRM(struct InternalInstruction* insn);
    680 
    681 /*
    682  * getIDWithAttrMask - Determines the ID of an instruction, consuming
    683  *   the ModR/M byte as appropriate for extended and escape opcodes,
    684  *   and using a supplied attribute mask.
    685  *
    686  * @param instructionID - A pointer whose target is filled in with the ID of the
    687  *                        instruction.
    688  * @param insn          - The instruction whose ID is to be determined.
    689  * @param attrMask      - The attribute mask to search.
    690  * @return              - 0 if the ModR/M could be read when needed or was not
    691  *                        needed; nonzero otherwise.
    692  */
    693 static int getIDWithAttrMask(uint16_t* instructionID,
    694                              struct InternalInstruction* insn,
    695                              uint8_t attrMask) {
    696   BOOL hasModRMExtension;
    697 
    698   uint8_t instructionClass;
    699 
    700   instructionClass = contextForAttrs(attrMask);
    701 
    702   hasModRMExtension = modRMRequired(insn->opcodeType,
    703                                     instructionClass,
    704                                     insn->opcode);
    705 
    706   if (hasModRMExtension) {
    707     if (readModRM(insn))
    708       return -1;
    709 
    710     *instructionID = decode(insn->opcodeType,
    711                             instructionClass,
    712                             insn->opcode,
    713                             insn->modRM);
    714   } else {
    715     *instructionID = decode(insn->opcodeType,
    716                             instructionClass,
    717                             insn->opcode,
    718                             0);
    719   }
    720 
    721   return 0;
    722 }
    723 
    724 /*
    725  * is16BitEquivalent - Determines whether two instruction names refer to
    726  * equivalent instructions but one is 16-bit whereas the other is not.
    727  *
    728  * @param orig  - The instruction that is not 16-bit
    729  * @param equiv - The instruction that is 16-bit
    730  */
    731 static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
    732   off_t i;
    733 
    734   for (i = 0;; i++) {
    735     if (orig[i] == '\0' && equiv[i] == '\0')
    736       return TRUE;
    737     if (orig[i] == '\0' || equiv[i] == '\0')
    738       return FALSE;
    739     if (orig[i] != equiv[i]) {
    740       if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
    741         continue;
    742       if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
    743         continue;
    744       if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
    745         continue;
    746       return FALSE;
    747     }
    748   }
    749 }
    750 
    751 /*
    752  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
    753  *   appropriate for extended and escape opcodes.  Determines the attributes and
    754  *   context for the instruction before doing so.
    755  *
    756  * @param insn  - The instruction whose ID is to be determined.
    757  * @return      - 0 if the ModR/M could be read when needed or was not needed;
    758  *                nonzero otherwise.
    759  */
    760 static int getID(struct InternalInstruction* insn, const void *miiArg) {
    761   uint8_t attrMask;
    762   uint16_t instructionID;
    763 
    764   dbgprintf(insn, "getID()");
    765 
    766   attrMask = ATTR_NONE;
    767 
    768   if (insn->mode == MODE_64BIT)
    769     attrMask |= ATTR_64BIT;
    770 
    771   if (insn->vexSize) {
    772     attrMask |= ATTR_VEX;
    773 
    774     if (insn->vexSize == 3) {
    775       switch (ppFromVEX3of3(insn->vexPrefix[2])) {
    776       case VEX_PREFIX_66:
    777         attrMask |= ATTR_OPSIZE;
    778         break;
    779       case VEX_PREFIX_F3:
    780         attrMask |= ATTR_XS;
    781         break;
    782       case VEX_PREFIX_F2:
    783         attrMask |= ATTR_XD;
    784         break;
    785       }
    786 
    787       if (lFromVEX3of3(insn->vexPrefix[2]))
    788         attrMask |= ATTR_VEXL;
    789     }
    790     else if (insn->vexSize == 2) {
    791       switch (ppFromVEX2of2(insn->vexPrefix[1])) {
    792       case VEX_PREFIX_66:
    793         attrMask |= ATTR_OPSIZE;
    794         break;
    795       case VEX_PREFIX_F3:
    796         attrMask |= ATTR_XS;
    797         break;
    798       case VEX_PREFIX_F2:
    799         attrMask |= ATTR_XD;
    800         break;
    801       }
    802 
    803       if (lFromVEX2of2(insn->vexPrefix[1]))
    804         attrMask |= ATTR_VEXL;
    805     }
    806     else {
    807       return -1;
    808     }
    809   }
    810   else {
    811     if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
    812       attrMask |= ATTR_OPSIZE;
    813     else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
    814       attrMask |= ATTR_ADSIZE;
    815     else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
    816       attrMask |= ATTR_XS;
    817     else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
    818       attrMask |= ATTR_XD;
    819   }
    820 
    821   if (insn->rexPrefix & 0x08)
    822     attrMask |= ATTR_REXW;
    823 
    824   if (getIDWithAttrMask(&instructionID, insn, attrMask))
    825     return -1;
    826 
    827   /* The following clauses compensate for limitations of the tables. */
    828 
    829   if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
    830       !(attrMask & ATTR_OPSIZE)) {
    831     /*
    832      * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
    833      * has precedence since there are no L-bit with W-bit entries in the tables.
    834      * So if the L-bit isn't significant we should use the W-bit instead.
    835      * We only need to do this if the instruction doesn't specify OpSize since
    836      * there is a VEX_L_W_OPSIZE table.
    837      */
    838 
    839     const struct InstructionSpecifier *spec;
    840     uint16_t instructionIDWithWBit;
    841     const struct InstructionSpecifier *specWithWBit;
    842 
    843     spec = specifierForUID(instructionID);
    844 
    845     if (getIDWithAttrMask(&instructionIDWithWBit,
    846                           insn,
    847                           (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
    848       insn->instructionID = instructionID;
    849       insn->spec = spec;
    850       return 0;
    851     }
    852 
    853     specWithWBit = specifierForUID(instructionIDWithWBit);
    854 
    855     if (instructionID != instructionIDWithWBit) {
    856       insn->instructionID = instructionIDWithWBit;
    857       insn->spec = specWithWBit;
    858     } else {
    859       insn->instructionID = instructionID;
    860       insn->spec = spec;
    861     }
    862     return 0;
    863   }
    864 
    865   if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
    866     /*
    867      * The instruction tables make no distinction between instructions that
    868      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    869      * particular spot (i.e., many MMX operations).  In general we're
    870      * conservative, but in the specific case where OpSize is present but not
    871      * in the right place we check if there's a 16-bit operation.
    872      */
    873 
    874     const struct InstructionSpecifier *spec;
    875     uint16_t instructionIDWithOpsize;
    876     const char *specName, *specWithOpSizeName;
    877 
    878     spec = specifierForUID(instructionID);
    879 
    880     if (getIDWithAttrMask(&instructionIDWithOpsize,
    881                           insn,
    882                           attrMask | ATTR_OPSIZE)) {
    883       /*
    884        * ModRM required with OpSize but not present; give up and return version
    885        * without OpSize set
    886        */
    887 
    888       insn->instructionID = instructionID;
    889       insn->spec = spec;
    890       return 0;
    891     }
    892 
    893     specName = x86DisassemblerGetInstrName(instructionID, miiArg);
    894     specWithOpSizeName =
    895       x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
    896 
    897     if (is16BitEquivalent(specName, specWithOpSizeName)) {
    898       insn->instructionID = instructionIDWithOpsize;
    899       insn->spec = specifierForUID(instructionIDWithOpsize);
    900     } else {
    901       insn->instructionID = instructionID;
    902       insn->spec = spec;
    903     }
    904     return 0;
    905   }
    906 
    907   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
    908       insn->rexPrefix & 0x01) {
    909     /*
    910      * NOOP shouldn't decode as NOOP if REX.b is set. Instead
    911      * it should decode as XCHG %r8, %eax.
    912      */
    913 
    914     const struct InstructionSpecifier *spec;
    915     uint16_t instructionIDWithNewOpcode;
    916     const struct InstructionSpecifier *specWithNewOpcode;
    917 
    918     spec = specifierForUID(instructionID);
    919 
    920     /* Borrow opcode from one of the other XCHGar opcodes */
    921     insn->opcode = 0x91;
    922 
    923     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
    924                           insn,
    925                           attrMask)) {
    926       insn->opcode = 0x90;
    927 
    928       insn->instructionID = instructionID;
    929       insn->spec = spec;
    930       return 0;
    931     }
    932 
    933     specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
    934 
    935     /* Change back */
    936     insn->opcode = 0x90;
    937 
    938     insn->instructionID = instructionIDWithNewOpcode;
    939     insn->spec = specWithNewOpcode;
    940 
    941     return 0;
    942   }
    943 
    944   insn->instructionID = instructionID;
    945   insn->spec = specifierForUID(insn->instructionID);
    946 
    947   return 0;
    948 }
    949 
    950 /*
    951  * readSIB - Consumes the SIB byte to determine addressing information for an
    952  *   instruction.
    953  *
    954  * @param insn  - The instruction whose SIB byte is to be read.
    955  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
    956  */
    957 static int readSIB(struct InternalInstruction* insn) {
    958   SIBIndex sibIndexBase = 0;
    959   SIBBase sibBaseBase = 0;
    960   uint8_t index, base;
    961 
    962   dbgprintf(insn, "readSIB()");
    963 
    964   if (insn->consumedSIB)
    965     return 0;
    966 
    967   insn->consumedSIB = TRUE;
    968 
    969   switch (insn->addressSize) {
    970   case 2:
    971     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
    972     return -1;
    973     break;
    974   case 4:
    975     sibIndexBase = SIB_INDEX_EAX;
    976     sibBaseBase = SIB_BASE_EAX;
    977     break;
    978   case 8:
    979     sibIndexBase = SIB_INDEX_RAX;
    980     sibBaseBase = SIB_BASE_RAX;
    981     break;
    982   }
    983 
    984   if (consumeByte(insn, &insn->sib))
    985     return -1;
    986 
    987   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
    988 
    989   switch (index) {
    990   case 0x4:
    991     insn->sibIndex = SIB_INDEX_NONE;
    992     break;
    993   default:
    994     insn->sibIndex = (SIBIndex)(sibIndexBase + index);
    995     if (insn->sibIndex == SIB_INDEX_sib ||
    996         insn->sibIndex == SIB_INDEX_sib64)
    997       insn->sibIndex = SIB_INDEX_NONE;
    998     break;
    999   }
   1000 
   1001   switch (scaleFromSIB(insn->sib)) {
   1002   case 0:
   1003     insn->sibScale = 1;
   1004     break;
   1005   case 1:
   1006     insn->sibScale = 2;
   1007     break;
   1008   case 2:
   1009     insn->sibScale = 4;
   1010     break;
   1011   case 3:
   1012     insn->sibScale = 8;
   1013     break;
   1014   }
   1015 
   1016   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
   1017 
   1018   switch (base) {
   1019   case 0x5:
   1020     switch (modFromModRM(insn->modRM)) {
   1021     case 0x0:
   1022       insn->eaDisplacement = EA_DISP_32;
   1023       insn->sibBase = SIB_BASE_NONE;
   1024       break;
   1025     case 0x1:
   1026       insn->eaDisplacement = EA_DISP_8;
   1027       insn->sibBase = (insn->addressSize == 4 ?
   1028                        SIB_BASE_EBP : SIB_BASE_RBP);
   1029       break;
   1030     case 0x2:
   1031       insn->eaDisplacement = EA_DISP_32;
   1032       insn->sibBase = (insn->addressSize == 4 ?
   1033                        SIB_BASE_EBP : SIB_BASE_RBP);
   1034       break;
   1035     case 0x3:
   1036       debug("Cannot have Mod = 0b11 and a SIB byte");
   1037       return -1;
   1038     }
   1039     break;
   1040   default:
   1041     insn->sibBase = (SIBBase)(sibBaseBase + base);
   1042     break;
   1043   }
   1044 
   1045   return 0;
   1046 }
   1047 
   1048 /*
   1049  * readDisplacement - Consumes the displacement of an instruction.
   1050  *
   1051  * @param insn  - The instruction whose displacement is to be read.
   1052  * @return      - 0 if the displacement byte was successfully read; nonzero
   1053  *                otherwise.
   1054  */
   1055 static int readDisplacement(struct InternalInstruction* insn) {
   1056   int8_t d8;
   1057   int16_t d16;
   1058   int32_t d32;
   1059 
   1060   dbgprintf(insn, "readDisplacement()");
   1061 
   1062   if (insn->consumedDisplacement)
   1063     return 0;
   1064 
   1065   insn->consumedDisplacement = TRUE;
   1066   insn->displacementOffset = insn->readerCursor - insn->startLocation;
   1067 
   1068   switch (insn->eaDisplacement) {
   1069   case EA_DISP_NONE:
   1070     insn->consumedDisplacement = FALSE;
   1071     break;
   1072   case EA_DISP_8:
   1073     if (consumeInt8(insn, &d8))
   1074       return -1;
   1075     insn->displacement = d8;
   1076     break;
   1077   case EA_DISP_16:
   1078     if (consumeInt16(insn, &d16))
   1079       return -1;
   1080     insn->displacement = d16;
   1081     break;
   1082   case EA_DISP_32:
   1083     if (consumeInt32(insn, &d32))
   1084       return -1;
   1085     insn->displacement = d32;
   1086     break;
   1087   }
   1088 
   1089   insn->consumedDisplacement = TRUE;
   1090   return 0;
   1091 }
   1092 
   1093 /*
   1094  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
   1095  *   displacement) for an instruction and interprets it.
   1096  *
   1097  * @param insn  - The instruction whose addressing information is to be read.
   1098  * @return      - 0 if the information was successfully read; nonzero otherwise.
   1099  */
   1100 static int readModRM(struct InternalInstruction* insn) {
   1101   uint8_t mod, rm, reg;
   1102 
   1103   dbgprintf(insn, "readModRM()");
   1104 
   1105   if (insn->consumedModRM)
   1106     return 0;
   1107 
   1108   if (consumeByte(insn, &insn->modRM))
   1109     return -1;
   1110   insn->consumedModRM = TRUE;
   1111 
   1112   mod     = modFromModRM(insn->modRM);
   1113   rm      = rmFromModRM(insn->modRM);
   1114   reg     = regFromModRM(insn->modRM);
   1115 
   1116   /*
   1117    * This goes by insn->registerSize to pick the correct register, which messes
   1118    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
   1119    * fixupReg().
   1120    */
   1121   switch (insn->registerSize) {
   1122   case 2:
   1123     insn->regBase = MODRM_REG_AX;
   1124     insn->eaRegBase = EA_REG_AX;
   1125     break;
   1126   case 4:
   1127     insn->regBase = MODRM_REG_EAX;
   1128     insn->eaRegBase = EA_REG_EAX;
   1129     break;
   1130   case 8:
   1131     insn->regBase = MODRM_REG_RAX;
   1132     insn->eaRegBase = EA_REG_RAX;
   1133     break;
   1134   }
   1135 
   1136   reg |= rFromREX(insn->rexPrefix) << 3;
   1137   rm  |= bFromREX(insn->rexPrefix) << 3;
   1138 
   1139   insn->reg = (Reg)(insn->regBase + reg);
   1140 
   1141   switch (insn->addressSize) {
   1142   case 2:
   1143     insn->eaBaseBase = EA_BASE_BX_SI;
   1144 
   1145     switch (mod) {
   1146     case 0x0:
   1147       if (rm == 0x6) {
   1148         insn->eaBase = EA_BASE_NONE;
   1149         insn->eaDisplacement = EA_DISP_16;
   1150         if (readDisplacement(insn))
   1151           return -1;
   1152       } else {
   1153         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1154         insn->eaDisplacement = EA_DISP_NONE;
   1155       }
   1156       break;
   1157     case 0x1:
   1158       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1159       insn->eaDisplacement = EA_DISP_8;
   1160       if (readDisplacement(insn))
   1161         return -1;
   1162       break;
   1163     case 0x2:
   1164       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1165       insn->eaDisplacement = EA_DISP_16;
   1166       if (readDisplacement(insn))
   1167         return -1;
   1168       break;
   1169     case 0x3:
   1170       insn->eaBase = (EABase)(insn->eaRegBase + rm);
   1171       if (readDisplacement(insn))
   1172         return -1;
   1173       break;
   1174     }
   1175     break;
   1176   case 4:
   1177   case 8:
   1178     insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
   1179 
   1180     switch (mod) {
   1181     case 0x0:
   1182       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
   1183       switch (rm) {
   1184       case 0x4:
   1185       case 0xc:   /* in case REXW.b is set */
   1186         insn->eaBase = (insn->addressSize == 4 ?
   1187                         EA_BASE_sib : EA_BASE_sib64);
   1188         readSIB(insn);
   1189         if (readDisplacement(insn))
   1190           return -1;
   1191         break;
   1192       case 0x5:
   1193         insn->eaBase = EA_BASE_NONE;
   1194         insn->eaDisplacement = EA_DISP_32;
   1195         if (readDisplacement(insn))
   1196           return -1;
   1197         break;
   1198       default:
   1199         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1200         break;
   1201       }
   1202       break;
   1203     case 0x1:
   1204     case 0x2:
   1205       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
   1206       switch (rm) {
   1207       case 0x4:
   1208       case 0xc:   /* in case REXW.b is set */
   1209         insn->eaBase = EA_BASE_sib;
   1210         readSIB(insn);
   1211         if (readDisplacement(insn))
   1212           return -1;
   1213         break;
   1214       default:
   1215         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1216         if (readDisplacement(insn))
   1217           return -1;
   1218         break;
   1219       }
   1220       break;
   1221     case 0x3:
   1222       insn->eaDisplacement = EA_DISP_NONE;
   1223       insn->eaBase = (EABase)(insn->eaRegBase + rm);
   1224       break;
   1225     }
   1226     break;
   1227   } /* switch (insn->addressSize) */
   1228 
   1229   return 0;
   1230 }
   1231 
   1232 #define GENERIC_FIXUP_FUNC(name, base, prefix)            \
   1233   static uint8_t name(struct InternalInstruction *insn,   \
   1234                       OperandType type,                   \
   1235                       uint8_t index,                      \
   1236                       uint8_t *valid) {                   \
   1237     *valid = 1;                                           \
   1238     switch (type) {                                       \
   1239     default:                                              \
   1240       debug("Unhandled register type");                   \
   1241       *valid = 0;                                         \
   1242       return 0;                                           \
   1243     case TYPE_Rv:                                         \
   1244       return base + index;                                \
   1245     case TYPE_R8:                                         \
   1246       if (insn->rexPrefix &&                              \
   1247          index >= 4 && index <= 7) {                      \
   1248         return prefix##_SPL + (index - 4);                \
   1249       } else {                                            \
   1250         return prefix##_AL + index;                       \
   1251       }                                                   \
   1252     case TYPE_R16:                                        \
   1253       return prefix##_AX + index;                         \
   1254     case TYPE_R32:                                        \
   1255       return prefix##_EAX + index;                        \
   1256     case TYPE_R64:                                        \
   1257       return prefix##_RAX + index;                        \
   1258     case TYPE_XMM512:                                     \
   1259       return prefix##_ZMM0 + index;                       \
   1260     case TYPE_XMM256:                                     \
   1261       return prefix##_YMM0 + index;                       \
   1262     case TYPE_XMM128:                                     \
   1263     case TYPE_XMM64:                                      \
   1264     case TYPE_XMM32:                                      \
   1265     case TYPE_XMM:                                        \
   1266       return prefix##_XMM0 + index;                       \
   1267     case TYPE_MM64:                                       \
   1268     case TYPE_MM32:                                       \
   1269     case TYPE_MM:                                         \
   1270       if (index > 7)                                      \
   1271         *valid = 0;                                       \
   1272       return prefix##_MM0 + index;                        \
   1273     case TYPE_SEGMENTREG:                                 \
   1274       if (index > 5)                                      \
   1275         *valid = 0;                                       \
   1276       return prefix##_ES + index;                         \
   1277     case TYPE_DEBUGREG:                                   \
   1278       if (index > 7)                                      \
   1279         *valid = 0;                                       \
   1280       return prefix##_DR0 + index;                        \
   1281     case TYPE_CONTROLREG:                                 \
   1282       if (index > 8)                                      \
   1283         *valid = 0;                                       \
   1284       return prefix##_CR0 + index;                        \
   1285     }                                                     \
   1286   }
   1287 
   1288 /*
   1289  * fixup*Value - Consults an operand type to determine the meaning of the
   1290  *   reg or R/M field.  If the operand is an XMM operand, for example, an
   1291  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
   1292  *   misinterpret it as.
   1293  *
   1294  * @param insn  - The instruction containing the operand.
   1295  * @param type  - The operand type.
   1296  * @param index - The existing value of the field as reported by readModRM().
   1297  * @param valid - The address of a uint8_t.  The target is set to 1 if the
   1298  *                field is valid for the register class; 0 if not.
   1299  * @return      - The proper value.
   1300  */
   1301 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
   1302 GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
   1303 
   1304 /*
   1305  * fixupReg - Consults an operand specifier to determine which of the
   1306  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
   1307  *
   1308  * @param insn  - See fixup*Value().
   1309  * @param op    - The operand specifier.
   1310  * @return      - 0 if fixup was successful; -1 if the register returned was
   1311  *                invalid for its class.
   1312  */
   1313 static int fixupReg(struct InternalInstruction *insn,
   1314                     const struct OperandSpecifier *op) {
   1315   uint8_t valid;
   1316 
   1317   dbgprintf(insn, "fixupReg()");
   1318 
   1319   switch ((OperandEncoding)op->encoding) {
   1320   default:
   1321     debug("Expected a REG or R/M encoding in fixupReg");
   1322     return -1;
   1323   case ENCODING_VVVV:
   1324     insn->vvvv = (Reg)fixupRegValue(insn,
   1325                                     (OperandType)op->type,
   1326                                     insn->vvvv,
   1327                                     &valid);
   1328     if (!valid)
   1329       return -1;
   1330     break;
   1331   case ENCODING_REG:
   1332     insn->reg = (Reg)fixupRegValue(insn,
   1333                                    (OperandType)op->type,
   1334                                    insn->reg - insn->regBase,
   1335                                    &valid);
   1336     if (!valid)
   1337       return -1;
   1338     break;
   1339   case ENCODING_RM:
   1340     if (insn->eaBase >= insn->eaRegBase) {
   1341       insn->eaBase = (EABase)fixupRMValue(insn,
   1342                                           (OperandType)op->type,
   1343                                           insn->eaBase - insn->eaRegBase,
   1344                                           &valid);
   1345       if (!valid)
   1346         return -1;
   1347     }
   1348     break;
   1349   }
   1350 
   1351   return 0;
   1352 }
   1353 
   1354 /*
   1355  * readOpcodeModifier - Reads an operand from the opcode field of an
   1356  *   instruction.  Handles AddRegFrm instructions.
   1357  *
   1358  * @param insn    - The instruction whose opcode field is to be read.
   1359  * @param inModRM - Indicates that the opcode field is to be read from the
   1360  *                  ModR/M extension; useful for escape opcodes
   1361  * @return        - 0 on success; nonzero otherwise.
   1362  */
   1363 static int readOpcodeModifier(struct InternalInstruction* insn) {
   1364   dbgprintf(insn, "readOpcodeModifier()");
   1365 
   1366   if (insn->consumedOpcodeModifier)
   1367     return 0;
   1368 
   1369   insn->consumedOpcodeModifier = TRUE;
   1370 
   1371   switch (insn->spec->modifierType) {
   1372   default:
   1373     debug("Unknown modifier type.");
   1374     return -1;
   1375   case MODIFIER_NONE:
   1376     debug("No modifier but an operand expects one.");
   1377     return -1;
   1378   case MODIFIER_OPCODE:
   1379     insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
   1380     return 0;
   1381   case MODIFIER_MODRM:
   1382     insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
   1383     return 0;
   1384   }
   1385 }
   1386 
   1387 /*
   1388  * readOpcodeRegister - Reads an operand from the opcode field of an
   1389  *   instruction and interprets it appropriately given the operand width.
   1390  *   Handles AddRegFrm instructions.
   1391  *
   1392  * @param insn  - See readOpcodeModifier().
   1393  * @param size  - The width (in bytes) of the register being specified.
   1394  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
   1395  *                RAX.
   1396  * @return      - 0 on success; nonzero otherwise.
   1397  */
   1398 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
   1399   dbgprintf(insn, "readOpcodeRegister()");
   1400 
   1401   if (readOpcodeModifier(insn))
   1402     return -1;
   1403 
   1404   if (size == 0)
   1405     size = insn->registerSize;
   1406 
   1407   switch (size) {
   1408   case 1:
   1409     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
   1410                                                   | insn->opcodeModifier));
   1411     if (insn->rexPrefix &&
   1412         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
   1413         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
   1414       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
   1415                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
   1416     }
   1417 
   1418     break;
   1419   case 2:
   1420     insn->opcodeRegister = (Reg)(MODRM_REG_AX
   1421                                  + ((bFromREX(insn->rexPrefix) << 3)
   1422                                     | insn->opcodeModifier));
   1423     break;
   1424   case 4:
   1425     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
   1426                                  + ((bFromREX(insn->rexPrefix) << 3)
   1427                                     | insn->opcodeModifier));
   1428     break;
   1429   case 8:
   1430     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
   1431                                  + ((bFromREX(insn->rexPrefix) << 3)
   1432                                     | insn->opcodeModifier));
   1433     break;
   1434   }
   1435 
   1436   return 0;
   1437 }
   1438 
   1439 /*
   1440  * readImmediate - Consumes an immediate operand from an instruction, given the
   1441  *   desired operand size.
   1442  *
   1443  * @param insn  - The instruction whose operand is to be read.
   1444  * @param size  - The width (in bytes) of the operand.
   1445  * @return      - 0 if the immediate was successfully consumed; nonzero
   1446  *                otherwise.
   1447  */
   1448 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
   1449   uint8_t imm8;
   1450   uint16_t imm16;
   1451   uint32_t imm32;
   1452   uint64_t imm64;
   1453 
   1454   dbgprintf(insn, "readImmediate()");
   1455 
   1456   if (insn->numImmediatesConsumed == 2) {
   1457     debug("Already consumed two immediates");
   1458     return -1;
   1459   }
   1460 
   1461   if (size == 0)
   1462     size = insn->immediateSize;
   1463   else
   1464     insn->immediateSize = size;
   1465   insn->immediateOffset = insn->readerCursor - insn->startLocation;
   1466 
   1467   switch (size) {
   1468   case 1:
   1469     if (consumeByte(insn, &imm8))
   1470       return -1;
   1471     insn->immediates[insn->numImmediatesConsumed] = imm8;
   1472     break;
   1473   case 2:
   1474     if (consumeUInt16(insn, &imm16))
   1475       return -1;
   1476     insn->immediates[insn->numImmediatesConsumed] = imm16;
   1477     break;
   1478   case 4:
   1479     if (consumeUInt32(insn, &imm32))
   1480       return -1;
   1481     insn->immediates[insn->numImmediatesConsumed] = imm32;
   1482     break;
   1483   case 8:
   1484     if (consumeUInt64(insn, &imm64))
   1485       return -1;
   1486     insn->immediates[insn->numImmediatesConsumed] = imm64;
   1487     break;
   1488   }
   1489 
   1490   insn->numImmediatesConsumed++;
   1491 
   1492   return 0;
   1493 }
   1494 
   1495 /*
   1496  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
   1497  *
   1498  * @param insn  - The instruction whose operand is to be read.
   1499  * @return      - 0 if the vvvv was successfully consumed; nonzero
   1500  *                otherwise.
   1501  */
   1502 static int readVVVV(struct InternalInstruction* insn) {
   1503   dbgprintf(insn, "readVVVV()");
   1504 
   1505   if (insn->vexSize == 3)
   1506     insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
   1507   else if (insn->vexSize == 2)
   1508     insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
   1509   else
   1510     return -1;
   1511 
   1512   if (insn->mode != MODE_64BIT)
   1513     insn->vvvv &= 0x7;
   1514 
   1515   return 0;
   1516 }
   1517 
   1518 /*
   1519  * readOperands - Consults the specifier for an instruction and consumes all
   1520  *   operands for that instruction, interpreting them as it goes.
   1521  *
   1522  * @param insn  - The instruction whose operands are to be read and interpreted.
   1523  * @return      - 0 if all operands could be read; nonzero otherwise.
   1524  */
   1525 static int readOperands(struct InternalInstruction* insn) {
   1526   int index;
   1527   int hasVVVV, needVVVV;
   1528   int sawRegImm = 0;
   1529 
   1530   dbgprintf(insn, "readOperands()");
   1531 
   1532   /* If non-zero vvvv specified, need to make sure one of the operands
   1533      uses it. */
   1534   hasVVVV = !readVVVV(insn);
   1535   needVVVV = hasVVVV && (insn->vvvv != 0);
   1536 
   1537   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
   1538     switch (x86OperandSets[insn->spec->operands][index].encoding) {
   1539     case ENCODING_NONE:
   1540       break;
   1541     case ENCODING_REG:
   1542     case ENCODING_RM:
   1543       if (readModRM(insn))
   1544         return -1;
   1545       if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
   1546         return -1;
   1547       break;
   1548     case ENCODING_CB:
   1549     case ENCODING_CW:
   1550     case ENCODING_CD:
   1551     case ENCODING_CP:
   1552     case ENCODING_CO:
   1553     case ENCODING_CT:
   1554       dbgprintf(insn, "We currently don't hande code-offset encodings");
   1555       return -1;
   1556     case ENCODING_IB:
   1557       if (sawRegImm) {
   1558         /* Saw a register immediate so don't read again and instead split the
   1559            previous immediate.  FIXME: This is a hack. */
   1560         insn->immediates[insn->numImmediatesConsumed] =
   1561           insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
   1562         ++insn->numImmediatesConsumed;
   1563         break;
   1564       }
   1565       if (readImmediate(insn, 1))
   1566         return -1;
   1567       if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
   1568           insn->immediates[insn->numImmediatesConsumed - 1] > 7)
   1569         return -1;
   1570       if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
   1571           insn->immediates[insn->numImmediatesConsumed - 1] > 31)
   1572         return -1;
   1573       if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
   1574           x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
   1575         sawRegImm = 1;
   1576       break;
   1577     case ENCODING_IW:
   1578       if (readImmediate(insn, 2))
   1579         return -1;
   1580       break;
   1581     case ENCODING_ID:
   1582       if (readImmediate(insn, 4))
   1583         return -1;
   1584       break;
   1585     case ENCODING_IO:
   1586       if (readImmediate(insn, 8))
   1587         return -1;
   1588       break;
   1589     case ENCODING_Iv:
   1590       if (readImmediate(insn, insn->immediateSize))
   1591         return -1;
   1592       break;
   1593     case ENCODING_Ia:
   1594       if (readImmediate(insn, insn->addressSize))
   1595         return -1;
   1596       break;
   1597     case ENCODING_RB:
   1598       if (readOpcodeRegister(insn, 1))
   1599         return -1;
   1600       break;
   1601     case ENCODING_RW:
   1602       if (readOpcodeRegister(insn, 2))
   1603         return -1;
   1604       break;
   1605     case ENCODING_RD:
   1606       if (readOpcodeRegister(insn, 4))
   1607         return -1;
   1608       break;
   1609     case ENCODING_RO:
   1610       if (readOpcodeRegister(insn, 8))
   1611         return -1;
   1612       break;
   1613     case ENCODING_Rv:
   1614       if (readOpcodeRegister(insn, 0))
   1615         return -1;
   1616       break;
   1617     case ENCODING_I:
   1618       if (readOpcodeModifier(insn))
   1619         return -1;
   1620       break;
   1621     case ENCODING_VVVV:
   1622       needVVVV = 0; /* Mark that we have found a VVVV operand. */
   1623       if (!hasVVVV)
   1624         return -1;
   1625       if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
   1626         return -1;
   1627       break;
   1628     case ENCODING_DUP:
   1629       break;
   1630     default:
   1631       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
   1632       return -1;
   1633     }
   1634   }
   1635 
   1636   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
   1637   if (needVVVV) return -1;
   1638 
   1639   return 0;
   1640 }
   1641 
   1642 /*
   1643  * decodeInstruction - Reads and interprets a full instruction provided by the
   1644  *   user.
   1645  *
   1646  * @param insn      - A pointer to the instruction to be populated.  Must be
   1647  *                    pre-allocated.
   1648  * @param reader    - The function to be used to read the instruction's bytes.
   1649  * @param readerArg - A generic argument to be passed to the reader to store
   1650  *                    any internal state.
   1651  * @param logger    - If non-NULL, the function to be used to write log messages
   1652  *                    and warnings.
   1653  * @param loggerArg - A generic argument to be passed to the logger to store
   1654  *                    any internal state.
   1655  * @param startLoc  - The address (in the reader's address space) of the first
   1656  *                    byte in the instruction.
   1657  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
   1658  *                    decode the instruction in.
   1659  * @return          - 0 if the instruction's memory could be read; nonzero if
   1660  *                    not.
   1661  */
   1662 int decodeInstruction(struct InternalInstruction* insn,
   1663                       byteReader_t reader,
   1664                       const void* readerArg,
   1665                       dlog_t logger,
   1666                       void* loggerArg,
   1667                       const void* miiArg,
   1668                       uint64_t startLoc,
   1669                       DisassemblerMode mode) {
   1670   memset(insn, 0, sizeof(struct InternalInstruction));
   1671 
   1672   insn->reader = reader;
   1673   insn->readerArg = readerArg;
   1674   insn->dlog = logger;
   1675   insn->dlogArg = loggerArg;
   1676   insn->startLocation = startLoc;
   1677   insn->readerCursor = startLoc;
   1678   insn->mode = mode;
   1679   insn->numImmediatesConsumed = 0;
   1680 
   1681   if (readPrefixes(insn)       ||
   1682       readOpcode(insn)         ||
   1683       getID(insn, miiArg)      ||
   1684       insn->instructionID == 0 ||
   1685       readOperands(insn))
   1686     return -1;
   1687 
   1688   insn->operands = &x86OperandSets[insn->spec->operands][0];
   1689 
   1690   insn->length = insn->readerCursor - insn->startLocation;
   1691 
   1692   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
   1693             startLoc, insn->readerCursor, insn->length);
   1694 
   1695   if (insn->length > 15)
   1696     dbgprintf(insn, "Instruction exceeds 15-byte limit");
   1697 
   1698   return 0;
   1699 }
   1700