Home | History | Annotate | Download | only in Disassembler
      1 //===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is part of the X86 Disassembler.
     11 // It contains the implementation of the instruction decoder.
     12 // Documentation for the disassembler can be found in X86Disassembler.h.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include <cstdarg> /* for va_*()       */
     17 #include <cstdio>  /* for vsnprintf()  */
     18 #include <cstdlib> /* for exit()       */
     19 #include <cstring> /* for memset()     */
     20 
     21 #include "X86DisassemblerDecoder.h"
     22 
     23 using namespace llvm::X86Disassembler;
     24 
/// Specifies whether a ModR/M byte is needed and (if so) which
/// instruction each possible value of the ModR/M byte corresponds to.  Once
/// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  /// One of the MODRM_* kinds; decode() switches on it to choose how the
  /// ModR/M byte selects an entry.
  uint8_t modrm_type;
  /// Base index into modRMTable for the instruction IDs of this opcode.
  uint16_t instructionIDs;
};
     32 
/// Specifies which set of ModR/M->instruction tables to look at
/// given a particular opcode.
struct OpcodeDecision {
  /// One decision per possible opcode byte value; indexed by the opcode.
  ModRMDecision modRMDecisions[256];
};
     38 
/// Specifies which opcode->instruction tables to look at given
/// a particular context (set of attributes).  Since there are many possible
/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
/// applies given a specific set of attributes.  Hence there are only IC_max
/// entries in this table, rather than 2^(ATTR_max).
struct ContextDecision {
  /// Indexed by the InstructionContext obtained from contextForAttrs().
  OpcodeDecision opcodeDecisions[IC_max];
};
     47 
     48 #include "X86GenDisassemblerTables.inc"
     49 
/*
 * debug - Reports the message s together with the current source file and line
 *   through Debug() when NDEBUG is not defined.  With NDEBUG defined it expands
 *   to an empty do/while so that call sites still form a single statement.
 */
#ifndef NDEBUG
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
     55 
     56 /*
     57  * contextForAttrs - Client for the instruction context table.  Takes a set of
     58  *   attributes and returns the appropriate decode context.
     59  *
     60  * @param attrMask  - Attributes, from the enumeration attributeBits.
     61  * @return          - The InstructionContext to use when looking up an
     62  *                    an instruction with these attributes.
     63  */
     64 static InstructionContext contextForAttrs(uint16_t attrMask) {
     65   return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
     66 }
     67 
     68 /*
     69  * modRMRequired - Reads the appropriate instruction table to determine whether
     70  *   the ModR/M byte is required to decode a particular instruction.
     71  *
     72  * @param type        - The opcode type (i.e., how many bytes it has).
     73  * @param insnContext - The context for the instruction, as returned by
     74  *                      contextForAttrs.
     75  * @param opcode      - The last byte of the instruction's opcode, not counting
     76  *                      ModR/M extensions and escapes.
     77  * @return            - true if the ModR/M byte is required, false otherwise.
     78  */
     79 static int modRMRequired(OpcodeType type,
     80                          InstructionContext insnContext,
     81                          uint16_t opcode) {
     82   const struct ContextDecision* decision = nullptr;
     83 
     84   switch (type) {
     85   case ONEBYTE:
     86     decision = &ONEBYTE_SYM;
     87     break;
     88   case TWOBYTE:
     89     decision = &TWOBYTE_SYM;
     90     break;
     91   case THREEBYTE_38:
     92     decision = &THREEBYTE38_SYM;
     93     break;
     94   case THREEBYTE_3A:
     95     decision = &THREEBYTE3A_SYM;
     96     break;
     97   case XOP8_MAP:
     98     decision = &XOP8_MAP_SYM;
     99     break;
    100   case XOP9_MAP:
    101     decision = &XOP9_MAP_SYM;
    102     break;
    103   case XOPA_MAP:
    104     decision = &XOPA_MAP_SYM;
    105     break;
    106   case THREEDNOW_MAP:
    107     decision = &THREEDNOW_MAP_SYM;
    108     break;
    109   }
    110 
    111   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
    112     modrm_type != MODRM_ONEENTRY;
    113 }
    114 
    115 /*
    116  * decode - Reads the appropriate instruction table to obtain the unique ID of
    117  *   an instruction.
    118  *
    119  * @param type        - See modRMRequired().
    120  * @param insnContext - See modRMRequired().
    121  * @param opcode      - See modRMRequired().
    122  * @param modRM       - The ModR/M byte if required, or any value if not.
    123  * @return            - The UID of the instruction, or 0 on failure.
    124  */
    125 static InstrUID decode(OpcodeType type,
    126                        InstructionContext insnContext,
    127                        uint8_t opcode,
    128                        uint8_t modRM) {
    129   const struct ModRMDecision* dec = nullptr;
    130 
    131   switch (type) {
    132   case ONEBYTE:
    133     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    134     break;
    135   case TWOBYTE:
    136     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    137     break;
    138   case THREEBYTE_38:
    139     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    140     break;
    141   case THREEBYTE_3A:
    142     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    143     break;
    144   case XOP8_MAP:
    145     dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    146     break;
    147   case XOP9_MAP:
    148     dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    149     break;
    150   case XOPA_MAP:
    151     dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    152     break;
    153   case THREEDNOW_MAP:
    154     dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    155     break;
    156   }
    157 
    158   switch (dec->modrm_type) {
    159   default:
    160     debug("Corrupt table!  Unknown modrm_type");
    161     return 0;
    162   case MODRM_ONEENTRY:
    163     return modRMTable[dec->instructionIDs];
    164   case MODRM_SPLITRM:
    165     if (modFromModRM(modRM) == 0x3)
    166       return modRMTable[dec->instructionIDs+1];
    167     return modRMTable[dec->instructionIDs];
    168   case MODRM_SPLITREG:
    169     if (modFromModRM(modRM) == 0x3)
    170       return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
    171     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
    172   case MODRM_SPLITMISC:
    173     if (modFromModRM(modRM) == 0x3)
    174       return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
    175     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
    176   case MODRM_FULL:
    177     return modRMTable[dec->instructionIDs+modRM];
    178   }
    179 }
    180 
    181 /*
    182  * specifierForUID - Given a UID, returns the name and operand specification for
    183  *   that instruction.
    184  *
    185  * @param uid - The unique ID for the instruction.  This should be returned by
    186  *              decode(); specifierForUID will not check bounds.
    187  * @return    - A pointer to the specification for that instruction.
    188  */
    189 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
    190   return &INSTRUCTIONS_SYM[uid];
    191 }
    192 
    193 /*
    194  * consumeByte - Uses the reader function provided by the user to consume one
    195  *   byte from the instruction's memory and advance the cursor.
    196  *
    197  * @param insn  - The instruction with the reader function to use.  The cursor
    198  *                for this instruction is advanced.
    199  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
    200  *                with the data read.
    201  * @return      - 0 if the read was successful; nonzero otherwise.
    202  */
    203 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
    204   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
    205 
    206   if (!ret)
    207     ++(insn->readerCursor);
    208 
    209   return ret;
    210 }
    211 
    212 /*
    213  * lookAtByte - Like consumeByte, but does not advance the cursor.
    214  *
    215  * @param insn  - See consumeByte().
    216  * @param byte  - See consumeByte().
    217  * @return      - See consumeByte().
    218  */
    219 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
    220   return insn->reader(insn->readerArg, byte, insn->readerCursor);
    221 }
    222 
    223 static void unconsumeByte(struct InternalInstruction* insn) {
    224   insn->readerCursor--;
    225 }
    226 
    227 #define CONSUME_FUNC(name, type)                                  \
    228   static int name(struct InternalInstruction* insn, type* ptr) {  \
    229     type combined = 0;                                            \
    230     unsigned offset;                                              \
    231     for (offset = 0; offset < sizeof(type); ++offset) {           \
    232       uint8_t byte;                                               \
    233       int ret = insn->reader(insn->readerArg,                     \
    234                              &byte,                               \
    235                              insn->readerCursor + offset);        \
    236       if (ret)                                                    \
    237         return ret;                                               \
    238       combined = combined | ((uint64_t)byte << (offset * 8));     \
    239     }                                                             \
    240     *ptr = combined;                                              \
    241     insn->readerCursor += sizeof(type);                           \
    242     return 0;                                                     \
    243   }
    244 
/*
 * consume* - Use the reader function provided by the user to consume data
 *   values of various sizes from the instruction's memory and advance the
 *   cursor appropriately.  The bytes are combined least-significant byte
 *   first (little-endian order), regardless of the host's byte order.
 *
 * @param insn    - See consumeByte().
 * @param ptr     - A pointer to pre-allocated memory of the appropriate size,
 *                  to be populated with the data read.  It is not written if
 *                  any byte fails to read.
 * @return        - See consumeByte().  On failure the cursor is not advanced.
 */
CONSUME_FUNC(consumeInt8, int8_t)
CONSUME_FUNC(consumeInt16, int16_t)
CONSUME_FUNC(consumeInt32, int32_t)
CONSUME_FUNC(consumeUInt16, uint16_t)
CONSUME_FUNC(consumeUInt32, uint32_t)
CONSUME_FUNC(consumeUInt64, uint64_t)
    261 
    262 /*
    263  * dbgprintf - Uses the logging function provided by the user to log a single
    264  *   message, typically without a carriage-return.
    265  *
    266  * @param insn    - The instruction containing the logging function.
    267  * @param format  - See printf().
    268  * @param ...     - See printf().
    269  */
    270 static void dbgprintf(struct InternalInstruction* insn,
    271                       const char* format,
    272                       ...) {
    273   char buffer[256];
    274   va_list ap;
    275 
    276   if (!insn->dlog)
    277     return;
    278 
    279   va_start(ap, format);
    280   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
    281   va_end(ap);
    282 
    283   insn->dlog(insn->dlogArg, buffer);
    284 }
    285 
    286 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
    287   if (insn->mode == MODE_64BIT)
    288     return prefix >= 0x40 && prefix <= 0x4f;
    289   return false;
    290 }
    291 
    292 /*
    293  * setPrefixPresent - Marks that a particular prefix is present as mandatory
    294  *
    295  * @param insn      - The instruction to be marked as having the prefix.
    296  * @param prefix    - The prefix that is present.
    297  */
    298 static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) {
    299   uint8_t nextByte;
    300   switch (prefix) {
    301   case 0xf0:
    302     insn->hasLockPrefix = true;
    303     break;
    304   case 0xf2:
    305   case 0xf3:
    306     if (lookAtByte(insn, &nextByte))
    307       break;
    308     // TODO:
    309     //  1. There could be several 0x66
    310     //  2. if (nextByte == 0x66) and nextNextByte != 0x0f then
    311     //      it's not mandatory prefix
    312     //  3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
    313     //     0x0f exactly after it to be mandatory prefix
    314     if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
    315       // The last of 0xf2 /0xf3 is mandatory prefix
    316       insn->mandatoryPrefix = prefix;
    317     insn->repeatPrefix = prefix;
    318     break;
    319   case 0x66:
    320     if (lookAtByte(insn, &nextByte))
    321       break;
    322     // 0x66 can't overwrite existing mandatory prefix and should be ignored
    323     if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
    324       insn->mandatoryPrefix = prefix;
    325     break;
    326   }
    327 }
    328 
    329 /*
    330  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
    331  *   instruction as having them.  Also sets the instruction's default operand,
    332  *   address, and other relevant data sizes to report operands correctly.
    333  *
    334  * @param insn  - The instruction whose prefixes are to be read.
    335  * @return      - 0 if the instruction could be read until the end of the prefix
    336  *                bytes, and no prefixes conflicted; nonzero otherwise.
    337  */
    338 static int readPrefixes(struct InternalInstruction* insn) {
    339   bool isPrefix = true;
    340   uint8_t byte = 0;
    341   uint8_t nextByte;
    342 
    343   dbgprintf(insn, "readPrefixes()");
    344 
    345   while (isPrefix) {
    346     /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
    347     if (consumeByte(insn, &byte))
    348       break;
    349 
    350     /*
    351      * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
    352      * break and let it be disassembled as a normal "instruction".
    353      */
    354     if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
    355       break;
    356 
    357     if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
    358       /*
    359        * If the byte is 0xf2 or 0xf3, and any of the following conditions are
    360        * met:
    361        * - it is followed by a LOCK (0xf0) prefix
    362        * - it is followed by an xchg instruction
    363        * then it should be disassembled as a xacquire/xrelease not repne/rep.
    364        */
    365       if (((nextByte == 0xf0) ||
    366            ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
    367         insn->xAcquireRelease = true;
    368         if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
    369           break;
    370       }
    371       /*
    372        * Also if the byte is 0xf3, and the following condition is met:
    373        * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
    374        *                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
    375        * then it should be disassembled as an xrelease not rep.
    376        */
    377       if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
    378                            nextByte == 0xc6 || nextByte == 0xc7)) {
    379         insn->xAcquireRelease = true;
    380         if (nextByte != 0x90) // PAUSE instruction support
    381           break;
    382       }
    383       if (isREX(insn, nextByte)) {
    384         uint8_t nnextByte;
    385         // Go to REX prefix after the current one
    386         if (consumeByte(insn, &nnextByte))
    387           return -1;
    388         // We should be able to read next byte after REX prefix
    389         if (lookAtByte(insn, &nnextByte))
    390           return -1;
    391         unconsumeByte(insn);
    392       }
    393     }
    394 
    395     switch (byte) {
    396     case 0xf0:  /* LOCK */
    397     case 0xf2:  /* REPNE/REPNZ */
    398     case 0xf3:  /* REP or REPE/REPZ */
    399       setPrefixPresent(insn, byte);
    400       break;
    401     case 0x2e:  /* CS segment override -OR- Branch not taken */
    402     case 0x36:  /* SS segment override -OR- Branch taken */
    403     case 0x3e:  /* DS segment override */
    404     case 0x26:  /* ES segment override */
    405     case 0x64:  /* FS segment override */
    406     case 0x65:  /* GS segment override */
    407       switch (byte) {
    408       case 0x2e:
    409         insn->segmentOverride = SEG_OVERRIDE_CS;
    410         break;
    411       case 0x36:
    412         insn->segmentOverride = SEG_OVERRIDE_SS;
    413         break;
    414       case 0x3e:
    415         insn->segmentOverride = SEG_OVERRIDE_DS;
    416         break;
    417       case 0x26:
    418         insn->segmentOverride = SEG_OVERRIDE_ES;
    419         break;
    420       case 0x64:
    421         insn->segmentOverride = SEG_OVERRIDE_FS;
    422         break;
    423       case 0x65:
    424         insn->segmentOverride = SEG_OVERRIDE_GS;
    425         break;
    426       default:
    427         debug("Unhandled override");
    428         return -1;
    429       }
    430       setPrefixPresent(insn, byte);
    431       break;
    432     case 0x66:  /* Operand-size override */
    433       insn->hasOpSize = true;
    434       setPrefixPresent(insn, byte);
    435       break;
    436     case 0x67:  /* Address-size override */
    437       insn->hasAdSize = true;
    438       setPrefixPresent(insn, byte);
    439       break;
    440     default:    /* Not a prefix byte */
    441       isPrefix = false;
    442       break;
    443     }
    444 
    445     if (isPrefix)
    446       dbgprintf(insn, "Found prefix 0x%hhx", byte);
    447   }
    448 
    449   insn->vectorExtensionType = TYPE_NO_VEX_XOP;
    450 
    451   if (byte == 0x62) {
    452     uint8_t byte1, byte2;
    453 
    454     if (consumeByte(insn, &byte1)) {
    455       dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
    456       return -1;
    457     }
    458 
    459     if (lookAtByte(insn, &byte2)) {
    460       dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
    461       return -1;
    462     }
    463 
    464     if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
    465        ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
    466       insn->vectorExtensionType = TYPE_EVEX;
    467     } else {
    468       unconsumeByte(insn); /* unconsume byte1 */
    469       unconsumeByte(insn); /* unconsume byte  */
    470     }
    471 
    472     if (insn->vectorExtensionType == TYPE_EVEX) {
    473       insn->vectorExtensionPrefix[0] = byte;
    474       insn->vectorExtensionPrefix[1] = byte1;
    475       if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
    476         dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
    477         return -1;
    478       }
    479       if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
    480         dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
    481         return -1;
    482       }
    483 
    484       /* We simulate the REX prefix for simplicity's sake */
    485       if (insn->mode == MODE_64BIT) {
    486         insn->rexPrefix = 0x40
    487                         | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
    488                         | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
    489                         | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
    490                         | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
    491       }
    492 
    493       dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
    494               insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
    495               insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
    496     }
    497   } else if (byte == 0xc4) {
    498     uint8_t byte1;
    499 
    500     if (lookAtByte(insn, &byte1)) {
    501       dbgprintf(insn, "Couldn't read second byte of VEX");
    502       return -1;
    503     }
    504 
    505     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
    506       insn->vectorExtensionType = TYPE_VEX_3B;
    507     else
    508       unconsumeByte(insn);
    509 
    510     if (insn->vectorExtensionType == TYPE_VEX_3B) {
    511       insn->vectorExtensionPrefix[0] = byte;
    512       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
    513       consumeByte(insn, &insn->vectorExtensionPrefix[2]);
    514 
    515       /* We simulate the REX prefix for simplicity's sake */
    516 
    517       if (insn->mode == MODE_64BIT)
    518         insn->rexPrefix = 0x40
    519                         | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
    520                         | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
    521                         | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
    522                         | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
    523 
    524       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
    525                 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
    526                 insn->vectorExtensionPrefix[2]);
    527     }
    528   } else if (byte == 0xc5) {
    529     uint8_t byte1;
    530 
    531     if (lookAtByte(insn, &byte1)) {
    532       dbgprintf(insn, "Couldn't read second byte of VEX");
    533       return -1;
    534     }
    535 
    536     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
    537       insn->vectorExtensionType = TYPE_VEX_2B;
    538     else
    539       unconsumeByte(insn);
    540 
    541     if (insn->vectorExtensionType == TYPE_VEX_2B) {
    542       insn->vectorExtensionPrefix[0] = byte;
    543       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
    544 
    545       if (insn->mode == MODE_64BIT)
    546         insn->rexPrefix = 0x40
    547                         | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
    548 
    549       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
    550       default:
    551         break;
    552       case VEX_PREFIX_66:
    553         insn->hasOpSize = true;
    554         break;
    555       }
    556 
    557       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
    558                 insn->vectorExtensionPrefix[0],
    559                 insn->vectorExtensionPrefix[1]);
    560     }
    561   } else if (byte == 0x8f) {
    562     uint8_t byte1;
    563 
    564     if (lookAtByte(insn, &byte1)) {
    565       dbgprintf(insn, "Couldn't read second byte of XOP");
    566       return -1;
    567     }
    568 
    569     if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */
    570       insn->vectorExtensionType = TYPE_XOP;
    571     else
    572       unconsumeByte(insn);
    573 
    574     if (insn->vectorExtensionType == TYPE_XOP) {
    575       insn->vectorExtensionPrefix[0] = byte;
    576       consumeByte(insn, &insn->vectorExtensionPrefix[1]);
    577       consumeByte(insn, &insn->vectorExtensionPrefix[2]);
    578 
    579       /* We simulate the REX prefix for simplicity's sake */
    580 
    581       if (insn->mode == MODE_64BIT)
    582         insn->rexPrefix = 0x40
    583                         | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
    584                         | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
    585                         | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
    586                         | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
    587 
    588       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
    589       default:
    590         break;
    591       case VEX_PREFIX_66:
    592         insn->hasOpSize = true;
    593         break;
    594       }
    595 
    596       dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
    597                 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
    598                 insn->vectorExtensionPrefix[2]);
    599     }
    600   } else if (isREX(insn, byte)) {
    601     if (lookAtByte(insn, &nextByte))
    602       return -1;
    603     insn->rexPrefix = byte;
    604     dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
    605   } else
    606     unconsumeByte(insn);
    607 
    608   if (insn->mode == MODE_16BIT) {
    609     insn->registerSize = (insn->hasOpSize ? 4 : 2);
    610     insn->addressSize = (insn->hasAdSize ? 4 : 2);
    611     insn->displacementSize = (insn->hasAdSize ? 4 : 2);
    612     insn->immediateSize = (insn->hasOpSize ? 4 : 2);
    613   } else if (insn->mode == MODE_32BIT) {
    614     insn->registerSize = (insn->hasOpSize ? 2 : 4);
    615     insn->addressSize = (insn->hasAdSize ? 2 : 4);
    616     insn->displacementSize = (insn->hasAdSize ? 2 : 4);
    617     insn->immediateSize = (insn->hasOpSize ? 2 : 4);
    618   } else if (insn->mode == MODE_64BIT) {
    619     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
    620       insn->registerSize       = 8;
    621       insn->addressSize = (insn->hasAdSize ? 4 : 8);
    622       insn->displacementSize   = 4;
    623       insn->immediateSize      = 4;
    624     } else {
    625       insn->registerSize = (insn->hasOpSize ? 2 : 4);
    626       insn->addressSize = (insn->hasAdSize ? 4 : 8);
    627       insn->displacementSize = (insn->hasOpSize ? 2 : 4);
    628       insn->immediateSize = (insn->hasOpSize ? 2 : 4);
    629     }
    630   }
    631 
    632   return 0;
    633 }
    634 
/*
 * Forward declaration: readOpcode() and getIDWithAttrMask() below call
 * readModRM() before its definition appears.
 */
static int readModRM(struct InternalInstruction* insn);
    636 
    637 /*
    638  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
    639  *   extended or escape opcodes).
    640  *
    641  * @param insn  - The instruction whose opcode is to be read.
    642  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
    643  */
    644 static int readOpcode(struct InternalInstruction* insn) {
    645   /* Determine the length of the primary opcode */
    646 
    647   uint8_t current;
    648 
    649   dbgprintf(insn, "readOpcode()");
    650 
    651   insn->opcodeType = ONEBYTE;
    652 
    653   if (insn->vectorExtensionType == TYPE_EVEX) {
    654     switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
    655     default:
    656       dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
    657                 mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
    658       return -1;
    659     case VEX_LOB_0F:
    660       insn->opcodeType = TWOBYTE;
    661       return consumeByte(insn, &insn->opcode);
    662     case VEX_LOB_0F38:
    663       insn->opcodeType = THREEBYTE_38;
    664       return consumeByte(insn, &insn->opcode);
    665     case VEX_LOB_0F3A:
    666       insn->opcodeType = THREEBYTE_3A;
    667       return consumeByte(insn, &insn->opcode);
    668     }
    669   } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
    670     switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
    671     default:
    672       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
    673                 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
    674       return -1;
    675     case VEX_LOB_0F:
    676       insn->opcodeType = TWOBYTE;
    677       return consumeByte(insn, &insn->opcode);
    678     case VEX_LOB_0F38:
    679       insn->opcodeType = THREEBYTE_38;
    680       return consumeByte(insn, &insn->opcode);
    681     case VEX_LOB_0F3A:
    682       insn->opcodeType = THREEBYTE_3A;
    683       return consumeByte(insn, &insn->opcode);
    684     }
    685   } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
    686     insn->opcodeType = TWOBYTE;
    687     return consumeByte(insn, &insn->opcode);
    688   } else if (insn->vectorExtensionType == TYPE_XOP) {
    689     switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
    690     default:
    691       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
    692                 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
    693       return -1;
    694     case XOP_MAP_SELECT_8:
    695       insn->opcodeType = XOP8_MAP;
    696       return consumeByte(insn, &insn->opcode);
    697     case XOP_MAP_SELECT_9:
    698       insn->opcodeType = XOP9_MAP;
    699       return consumeByte(insn, &insn->opcode);
    700     case XOP_MAP_SELECT_A:
    701       insn->opcodeType = XOPA_MAP;
    702       return consumeByte(insn, &insn->opcode);
    703     }
    704   }
    705 
    706   if (consumeByte(insn, &current))
    707     return -1;
    708 
    709   if (current == 0x0f) {
    710     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
    711 
    712     if (consumeByte(insn, &current))
    713       return -1;
    714 
    715     if (current == 0x38) {
    716       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    717 
    718       if (consumeByte(insn, &current))
    719         return -1;
    720 
    721       insn->opcodeType = THREEBYTE_38;
    722     } else if (current == 0x3a) {
    723       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    724 
    725       if (consumeByte(insn, &current))
    726         return -1;
    727 
    728       insn->opcodeType = THREEBYTE_3A;
    729     } else if (current == 0x0f) {
    730       dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current);
    731 
    732       // Consume operands before the opcode to comply with the 3DNow encoding
    733       if (readModRM(insn))
    734         return -1;
    735 
    736       if (consumeByte(insn, &current))
    737         return -1;
    738 
    739       insn->opcodeType = THREEDNOW_MAP;
    740     } else {
    741       dbgprintf(insn, "Didn't find a three-byte escape prefix");
    742 
    743       insn->opcodeType = TWOBYTE;
    744     }
    745   } else if (insn->mandatoryPrefix)
    746     // The opcode with mandatory prefix must start with opcode escape.
    747     // If not it's legacy repeat prefix
    748     insn->mandatoryPrefix = 0;
    749 
    750   /*
    751    * At this point we have consumed the full opcode.
    752    * Anything we consume from here on must be unconsumed.
    753    */
    754 
    755   insn->opcode = current;
    756 
    757   return 0;
    758 }
    759 
    760 /*
    761  * getIDWithAttrMask - Determines the ID of an instruction, consuming
    762  *   the ModR/M byte as appropriate for extended and escape opcodes,
    763  *   and using a supplied attribute mask.
    764  *
    765  * @param instructionID - A pointer whose target is filled in with the ID of the
    766  *                        instruction.
    767  * @param insn          - The instruction whose ID is to be determined.
    768  * @param attrMask      - The attribute mask to search.
    769  * @return              - 0 if the ModR/M could be read when needed or was not
    770  *                        needed; nonzero otherwise.
    771  */
    772 static int getIDWithAttrMask(uint16_t* instructionID,
    773                              struct InternalInstruction* insn,
    774                              uint16_t attrMask) {
    775   bool hasModRMExtension;
    776 
    777   InstructionContext instructionClass = contextForAttrs(attrMask);
    778 
    779   hasModRMExtension = modRMRequired(insn->opcodeType,
    780                                     instructionClass,
    781                                     insn->opcode);
    782 
    783   if (hasModRMExtension) {
    784     if (readModRM(insn))
    785       return -1;
    786 
    787     *instructionID = decode(insn->opcodeType,
    788                             instructionClass,
    789                             insn->opcode,
    790                             insn->modRM);
    791   } else {
    792     *instructionID = decode(insn->opcodeType,
    793                             instructionClass,
    794                             insn->opcode,
    795                             0);
    796   }
    797 
    798   return 0;
    799 }
    800 
    801 /*
    802  * is16BitEquivalent - Determines whether two instruction names refer to
    803  * equivalent instructions but one is 16-bit whereas the other is not.
    804  *
    805  * @param orig  - The instruction that is not 16-bit
    806  * @param equiv - The instruction that is 16-bit
    807  */
    808 static bool is16BitEquivalent(const char *orig, const char *equiv) {
    809   off_t i;
    810 
    811   for (i = 0;; i++) {
    812     if (orig[i] == '\0' && equiv[i] == '\0')
    813       return true;
    814     if (orig[i] == '\0' || equiv[i] == '\0')
    815       return false;
    816     if (orig[i] != equiv[i]) {
    817       if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
    818         continue;
    819       if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
    820         continue;
    821       if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
    822         continue;
    823       return false;
    824     }
    825   }
    826 }
    827 
    828 /*
    829  * is64Bit - Determines whether this instruction is a 64-bit instruction.
    830  *
    831  * @param name - The instruction that is not 16-bit
    832  */
    833 static bool is64Bit(const char *name) {
    834   off_t i;
    835 
    836   for (i = 0;; ++i) {
    837     if (name[i] == '\0')
    838       return false;
    839     if (name[i] == '6' && name[i+1] == '4')
    840       return true;
    841   }
    842 }
    843 
    844 /*
    845  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
    846  *   appropriate for extended and escape opcodes.  Determines the attributes and
    847  *   context for the instruction before doing so.
    848  *
    849  * @param insn  - The instruction whose ID is to be determined.
    850  * @return      - 0 if the ModR/M could be read when needed or was not needed;
    851  *                nonzero otherwise.
    852  */
    853 static int getID(struct InternalInstruction* insn, const void *miiArg) {
    854   uint16_t attrMask;
    855   uint16_t instructionID;
    856 
    857   dbgprintf(insn, "getID()");
    858 
    859   attrMask = ATTR_NONE;
    860 
    861   if (insn->mode == MODE_64BIT)
    862     attrMask |= ATTR_64BIT;
    863 
    864   if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    865     attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
    866 
    867     if (insn->vectorExtensionType == TYPE_EVEX) {
    868       switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
    869       case VEX_PREFIX_66:
    870         attrMask |= ATTR_OPSIZE;
    871         break;
    872       case VEX_PREFIX_F3:
    873         attrMask |= ATTR_XS;
    874         break;
    875       case VEX_PREFIX_F2:
    876         attrMask |= ATTR_XD;
    877         break;
    878       }
    879 
    880       if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    881         attrMask |= ATTR_EVEXKZ;
    882       if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    883         attrMask |= ATTR_EVEXB;
    884       if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    885         attrMask |= ATTR_EVEXK;
    886       if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    887         attrMask |= ATTR_EVEXL;
    888       if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
    889         attrMask |= ATTR_EVEXL2;
    890     } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
    891       switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
    892       case VEX_PREFIX_66:
    893         attrMask |= ATTR_OPSIZE;
    894         break;
    895       case VEX_PREFIX_F3:
    896         attrMask |= ATTR_XS;
    897         break;
    898       case VEX_PREFIX_F2:
    899         attrMask |= ATTR_XD;
    900         break;
    901       }
    902 
    903       if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
    904         attrMask |= ATTR_VEXL;
    905     } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
    906       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
    907       case VEX_PREFIX_66:
    908         attrMask |= ATTR_OPSIZE;
    909         break;
    910       case VEX_PREFIX_F3:
    911         attrMask |= ATTR_XS;
    912         break;
    913       case VEX_PREFIX_F2:
    914         attrMask |= ATTR_XD;
    915         break;
    916       }
    917 
    918       if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
    919         attrMask |= ATTR_VEXL;
    920     } else if (insn->vectorExtensionType == TYPE_XOP) {
    921       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
    922       case VEX_PREFIX_66:
    923         attrMask |= ATTR_OPSIZE;
    924         break;
    925       case VEX_PREFIX_F3:
    926         attrMask |= ATTR_XS;
    927         break;
    928       case VEX_PREFIX_F2:
    929         attrMask |= ATTR_XD;
    930         break;
    931       }
    932 
    933       if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
    934         attrMask |= ATTR_VEXL;
    935     } else {
    936       return -1;
    937     }
    938   } else if (!insn->mandatoryPrefix) {
    939     // If we don't have mandatory prefix we should use legacy prefixes here
    940     if (insn->hasOpSize && (insn->mode != MODE_16BIT))
    941       attrMask |= ATTR_OPSIZE;
    942     if (insn->hasAdSize)
    943       attrMask |= ATTR_ADSIZE;
    944     if (insn->opcodeType == ONEBYTE) {
    945       if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
    946         // Special support for PAUSE
    947         attrMask |= ATTR_XS;
    948     } else {
    949       if (insn->repeatPrefix == 0xf2)
    950         attrMask |= ATTR_XD;
    951       else if (insn->repeatPrefix == 0xf3)
    952         attrMask |= ATTR_XS;
    953     }
    954   } else {
    955     switch (insn->mandatoryPrefix) {
    956     case 0xf2:
    957       attrMask |= ATTR_XD;
    958       break;
    959     case 0xf3:
    960       attrMask |= ATTR_XS;
    961       break;
    962     case 0x66:
    963       if (insn->mode != MODE_16BIT)
    964         attrMask |= ATTR_OPSIZE;
    965       break;
    966     case 0x67:
    967       attrMask |= ATTR_ADSIZE;
    968       break;
    969     }
    970 
    971   }
    972 
    973   if (insn->rexPrefix & 0x08) {
    974     attrMask |= ATTR_REXW;
    975     attrMask &= ~ATTR_ADSIZE;
    976   }
    977 
    978   /*
    979    * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
    980    * of the AdSize prefix is inverted w.r.t. 32-bit mode.
    981    */
    982   if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE &&
    983       insn->opcode == 0xE3)
    984     attrMask ^= ATTR_ADSIZE;
    985 
    986   /*
    987    * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix
    988    * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes
    989    */
    990 
    991   if ((insn->mode == MODE_64BIT) && insn->hasOpSize) {
    992     switch (insn->opcode) {
    993     case 0xE8:
    994     case 0xE9:
    995       // Take care of psubsb and other mmx instructions.
    996       if (insn->opcodeType == ONEBYTE) {
    997         attrMask ^= ATTR_OPSIZE;
    998         insn->immediateSize = 4;
    999         insn->displacementSize = 4;
   1000       }
   1001       break;
   1002     case 0x82:
   1003     case 0x83:
   1004     case 0x84:
   1005     case 0x85:
   1006     case 0x86:
   1007     case 0x87:
   1008     case 0x88:
   1009     case 0x89:
   1010     case 0x8A:
   1011     case 0x8B:
   1012     case 0x8C:
   1013     case 0x8D:
   1014     case 0x8E:
   1015     case 0x8F:
   1016       // Take care of lea and three byte ops.
   1017       if (insn->opcodeType == TWOBYTE) {
   1018         attrMask ^= ATTR_OPSIZE;
   1019         insn->immediateSize = 4;
   1020         insn->displacementSize = 4;
   1021       }
   1022       break;
   1023     }
   1024   }
   1025 
   1026   if (getIDWithAttrMask(&instructionID, insn, attrMask))
   1027     return -1;
   1028 
   1029   /* The following clauses compensate for limitations of the tables. */
   1030 
   1031   if (insn->mode != MODE_64BIT &&
   1032       insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
   1033     /*
   1034      * The tables can't distinquish between cases where the W-bit is used to
   1035      * select register size and cases where its a required part of the opcode.
   1036      */
   1037     if ((insn->vectorExtensionType == TYPE_EVEX &&
   1038          wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
   1039         (insn->vectorExtensionType == TYPE_VEX_3B &&
   1040          wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
   1041         (insn->vectorExtensionType == TYPE_XOP &&
   1042          wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
   1043 
   1044       uint16_t instructionIDWithREXW;
   1045       if (getIDWithAttrMask(&instructionIDWithREXW,
   1046                             insn, attrMask | ATTR_REXW)) {
   1047         insn->instructionID = instructionID;
   1048         insn->spec = specifierForUID(instructionID);
   1049         return 0;
   1050       }
   1051 
   1052       auto SpecName = GetInstrName(instructionIDWithREXW, miiArg);
   1053       // If not a 64-bit instruction. Switch the opcode.
   1054       if (!is64Bit(SpecName.data())) {
   1055         insn->instructionID = instructionIDWithREXW;
   1056         insn->spec = specifierForUID(instructionIDWithREXW);
   1057         return 0;
   1058       }
   1059     }
   1060   }
   1061 
   1062   /*
   1063    * Absolute moves, umonitor, and movdir64b need special handling.
   1064    * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
   1065    *  inverted w.r.t.
   1066    * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
   1067    *  any position.
   1068    */
   1069   if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
   1070       (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
   1071       (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
   1072     /* Make sure we observed the prefixes in any position. */
   1073     if (insn->hasAdSize)
   1074       attrMask |= ATTR_ADSIZE;
   1075     if (insn->hasOpSize)
   1076       attrMask |= ATTR_OPSIZE;
   1077 
   1078     /* In 16-bit, invert the attributes. */
   1079     if (insn->mode == MODE_16BIT) {
   1080       attrMask ^= ATTR_ADSIZE;
   1081 
   1082       /* The OpSize attribute is only valid with the absolute moves. */
   1083       if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
   1084         attrMask ^= ATTR_OPSIZE;
   1085     }
   1086 
   1087     if (getIDWithAttrMask(&instructionID, insn, attrMask))
   1088       return -1;
   1089 
   1090     insn->instructionID = instructionID;
   1091     insn->spec = specifierForUID(instructionID);
   1092     return 0;
   1093   }
   1094 
   1095   if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
   1096       !(attrMask & ATTR_OPSIZE)) {
   1097     /*
   1098      * The instruction tables make no distinction between instructions that
   1099      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
   1100      * particular spot (i.e., many MMX operations).  In general we're
   1101      * conservative, but in the specific case where OpSize is present but not
   1102      * in the right place we check if there's a 16-bit operation.
   1103      */
   1104 
   1105     const struct InstructionSpecifier *spec;
   1106     uint16_t instructionIDWithOpsize;
   1107     llvm::StringRef specName, specWithOpSizeName;
   1108 
   1109     spec = specifierForUID(instructionID);
   1110 
   1111     if (getIDWithAttrMask(&instructionIDWithOpsize,
   1112                           insn,
   1113                           attrMask | ATTR_OPSIZE)) {
   1114       /*
   1115        * ModRM required with OpSize but not present; give up and return version
   1116        * without OpSize set
   1117        */
   1118 
   1119       insn->instructionID = instructionID;
   1120       insn->spec = spec;
   1121       return 0;
   1122     }
   1123 
   1124     specName = GetInstrName(instructionID, miiArg);
   1125     specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
   1126 
   1127     if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
   1128         (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
   1129       insn->instructionID = instructionIDWithOpsize;
   1130       insn->spec = specifierForUID(instructionIDWithOpsize);
   1131     } else {
   1132       insn->instructionID = instructionID;
   1133       insn->spec = spec;
   1134     }
   1135     return 0;
   1136   }
   1137 
   1138   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
   1139       insn->rexPrefix & 0x01) {
   1140     /*
   1141      * NOOP shouldn't decode as NOOP if REX.b is set. Instead
   1142      * it should decode as XCHG %r8, %eax.
   1143      */
   1144 
   1145     const struct InstructionSpecifier *spec;
   1146     uint16_t instructionIDWithNewOpcode;
   1147     const struct InstructionSpecifier *specWithNewOpcode;
   1148 
   1149     spec = specifierForUID(instructionID);
   1150 
   1151     /* Borrow opcode from one of the other XCHGar opcodes */
   1152     insn->opcode = 0x91;
   1153 
   1154     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
   1155                           insn,
   1156                           attrMask)) {
   1157       insn->opcode = 0x90;
   1158 
   1159       insn->instructionID = instructionID;
   1160       insn->spec = spec;
   1161       return 0;
   1162     }
   1163 
   1164     specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
   1165 
   1166     /* Change back */
   1167     insn->opcode = 0x90;
   1168 
   1169     insn->instructionID = instructionIDWithNewOpcode;
   1170     insn->spec = specWithNewOpcode;
   1171 
   1172     return 0;
   1173   }
   1174 
   1175   insn->instructionID = instructionID;
   1176   insn->spec = specifierForUID(insn->instructionID);
   1177 
   1178   return 0;
   1179 }
   1180 
   1181 /*
   1182  * readSIB - Consumes the SIB byte to determine addressing information for an
   1183  *   instruction.
   1184  *
   1185  * @param insn  - The instruction whose SIB byte is to be read.
   1186  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
   1187  */
   1188 static int readSIB(struct InternalInstruction* insn) {
   1189   SIBBase sibBaseBase = SIB_BASE_NONE;
   1190   uint8_t index, base;
   1191 
   1192   dbgprintf(insn, "readSIB()");
   1193 
   1194   if (insn->consumedSIB)
   1195     return 0;
   1196 
   1197   insn->consumedSIB = true;
   1198 
   1199   switch (insn->addressSize) {
   1200   case 2:
   1201     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
   1202     return -1;
   1203   case 4:
   1204     insn->sibIndexBase = SIB_INDEX_EAX;
   1205     sibBaseBase = SIB_BASE_EAX;
   1206     break;
   1207   case 8:
   1208     insn->sibIndexBase = SIB_INDEX_RAX;
   1209     sibBaseBase = SIB_BASE_RAX;
   1210     break;
   1211   }
   1212 
   1213   if (consumeByte(insn, &insn->sib))
   1214     return -1;
   1215 
   1216   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
   1217 
   1218   if (index == 0x4) {
   1219     insn->sibIndex = SIB_INDEX_NONE;
   1220   } else {
   1221     insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
   1222   }
   1223 
   1224   insn->sibScale = 1 << scaleFromSIB(insn->sib);
   1225 
   1226   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
   1227 
   1228   switch (base) {
   1229   case 0x5:
   1230   case 0xd:
   1231     switch (modFromModRM(insn->modRM)) {
   1232     case 0x0:
   1233       insn->eaDisplacement = EA_DISP_32;
   1234       insn->sibBase = SIB_BASE_NONE;
   1235       break;
   1236     case 0x1:
   1237       insn->eaDisplacement = EA_DISP_8;
   1238       insn->sibBase = (SIBBase)(sibBaseBase + base);
   1239       break;
   1240     case 0x2:
   1241       insn->eaDisplacement = EA_DISP_32;
   1242       insn->sibBase = (SIBBase)(sibBaseBase + base);
   1243       break;
   1244     case 0x3:
   1245       debug("Cannot have Mod = 0b11 and a SIB byte");
   1246       return -1;
   1247     }
   1248     break;
   1249   default:
   1250     insn->sibBase = (SIBBase)(sibBaseBase + base);
   1251     break;
   1252   }
   1253 
   1254   return 0;
   1255 }
   1256 
   1257 /*
   1258  * readDisplacement - Consumes the displacement of an instruction.
   1259  *
   1260  * @param insn  - The instruction whose displacement is to be read.
   1261  * @return      - 0 if the displacement byte was successfully read; nonzero
   1262  *                otherwise.
   1263  */
   1264 static int readDisplacement(struct InternalInstruction* insn) {
   1265   int8_t d8;
   1266   int16_t d16;
   1267   int32_t d32;
   1268 
   1269   dbgprintf(insn, "readDisplacement()");
   1270 
   1271   if (insn->consumedDisplacement)
   1272     return 0;
   1273 
   1274   insn->consumedDisplacement = true;
   1275   insn->displacementOffset = insn->readerCursor - insn->startLocation;
   1276 
   1277   switch (insn->eaDisplacement) {
   1278   case EA_DISP_NONE:
   1279     insn->consumedDisplacement = false;
   1280     break;
   1281   case EA_DISP_8:
   1282     if (consumeInt8(insn, &d8))
   1283       return -1;
   1284     insn->displacement = d8;
   1285     break;
   1286   case EA_DISP_16:
   1287     if (consumeInt16(insn, &d16))
   1288       return -1;
   1289     insn->displacement = d16;
   1290     break;
   1291   case EA_DISP_32:
   1292     if (consumeInt32(insn, &d32))
   1293       return -1;
   1294     insn->displacement = d32;
   1295     break;
   1296   }
   1297 
   1298   insn->consumedDisplacement = true;
   1299   return 0;
   1300 }
   1301 
   1302 /*
   1303  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
   1304  *   displacement) for an instruction and interprets it.
   1305  *
   1306  * @param insn  - The instruction whose addressing information is to be read.
   1307  * @return      - 0 if the information was successfully read; nonzero otherwise.
   1308  */
   1309 static int readModRM(struct InternalInstruction* insn) {
   1310   uint8_t mod, rm, reg, evexrm;
   1311 
   1312   dbgprintf(insn, "readModRM()");
   1313 
   1314   if (insn->consumedModRM)
   1315     return 0;
   1316 
   1317   if (consumeByte(insn, &insn->modRM))
   1318     return -1;
   1319   insn->consumedModRM = true;
   1320 
   1321   mod     = modFromModRM(insn->modRM);
   1322   rm      = rmFromModRM(insn->modRM);
   1323   reg     = regFromModRM(insn->modRM);
   1324 
   1325   /*
   1326    * This goes by insn->registerSize to pick the correct register, which messes
   1327    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
   1328    * fixupReg().
   1329    */
   1330   switch (insn->registerSize) {
   1331   case 2:
   1332     insn->regBase = MODRM_REG_AX;
   1333     insn->eaRegBase = EA_REG_AX;
   1334     break;
   1335   case 4:
   1336     insn->regBase = MODRM_REG_EAX;
   1337     insn->eaRegBase = EA_REG_EAX;
   1338     break;
   1339   case 8:
   1340     insn->regBase = MODRM_REG_RAX;
   1341     insn->eaRegBase = EA_REG_RAX;
   1342     break;
   1343   }
   1344 
   1345   reg |= rFromREX(insn->rexPrefix) << 3;
   1346   rm  |= bFromREX(insn->rexPrefix) << 3;
   1347 
   1348   evexrm = 0;
   1349   if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
   1350     reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
   1351     evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
   1352   }
   1353 
   1354   insn->reg = (Reg)(insn->regBase + reg);
   1355 
   1356   switch (insn->addressSize) {
   1357   case 2: {
   1358     EABase eaBaseBase = EA_BASE_BX_SI;
   1359 
   1360     switch (mod) {
   1361     case 0x0:
   1362       if (rm == 0x6) {
   1363         insn->eaBase = EA_BASE_NONE;
   1364         insn->eaDisplacement = EA_DISP_16;
   1365         if (readDisplacement(insn))
   1366           return -1;
   1367       } else {
   1368         insn->eaBase = (EABase)(eaBaseBase + rm);
   1369         insn->eaDisplacement = EA_DISP_NONE;
   1370       }
   1371       break;
   1372     case 0x1:
   1373       insn->eaBase = (EABase)(eaBaseBase + rm);
   1374       insn->eaDisplacement = EA_DISP_8;
   1375       insn->displacementSize = 1;
   1376       if (readDisplacement(insn))
   1377         return -1;
   1378       break;
   1379     case 0x2:
   1380       insn->eaBase = (EABase)(eaBaseBase + rm);
   1381       insn->eaDisplacement = EA_DISP_16;
   1382       if (readDisplacement(insn))
   1383         return -1;
   1384       break;
   1385     case 0x3:
   1386       insn->eaBase = (EABase)(insn->eaRegBase + rm);
   1387       if (readDisplacement(insn))
   1388         return -1;
   1389       break;
   1390     }
   1391     break;
   1392   }
   1393   case 4:
   1394   case 8: {
   1395     EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
   1396 
   1397     switch (mod) {
   1398     case 0x0:
   1399       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
   1400       // In determining whether RIP-relative mode is used (rm=5),
   1401       // or whether a SIB byte is present (rm=4),
   1402       // the extension bits (REX.b and EVEX.x) are ignored.
   1403       switch (rm & 7) {
   1404       case 0x4: // SIB byte is present
   1405         insn->eaBase = (insn->addressSize == 4 ?
   1406                         EA_BASE_sib : EA_BASE_sib64);
   1407         if (readSIB(insn) || readDisplacement(insn))
   1408           return -1;
   1409         break;
   1410       case 0x5: // RIP-relative
   1411         insn->eaBase = EA_BASE_NONE;
   1412         insn->eaDisplacement = EA_DISP_32;
   1413         if (readDisplacement(insn))
   1414           return -1;
   1415         break;
   1416       default:
   1417         insn->eaBase = (EABase)(eaBaseBase + rm);
   1418         break;
   1419       }
   1420       break;
   1421     case 0x1:
   1422       insn->displacementSize = 1;
   1423       LLVM_FALLTHROUGH;
   1424     case 0x2:
   1425       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
   1426       switch (rm & 7) {
   1427       case 0x4: // SIB byte is present
   1428         insn->eaBase = EA_BASE_sib;
   1429         if (readSIB(insn) || readDisplacement(insn))
   1430           return -1;
   1431         break;
   1432       default:
   1433         insn->eaBase = (EABase)(eaBaseBase + rm);
   1434         if (readDisplacement(insn))
   1435           return -1;
   1436         break;
   1437       }
   1438       break;
   1439     case 0x3:
   1440       insn->eaDisplacement = EA_DISP_NONE;
   1441       insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
   1442       break;
   1443     }
   1444     break;
   1445   }
   1446   } /* switch (insn->addressSize) */
   1447 
   1448   return 0;
   1449 }
   1450 
   1451 #define GENERIC_FIXUP_FUNC(name, base, prefix, mask)      \
   1452   static uint16_t name(struct InternalInstruction *insn,  \
   1453                        OperandType type,                  \
   1454                        uint8_t index,                     \
   1455                        uint8_t *valid) {                  \
   1456     *valid = 1;                                           \
   1457     switch (type) {                                       \
   1458     default:                                              \
   1459       debug("Unhandled register type");                   \
   1460       *valid = 0;                                         \
   1461       return 0;                                           \
   1462     case TYPE_Rv:                                         \
   1463       return base + index;                                \
   1464     case TYPE_R8:                                         \
   1465       index &= mask;                                      \
   1466       if (index > 0xf)                                    \
   1467         *valid = 0;                                       \
   1468       if (insn->rexPrefix &&                              \
   1469          index >= 4 && index <= 7) {                      \
   1470         return prefix##_SPL + (index - 4);                \
   1471       } else {                                            \
   1472         return prefix##_AL + index;                       \
   1473       }                                                   \
   1474     case TYPE_R16:                                        \
   1475       index &= mask;                                      \
   1476       if (index > 0xf)                                    \
   1477         *valid = 0;                                       \
   1478       return prefix##_AX + index;                         \
   1479     case TYPE_R32:                                        \
   1480       index &= mask;                                      \
   1481       if (index > 0xf)                                    \
   1482         *valid = 0;                                       \
   1483       return prefix##_EAX + index;                        \
   1484     case TYPE_R64:                                        \
   1485       index &= mask;                                      \
   1486       if (index > 0xf)                                    \
   1487         *valid = 0;                                       \
   1488       return prefix##_RAX + index;                        \
   1489     case TYPE_ZMM:                                        \
   1490       return prefix##_ZMM0 + index;                       \
   1491     case TYPE_YMM:                                        \
   1492       return prefix##_YMM0 + index;                       \
   1493     case TYPE_XMM:                                        \
   1494       return prefix##_XMM0 + index;                       \
   1495     case TYPE_VK:                                         \
   1496       index &= 0xf;                                       \
   1497       if (index > 7)                                      \
   1498         *valid = 0;                                       \
   1499       return prefix##_K0 + index;                         \
   1500     case TYPE_MM64:                                       \
   1501       return prefix##_MM0 + (index & 0x7);                \
   1502     case TYPE_SEGMENTREG:                                 \
   1503       if ((index & 7) > 5)                                \
   1504         *valid = 0;                                       \
   1505       return prefix##_ES + (index & 7);                   \
   1506     case TYPE_DEBUGREG:                                   \
   1507       return prefix##_DR0 + index;                        \
   1508     case TYPE_CONTROLREG:                                 \
   1509       return prefix##_CR0 + index;                        \
   1510     case TYPE_BNDR:                                       \
   1511       if (index > 3)                                      \
   1512         *valid = 0;                                       \
   1513       return prefix##_BND0 + index;                       \
   1514     case TYPE_MVSIBX:                                     \
   1515       return prefix##_XMM0 + index;                       \
   1516     case TYPE_MVSIBY:                                     \
   1517       return prefix##_YMM0 + index;                       \
   1518     case TYPE_MVSIBZ:                                     \
   1519       return prefix##_ZMM0 + index;                       \
   1520     }                                                     \
   1521   }
   1522 
   1523 /*
   1524  * fixup*Value - Consults an operand type to determine the meaning of the
   1525  *   reg or R/M field.  If the operand is an XMM operand, for example, an
   1526  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
   1527  *   misinterpret it as.
   1528  *
   1529  * @param insn  - The instruction containing the operand.
   1530  * @param type  - The operand type.
   1531  * @param index - The existing value of the field as reported by readModRM().
   1532  * @param valid - The address of a uint8_t.  The target is set to 1 if the
   1533  *                field is valid for the register class; 0 if not.
   1534  * @return      - The proper value.
   1535  */
   1536 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG, 0x1f)
   1537 GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG,    0xf)
   1538 
   1539 /*
   1540  * fixupReg - Consults an operand specifier to determine which of the
   1541  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
   1542  *
   1543  * @param insn  - See fixup*Value().
   1544  * @param op    - The operand specifier.
   1545  * @return      - 0 if fixup was successful; -1 if the register returned was
   1546  *                invalid for its class.
   1547  */
   1548 static int fixupReg(struct InternalInstruction *insn,
   1549                     const struct OperandSpecifier *op) {
   1550   uint8_t valid;
   1551 
   1552   dbgprintf(insn, "fixupReg()");
   1553 
   1554   switch ((OperandEncoding)op->encoding) {
   1555   default:
   1556     debug("Expected a REG or R/M encoding in fixupReg");
   1557     return -1;
   1558   case ENCODING_VVVV:
   1559     insn->vvvv = (Reg)fixupRegValue(insn,
   1560                                     (OperandType)op->type,
   1561                                     insn->vvvv,
   1562                                     &valid);
   1563     if (!valid)
   1564       return -1;
   1565     break;
   1566   case ENCODING_REG:
   1567     insn->reg = (Reg)fixupRegValue(insn,
   1568                                    (OperandType)op->type,
   1569                                    insn->reg - insn->regBase,
   1570                                    &valid);
   1571     if (!valid)
   1572       return -1;
   1573     break;
   1574   CASE_ENCODING_RM:
   1575     if (insn->eaBase >= insn->eaRegBase) {
   1576       insn->eaBase = (EABase)fixupRMValue(insn,
   1577                                           (OperandType)op->type,
   1578                                           insn->eaBase - insn->eaRegBase,
   1579                                           &valid);
   1580       if (!valid)
   1581         return -1;
   1582     }
   1583     break;
   1584   }
   1585 
   1586   return 0;
   1587 }
   1588 
   1589 /*
   1590  * readOpcodeRegister - Reads an operand from the opcode field of an
   1591  *   instruction and interprets it appropriately given the operand width.
   1592  *   Handles AddRegFrm instructions.
   1593  *
   1594  * @param insn  - the instruction whose opcode field is to be read.
   1595  * @param size  - The width (in bytes) of the register being specified.
   1596  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
   1597  *                RAX.
   1598  * @return      - 0 on success; nonzero otherwise.
   1599  */
   1600 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
   1601   dbgprintf(insn, "readOpcodeRegister()");
   1602 
   1603   if (size == 0)
   1604     size = insn->registerSize;
   1605 
   1606   switch (size) {
   1607   case 1:
   1608     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
   1609                                                   | (insn->opcode & 7)));
   1610     if (insn->rexPrefix &&
   1611         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
   1612         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
   1613       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
   1614                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
   1615     }
   1616 
   1617     break;
   1618   case 2:
   1619     insn->opcodeRegister = (Reg)(MODRM_REG_AX
   1620                                  + ((bFromREX(insn->rexPrefix) << 3)
   1621                                     | (insn->opcode & 7)));
   1622     break;
   1623   case 4:
   1624     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
   1625                                  + ((bFromREX(insn->rexPrefix) << 3)
   1626                                     | (insn->opcode & 7)));
   1627     break;
   1628   case 8:
   1629     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
   1630                                  + ((bFromREX(insn->rexPrefix) << 3)
   1631                                     | (insn->opcode & 7)));
   1632     break;
   1633   }
   1634 
   1635   return 0;
   1636 }
   1637 
   1638 /*
   1639  * readImmediate - Consumes an immediate operand from an instruction, given the
   1640  *   desired operand size.
   1641  *
   1642  * @param insn  - The instruction whose operand is to be read.
   1643  * @param size  - The width (in bytes) of the operand.
   1644  * @return      - 0 if the immediate was successfully consumed; nonzero
   1645  *                otherwise.
   1646  */
   1647 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
   1648   uint8_t imm8;
   1649   uint16_t imm16;
   1650   uint32_t imm32;
   1651   uint64_t imm64;
   1652 
   1653   dbgprintf(insn, "readImmediate()");
   1654 
   1655   if (insn->numImmediatesConsumed == 2) {
   1656     debug("Already consumed two immediates");
   1657     return -1;
   1658   }
   1659 
   1660   if (size == 0)
   1661     size = insn->immediateSize;
   1662   else
   1663     insn->immediateSize = size;
   1664   insn->immediateOffset = insn->readerCursor - insn->startLocation;
   1665 
   1666   switch (size) {
   1667   case 1:
   1668     if (consumeByte(insn, &imm8))
   1669       return -1;
   1670     insn->immediates[insn->numImmediatesConsumed] = imm8;
   1671     break;
   1672   case 2:
   1673     if (consumeUInt16(insn, &imm16))
   1674       return -1;
   1675     insn->immediates[insn->numImmediatesConsumed] = imm16;
   1676     break;
   1677   case 4:
   1678     if (consumeUInt32(insn, &imm32))
   1679       return -1;
   1680     insn->immediates[insn->numImmediatesConsumed] = imm32;
   1681     break;
   1682   case 8:
   1683     if (consumeUInt64(insn, &imm64))
   1684       return -1;
   1685     insn->immediates[insn->numImmediatesConsumed] = imm64;
   1686     break;
   1687   }
   1688 
   1689   insn->numImmediatesConsumed++;
   1690 
   1691   return 0;
   1692 }
   1693 
   1694 /*
   1695  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
   1696  *
   1697  * @param insn  - The instruction whose operand is to be read.
   1698  * @return      - 0 if the vvvv was successfully consumed; nonzero
   1699  *                otherwise.
   1700  */
   1701 static int readVVVV(struct InternalInstruction* insn) {
   1702   dbgprintf(insn, "readVVVV()");
   1703 
   1704   int vvvv;
   1705   if (insn->vectorExtensionType == TYPE_EVEX)
   1706     vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
   1707             vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
   1708   else if (insn->vectorExtensionType == TYPE_VEX_3B)
   1709     vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
   1710   else if (insn->vectorExtensionType == TYPE_VEX_2B)
   1711     vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
   1712   else if (insn->vectorExtensionType == TYPE_XOP)
   1713     vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
   1714   else
   1715     return -1;
   1716 
   1717   if (insn->mode != MODE_64BIT)
   1718     vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
   1719 
   1720   insn->vvvv = static_cast<Reg>(vvvv);
   1721   return 0;
   1722 }
   1723 
   1724 /*
   1725  * readMaskRegister - Reads an mask register from the opcode field of an
   1726  *   instruction.
   1727  *
   1728  * @param insn    - The instruction whose opcode field is to be read.
   1729  * @return        - 0 on success; nonzero otherwise.
   1730  */
   1731 static int readMaskRegister(struct InternalInstruction* insn) {
   1732   dbgprintf(insn, "readMaskRegister()");
   1733 
   1734   if (insn->vectorExtensionType != TYPE_EVEX)
   1735     return -1;
   1736 
   1737   insn->writemask =
   1738       static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
   1739   return 0;
   1740 }
   1741 
   1742 /*
   1743  * readOperands - Consults the specifier for an instruction and consumes all
   1744  *   operands for that instruction, interpreting them as it goes.
   1745  *
   1746  * @param insn  - The instruction whose operands are to be read and interpreted.
   1747  * @return      - 0 if all operands could be read; nonzero otherwise.
   1748  */
   1749 static int readOperands(struct InternalInstruction* insn) {
   1750   int hasVVVV, needVVVV;
   1751   int sawRegImm = 0;
   1752 
   1753   dbgprintf(insn, "readOperands()");
   1754 
   1755   /* If non-zero vvvv specified, need to make sure one of the operands
   1756      uses it. */
   1757   hasVVVV = !readVVVV(insn);
   1758   needVVVV = hasVVVV && (insn->vvvv != 0);
   1759 
   1760   for (const auto &Op : x86OperandSets[insn->spec->operands]) {
   1761     switch (Op.encoding) {
   1762     case ENCODING_NONE:
   1763     case ENCODING_SI:
   1764     case ENCODING_DI:
   1765       break;
   1766     CASE_ENCODING_VSIB:
   1767       // VSIB can use the V2 bit so check only the other bits.
   1768       if (needVVVV)
   1769         needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
   1770       if (readModRM(insn))
   1771         return -1;
   1772 
   1773       // Reject if SIB wasn't used.
   1774       if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
   1775         return -1;
   1776 
   1777       // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
   1778       if (insn->sibIndex == SIB_INDEX_NONE)
   1779         insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
   1780 
   1781       // If EVEX.v2 is set this is one of the 16-31 registers.
   1782       if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
   1783           v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
   1784         insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
   1785 
   1786       // Adjust the index register to the correct size.
   1787       switch ((OperandType)Op.type) {
   1788       default:
   1789         debug("Unhandled VSIB index type");
   1790         return -1;
   1791       case TYPE_MVSIBX:
   1792         insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
   1793                                     (insn->sibIndex - insn->sibIndexBase));
   1794         break;
   1795       case TYPE_MVSIBY:
   1796         insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
   1797                                     (insn->sibIndex - insn->sibIndexBase));
   1798         break;
   1799       case TYPE_MVSIBZ:
   1800         insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
   1801                                     (insn->sibIndex - insn->sibIndexBase));
   1802         break;
   1803       }
   1804 
   1805       // Apply the AVX512 compressed displacement scaling factor.
   1806       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
   1807         insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
   1808       break;
   1809     case ENCODING_REG:
   1810     CASE_ENCODING_RM:
   1811       if (readModRM(insn))
   1812         return -1;
   1813       if (fixupReg(insn, &Op))
   1814         return -1;
   1815       // Apply the AVX512 compressed displacement scaling factor.
   1816       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
   1817         insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
   1818       break;
   1819     case ENCODING_IB:
   1820       if (sawRegImm) {
   1821         /* Saw a register immediate so don't read again and instead split the
   1822            previous immediate.  FIXME: This is a hack. */
   1823         insn->immediates[insn->numImmediatesConsumed] =
   1824           insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
   1825         ++insn->numImmediatesConsumed;
   1826         break;
   1827       }
   1828       if (readImmediate(insn, 1))
   1829         return -1;
   1830       if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
   1831         sawRegImm = 1;
   1832       break;
   1833     case ENCODING_IW:
   1834       if (readImmediate(insn, 2))
   1835         return -1;
   1836       break;
   1837     case ENCODING_ID:
   1838       if (readImmediate(insn, 4))
   1839         return -1;
   1840       break;
   1841     case ENCODING_IO:
   1842       if (readImmediate(insn, 8))
   1843         return -1;
   1844       break;
   1845     case ENCODING_Iv:
   1846       if (readImmediate(insn, insn->immediateSize))
   1847         return -1;
   1848       break;
   1849     case ENCODING_Ia:
   1850       if (readImmediate(insn, insn->addressSize))
   1851         return -1;
   1852       break;
   1853     case ENCODING_IRC:
   1854       insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
   1855                  lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
   1856       break;
   1857     case ENCODING_RB:
   1858       if (readOpcodeRegister(insn, 1))
   1859         return -1;
   1860       break;
   1861     case ENCODING_RW:
   1862       if (readOpcodeRegister(insn, 2))
   1863         return -1;
   1864       break;
   1865     case ENCODING_RD:
   1866       if (readOpcodeRegister(insn, 4))
   1867         return -1;
   1868       break;
   1869     case ENCODING_RO:
   1870       if (readOpcodeRegister(insn, 8))
   1871         return -1;
   1872       break;
   1873     case ENCODING_Rv:
   1874       if (readOpcodeRegister(insn, 0))
   1875         return -1;
   1876       break;
   1877     case ENCODING_FP:
   1878       break;
   1879     case ENCODING_VVVV:
   1880       needVVVV = 0; /* Mark that we have found a VVVV operand. */
   1881       if (!hasVVVV)
   1882         return -1;
   1883       if (insn->mode != MODE_64BIT)
   1884         insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
   1885       if (fixupReg(insn, &Op))
   1886         return -1;
   1887       break;
   1888     case ENCODING_WRITEMASK:
   1889       if (readMaskRegister(insn))
   1890         return -1;
   1891       break;
   1892     case ENCODING_DUP:
   1893       break;
   1894     default:
   1895       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
   1896       return -1;
   1897     }
   1898   }
   1899 
   1900   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
   1901   if (needVVVV) return -1;
   1902 
   1903   return 0;
   1904 }
   1905 
   1906 /*
   1907  * decodeInstruction - Reads and interprets a full instruction provided by the
   1908  *   user.
   1909  *
   1910  * @param insn      - A pointer to the instruction to be populated.  Must be
   1911  *                    pre-allocated.
   1912  * @param reader    - The function to be used to read the instruction's bytes.
   1913  * @param readerArg - A generic argument to be passed to the reader to store
   1914  *                    any internal state.
   1915  * @param logger    - If non-NULL, the function to be used to write log messages
   1916  *                    and warnings.
   1917  * @param loggerArg - A generic argument to be passed to the logger to store
   1918  *                    any internal state.
   1919  * @param startLoc  - The address (in the reader's address space) of the first
   1920  *                    byte in the instruction.
   1921  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
   1922  *                    decode the instruction in.
   1923  * @return          - 0 if the instruction's memory could be read; nonzero if
   1924  *                    not.
   1925  */
   1926 int llvm::X86Disassembler::decodeInstruction(
   1927     struct InternalInstruction *insn, byteReader_t reader,
   1928     const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
   1929     uint64_t startLoc, DisassemblerMode mode) {
   1930   memset(insn, 0, sizeof(struct InternalInstruction));
   1931 
   1932   insn->reader = reader;
   1933   insn->readerArg = readerArg;
   1934   insn->dlog = logger;
   1935   insn->dlogArg = loggerArg;
   1936   insn->startLocation = startLoc;
   1937   insn->readerCursor = startLoc;
   1938   insn->mode = mode;
   1939   insn->numImmediatesConsumed = 0;
   1940 
   1941   if (readPrefixes(insn)       ||
   1942       readOpcode(insn)         ||
   1943       getID(insn, miiArg)      ||
   1944       insn->instructionID == 0 ||
   1945       readOperands(insn))
   1946     return -1;
   1947 
   1948   insn->operands = x86OperandSets[insn->spec->operands];
   1949 
   1950   insn->length = insn->readerCursor - insn->startLocation;
   1951 
   1952   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
   1953             startLoc, insn->readerCursor, insn->length);
   1954 
   1955   if (insn->length > 15)
   1956     dbgprintf(insn, "Instruction exceeds 15-byte limit");
   1957 
   1958   return 0;
   1959 }
   1960