Home | History | Annotate | Download | only in Disassembler
      1 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
      2  *
      3  *                     The LLVM Compiler Infrastructure
      4  *
      5  * This file is distributed under the University of Illinois Open Source
      6  * License. See LICENSE.TXT for details.
      7  *
      8  *===----------------------------------------------------------------------===*
      9  *
     10  * This file is part of the X86 Disassembler.
     11  * It contains the implementation of the instruction decoder.
     12  * Documentation for the disassembler can be found in X86Disassembler.h.
     13  *
     14  *===----------------------------------------------------------------------===*/
     15 
     16 #include <stdarg.h>   /* for va_*()       */
     17 #include <stdio.h>    /* for vsnprintf()  */
     18 #include <stdlib.h>   /* for exit()       */
     19 #include <string.h>   /* for memset()     */
     20 
     21 #include "X86DisassemblerDecoder.h"
     22 
     23 #include "X86GenDisassemblerTables.inc"
     24 
/* Boolean constants used by the decoder's predicates and flags. */
#define TRUE  1
#define FALSE 0

/* File-local boolean type, stored as a signed 8-bit integer. */
typedef int8_t bool;

/*
 * debug(s) - When NDEBUG is not defined, forwards the message s along with the
 *   current source file and line to x86DisassemblerDebug().  When NDEBUG is
 *   defined it expands to an empty statement, so s is never evaluated.
 */
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
     35 
     36 
     37 /*
     38  * contextForAttrs - Client for the instruction context table.  Takes a set of
     39  *   attributes and returns the appropriate decode context.
     40  *
     41  * @param attrMask  - Attributes, from the enumeration attributeBits.
     42  * @return          - The InstructionContext to use when looking up an
     43  *                    an instruction with these attributes.
     44  */
     45 static InstructionContext contextForAttrs(uint8_t attrMask) {
     46   return CONTEXTS_SYM[attrMask];
     47 }
     48 
     49 /*
     50  * modRMRequired - Reads the appropriate instruction table to determine whether
     51  *   the ModR/M byte is required to decode a particular instruction.
     52  *
     53  * @param type        - The opcode type (i.e., how many bytes it has).
     54  * @param insnContext - The context for the instruction, as returned by
     55  *                      contextForAttrs.
     56  * @param opcode      - The last byte of the instruction's opcode, not counting
     57  *                      ModR/M extensions and escapes.
     58  * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
     59  */
     60 static int modRMRequired(OpcodeType type,
     61                                 InstructionContext insnContext,
     62                                 uint8_t opcode) {
     63   const struct ContextDecision* decision = 0;
     64 
     65   switch (type) {
     66   case ONEBYTE:
     67     decision = &ONEBYTE_SYM;
     68     break;
     69   case TWOBYTE:
     70     decision = &TWOBYTE_SYM;
     71     break;
     72   case THREEBYTE_38:
     73     decision = &THREEBYTE38_SYM;
     74     break;
     75   case THREEBYTE_3A:
     76     decision = &THREEBYTE3A_SYM;
     77     break;
     78   case THREEBYTE_A6:
     79     decision = &THREEBYTEA6_SYM;
     80     break;
     81   case THREEBYTE_A7:
     82     decision = &THREEBYTEA7_SYM;
     83     break;
     84   }
     85 
     86   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
     87     modrm_type != MODRM_ONEENTRY;
     88 
     89   return 0;
     90 }
     91 
     92 /*
     93  * decode - Reads the appropriate instruction table to obtain the unique ID of
     94  *   an instruction.
     95  *
     96  * @param type        - See modRMRequired().
     97  * @param insnContext - See modRMRequired().
     98  * @param opcode      - See modRMRequired().
     99  * @param modRM       - The ModR/M byte if required, or any value if not.
    100  * @return            - The UID of the instruction, or 0 on failure.
    101  */
    102 static InstrUID decode(OpcodeType type,
    103                        InstructionContext insnContext,
    104                        uint8_t opcode,
    105                        uint8_t modRM) {
    106   const struct ModRMDecision* dec;
    107 
    108   switch (type) {
    109   default:
    110     debug("Unknown opcode type");
    111     return 0;
    112   case ONEBYTE:
    113     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    114     break;
    115   case TWOBYTE:
    116     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    117     break;
    118   case THREEBYTE_38:
    119     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    120     break;
    121   case THREEBYTE_3A:
    122     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    123     break;
    124   case THREEBYTE_A6:
    125     dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    126     break;
    127   case THREEBYTE_A7:
    128     dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
    129     break;
    130   }
    131 
    132   switch (dec->modrm_type) {
    133   default:
    134     debug("Corrupt table!  Unknown modrm_type");
    135     return 0;
    136   case MODRM_ONEENTRY:
    137     return dec->instructionIDs[0];
    138   case MODRM_SPLITRM:
    139     if (modFromModRM(modRM) == 0x3)
    140       return dec->instructionIDs[1];
    141     else
    142       return dec->instructionIDs[0];
    143   case MODRM_FULL:
    144     return dec->instructionIDs[modRM];
    145   }
    146 }
    147 
    148 /*
    149  * specifierForUID - Given a UID, returns the name and operand specification for
    150  *   that instruction.
    151  *
    152  * @param uid - The unique ID for the instruction.  This should be returned by
    153  *              decode(); specifierForUID will not check bounds.
    154  * @return    - A pointer to the specification for that instruction.
    155  */
    156 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
    157   return &INSTRUCTIONS_SYM[uid];
    158 }
    159 
    160 /*
    161  * consumeByte - Uses the reader function provided by the user to consume one
    162  *   byte from the instruction's memory and advance the cursor.
    163  *
    164  * @param insn  - The instruction with the reader function to use.  The cursor
    165  *                for this instruction is advanced.
    166  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
    167  *                with the data read.
    168  * @return      - 0 if the read was successful; nonzero otherwise.
    169  */
    170 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
    171   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
    172 
    173   if (!ret)
    174     ++(insn->readerCursor);
    175 
    176   return ret;
    177 }
    178 
    179 /*
    180  * lookAtByte - Like consumeByte, but does not advance the cursor.
    181  *
    182  * @param insn  - See consumeByte().
    183  * @param byte  - See consumeByte().
    184  * @return      - See consumeByte().
    185  */
    186 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
    187   return insn->reader(insn->readerArg, byte, insn->readerCursor);
    188 }
    189 
    190 static void unconsumeByte(struct InternalInstruction* insn) {
    191   insn->readerCursor--;
    192 }
    193 
    194 #define CONSUME_FUNC(name, type)                                  \
    195   static int name(struct InternalInstruction* insn, type* ptr) {  \
    196     type combined = 0;                                            \
    197     unsigned offset;                                              \
    198     for (offset = 0; offset < sizeof(type); ++offset) {           \
    199       uint8_t byte;                                               \
    200       int ret = insn->reader(insn->readerArg,                     \
    201                              &byte,                               \
    202                              insn->readerCursor + offset);        \
    203       if (ret)                                                    \
    204         return ret;                                               \
    205       combined = combined | ((type)byte << ((type)offset * 8));   \
    206     }                                                             \
    207     *ptr = combined;                                              \
    208     insn->readerCursor += sizeof(type);                           \
    209     return 0;                                                     \
    210   }
    211 
    212 /*
    213  * consume* - Use the reader function provided by the user to consume data
    214  *   values of various sizes from the instruction's memory and advance the
    215  *   cursor appropriately.  These readers perform endian conversion.
    216  *
    217  * @param insn    - See consumeByte().
    218  * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
    219  *                  be populated with the data read.
    220  * @return        - See consumeByte().
    221  */
    222 CONSUME_FUNC(consumeInt8, int8_t)
    223 CONSUME_FUNC(consumeInt16, int16_t)
    224 CONSUME_FUNC(consumeInt32, int32_t)
    225 CONSUME_FUNC(consumeUInt16, uint16_t)
    226 CONSUME_FUNC(consumeUInt32, uint32_t)
    227 CONSUME_FUNC(consumeUInt64, uint64_t)
    228 
    229 /*
    230  * dbgprintf - Uses the logging function provided by the user to log a single
    231  *   message, typically without a carriage-return.
    232  *
    233  * @param insn    - The instruction containing the logging function.
    234  * @param format  - See printf().
    235  * @param ...     - See printf().
    236  */
    237 static void dbgprintf(struct InternalInstruction* insn,
    238                       const char* format,
    239                       ...) {
    240   char buffer[256];
    241   va_list ap;
    242 
    243   if (!insn->dlog)
    244     return;
    245 
    246   va_start(ap, format);
    247   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
    248   va_end(ap);
    249 
    250   insn->dlog(insn->dlogArg, buffer);
    251 
    252   return;
    253 }
    254 
    255 /*
    256  * setPrefixPresent - Marks that a particular prefix is present at a particular
    257  *   location.
    258  *
    259  * @param insn      - The instruction to be marked as having the prefix.
    260  * @param prefix    - The prefix that is present.
    261  * @param location  - The location where the prefix is located (in the address
    262  *                    space of the instruction's reader).
    263  */
    264 static void setPrefixPresent(struct InternalInstruction* insn,
    265                                     uint8_t prefix,
    266                                     uint64_t location)
    267 {
    268   insn->prefixPresent[prefix] = 1;
    269   insn->prefixLocations[prefix] = location;
    270 }
    271 
    272 /*
    273  * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
    274  *   present at a given location.
    275  *
    276  * @param insn      - The instruction to be queried.
    277  * @param prefix    - The prefix.
    278  * @param location  - The location to query.
    279  * @return          - Whether the prefix is at that location.
    280  */
    281 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
    282                                uint8_t prefix,
    283                                uint64_t location)
    284 {
    285   if (insn->prefixPresent[prefix] == 1 &&
    286      insn->prefixLocations[prefix] == location)
    287     return TRUE;
    288   else
    289     return FALSE;
    290 }
    291 
    292 /*
    293  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
    294  *   instruction as having them.  Also sets the instruction's default operand,
    295  *   address, and other relevant data sizes to report operands correctly.
    296  *
    297  * @param insn  - The instruction whose prefixes are to be read.
    298  * @return      - 0 if the instruction could be read until the end of the prefix
    299  *                bytes, and no prefixes conflicted; nonzero otherwise.
    300  */
    301 static int readPrefixes(struct InternalInstruction* insn) {
    302   BOOL isPrefix = TRUE;
    303   BOOL prefixGroups[4] = { FALSE };
    304   uint64_t prefixLocation;
    305   uint8_t byte = 0;
    306 
    307   BOOL hasAdSize = FALSE;
    308   BOOL hasOpSize = FALSE;
    309 
    310   dbgprintf(insn, "readPrefixes()");
    311 
    312   while (isPrefix) {
    313     prefixLocation = insn->readerCursor;
    314 
    315     if (consumeByte(insn, &byte))
    316       return -1;
    317 
    318     switch (byte) {
    319     case 0xf0:  /* LOCK */
    320     case 0xf2:  /* REPNE/REPNZ */
    321     case 0xf3:  /* REP or REPE/REPZ */
    322       if (prefixGroups[0])
    323         dbgprintf(insn, "Redundant Group 1 prefix");
    324       prefixGroups[0] = TRUE;
    325       setPrefixPresent(insn, byte, prefixLocation);
    326       break;
    327     case 0x2e:  /* CS segment override -OR- Branch not taken */
    328     case 0x36:  /* SS segment override -OR- Branch taken */
    329     case 0x3e:  /* DS segment override */
    330     case 0x26:  /* ES segment override */
    331     case 0x64:  /* FS segment override */
    332     case 0x65:  /* GS segment override */
    333       switch (byte) {
    334       case 0x2e:
    335         insn->segmentOverride = SEG_OVERRIDE_CS;
    336         break;
    337       case 0x36:
    338         insn->segmentOverride = SEG_OVERRIDE_SS;
    339         break;
    340       case 0x3e:
    341         insn->segmentOverride = SEG_OVERRIDE_DS;
    342         break;
    343       case 0x26:
    344         insn->segmentOverride = SEG_OVERRIDE_ES;
    345         break;
    346       case 0x64:
    347         insn->segmentOverride = SEG_OVERRIDE_FS;
    348         break;
    349       case 0x65:
    350         insn->segmentOverride = SEG_OVERRIDE_GS;
    351         break;
    352       default:
    353         debug("Unhandled override");
    354         return -1;
    355       }
    356       if (prefixGroups[1])
    357         dbgprintf(insn, "Redundant Group 2 prefix");
    358       prefixGroups[1] = TRUE;
    359       setPrefixPresent(insn, byte, prefixLocation);
    360       break;
    361     case 0x66:  /* Operand-size override */
    362       if (prefixGroups[2])
    363         dbgprintf(insn, "Redundant Group 3 prefix");
    364       prefixGroups[2] = TRUE;
    365       hasOpSize = TRUE;
    366       setPrefixPresent(insn, byte, prefixLocation);
    367       break;
    368     case 0x67:  /* Address-size override */
    369       if (prefixGroups[3])
    370         dbgprintf(insn, "Redundant Group 4 prefix");
    371       prefixGroups[3] = TRUE;
    372       hasAdSize = TRUE;
    373       setPrefixPresent(insn, byte, prefixLocation);
    374       break;
    375     default:    /* Not a prefix byte */
    376       isPrefix = FALSE;
    377       break;
    378     }
    379 
    380     if (isPrefix)
    381       dbgprintf(insn, "Found prefix 0x%hhx", byte);
    382   }
    383 
    384   insn->vexSize = 0;
    385 
    386   if (byte == 0xc4) {
    387     uint8_t byte1;
    388 
    389     if (lookAtByte(insn, &byte1)) {
    390       dbgprintf(insn, "Couldn't read second byte of VEX");
    391       return -1;
    392     }
    393 
    394     if (insn->mode == MODE_64BIT || byte1 & 0x8) {
    395       insn->vexSize = 3;
    396       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    397     }
    398     else {
    399       unconsumeByte(insn);
    400       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    401     }
    402 
    403     if (insn->vexSize == 3) {
    404       insn->vexPrefix[0] = byte;
    405       consumeByte(insn, &insn->vexPrefix[1]);
    406       consumeByte(insn, &insn->vexPrefix[2]);
    407 
    408       /* We simulate the REX prefix for simplicity's sake */
    409 
    410       insn->rexPrefix = 0x40
    411                       | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
    412                       | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
    413                       | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
    414                       | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
    415 
    416       switch (ppFromVEX3of3(insn->vexPrefix[2]))
    417       {
    418       default:
    419         break;
    420       case VEX_PREFIX_66:
    421         hasOpSize = TRUE;
    422         break;
    423       }
    424 
    425       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
    426     }
    427   }
    428   else if (byte == 0xc5) {
    429     uint8_t byte1;
    430 
    431     if (lookAtByte(insn, &byte1)) {
    432       dbgprintf(insn, "Couldn't read second byte of VEX");
    433       return -1;
    434     }
    435 
    436     if (insn->mode == MODE_64BIT || byte1 & 0x8) {
    437       insn->vexSize = 2;
    438     }
    439     else {
    440       unconsumeByte(insn);
    441     }
    442 
    443     if (insn->vexSize == 2) {
    444       insn->vexPrefix[0] = byte;
    445       consumeByte(insn, &insn->vexPrefix[1]);
    446 
    447       insn->rexPrefix = 0x40
    448                       | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
    449 
    450       switch (ppFromVEX2of2(insn->vexPrefix[1]))
    451       {
    452       default:
    453         break;
    454       case VEX_PREFIX_66:
    455         hasOpSize = TRUE;
    456         break;
    457       }
    458 
    459       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
    460     }
    461   }
    462   else {
    463     if (insn->mode == MODE_64BIT) {
    464       if ((byte & 0xf0) == 0x40) {
    465         uint8_t opcodeByte;
    466 
    467         if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
    468           dbgprintf(insn, "Redundant REX prefix");
    469           return -1;
    470         }
    471 
    472         insn->rexPrefix = byte;
    473         insn->necessaryPrefixLocation = insn->readerCursor - 2;
    474 
    475         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
    476       } else {
    477         unconsumeByte(insn);
    478         insn->necessaryPrefixLocation = insn->readerCursor - 1;
    479       }
    480     } else {
    481       unconsumeByte(insn);
    482       insn->necessaryPrefixLocation = insn->readerCursor - 1;
    483     }
    484   }
    485 
    486   if (insn->mode == MODE_16BIT) {
    487     insn->registerSize       = (hasOpSize ? 4 : 2);
    488     insn->addressSize        = (hasAdSize ? 4 : 2);
    489     insn->displacementSize   = (hasAdSize ? 4 : 2);
    490     insn->immediateSize      = (hasOpSize ? 4 : 2);
    491   } else if (insn->mode == MODE_32BIT) {
    492     insn->registerSize       = (hasOpSize ? 2 : 4);
    493     insn->addressSize        = (hasAdSize ? 2 : 4);
    494     insn->displacementSize   = (hasAdSize ? 2 : 4);
    495     insn->immediateSize      = (hasOpSize ? 2 : 4);
    496   } else if (insn->mode == MODE_64BIT) {
    497     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
    498       insn->registerSize       = 8;
    499       insn->addressSize        = (hasAdSize ? 4 : 8);
    500       insn->displacementSize   = 4;
    501       insn->immediateSize      = 4;
    502     } else if (insn->rexPrefix) {
    503       insn->registerSize       = (hasOpSize ? 2 : 4);
    504       insn->addressSize        = (hasAdSize ? 4 : 8);
    505       insn->displacementSize   = (hasOpSize ? 2 : 4);
    506       insn->immediateSize      = (hasOpSize ? 2 : 4);
    507     } else {
    508       insn->registerSize       = (hasOpSize ? 2 : 4);
    509       insn->addressSize        = (hasAdSize ? 4 : 8);
    510       insn->displacementSize   = (hasOpSize ? 2 : 4);
    511       insn->immediateSize      = (hasOpSize ? 2 : 4);
    512     }
    513   }
    514 
    515   return 0;
    516 }
    517 
    518 /*
    519  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
    520  *   extended or escape opcodes).
    521  *
    522  * @param insn  - The instruction whose opcode is to be read.
    523  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
    524  */
    525 static int readOpcode(struct InternalInstruction* insn) {
    526   /* Determine the length of the primary opcode */
    527 
    528   uint8_t current;
    529 
    530   dbgprintf(insn, "readOpcode()");
    531 
    532   insn->opcodeType = ONEBYTE;
    533 
    534   if (insn->vexSize == 3)
    535   {
    536     switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
    537     {
    538     default:
    539       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
    540       return -1;
    541     case 0:
    542       break;
    543     case VEX_LOB_0F:
    544       insn->twoByteEscape = 0x0f;
    545       insn->opcodeType = TWOBYTE;
    546       return consumeByte(insn, &insn->opcode);
    547     case VEX_LOB_0F38:
    548       insn->twoByteEscape = 0x0f;
    549       insn->threeByteEscape = 0x38;
    550       insn->opcodeType = THREEBYTE_38;
    551       return consumeByte(insn, &insn->opcode);
    552     case VEX_LOB_0F3A:
    553       insn->twoByteEscape = 0x0f;
    554       insn->threeByteEscape = 0x3a;
    555       insn->opcodeType = THREEBYTE_3A;
    556       return consumeByte(insn, &insn->opcode);
    557     }
    558   }
    559   else if (insn->vexSize == 2)
    560   {
    561     insn->twoByteEscape = 0x0f;
    562     insn->opcodeType = TWOBYTE;
    563     return consumeByte(insn, &insn->opcode);
    564   }
    565 
    566   if (consumeByte(insn, &current))
    567     return -1;
    568 
    569   if (current == 0x0f) {
    570     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
    571 
    572     insn->twoByteEscape = current;
    573 
    574     if (consumeByte(insn, &current))
    575       return -1;
    576 
    577     if (current == 0x38) {
    578       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    579 
    580       insn->threeByteEscape = current;
    581 
    582       if (consumeByte(insn, &current))
    583         return -1;
    584 
    585       insn->opcodeType = THREEBYTE_38;
    586     } else if (current == 0x3a) {
    587       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    588 
    589       insn->threeByteEscape = current;
    590 
    591       if (consumeByte(insn, &current))
    592         return -1;
    593 
    594       insn->opcodeType = THREEBYTE_3A;
    595     } else if (current == 0xa6) {
    596       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    597 
    598       insn->threeByteEscape = current;
    599 
    600       if (consumeByte(insn, &current))
    601         return -1;
    602 
    603       insn->opcodeType = THREEBYTE_A6;
    604     } else if (current == 0xa7) {
    605       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
    606 
    607       insn->threeByteEscape = current;
    608 
    609       if (consumeByte(insn, &current))
    610         return -1;
    611 
    612       insn->opcodeType = THREEBYTE_A7;
    613     } else {
    614       dbgprintf(insn, "Didn't find a three-byte escape prefix");
    615 
    616       insn->opcodeType = TWOBYTE;
    617     }
    618   }
    619 
    620   /*
    621    * At this point we have consumed the full opcode.
    622    * Anything we consume from here on must be unconsumed.
    623    */
    624 
    625   insn->opcode = current;
    626 
    627   return 0;
    628 }
    629 
    630 static int readModRM(struct InternalInstruction* insn);
    631 
    632 /*
    633  * getIDWithAttrMask - Determines the ID of an instruction, consuming
    634  *   the ModR/M byte as appropriate for extended and escape opcodes,
    635  *   and using a supplied attribute mask.
    636  *
    637  * @param instructionID - A pointer whose target is filled in with the ID of the
    638  *                        instruction.
    639  * @param insn          - The instruction whose ID is to be determined.
    640  * @param attrMask      - The attribute mask to search.
    641  * @return              - 0 if the ModR/M could be read when needed or was not
    642  *                        needed; nonzero otherwise.
    643  */
    644 static int getIDWithAttrMask(uint16_t* instructionID,
    645                              struct InternalInstruction* insn,
    646                              uint8_t attrMask) {
    647   BOOL hasModRMExtension;
    648 
    649   uint8_t instructionClass;
    650 
    651   instructionClass = contextForAttrs(attrMask);
    652 
    653   hasModRMExtension = modRMRequired(insn->opcodeType,
    654                                     instructionClass,
    655                                     insn->opcode);
    656 
    657   if (hasModRMExtension) {
    658     if (readModRM(insn))
    659       return -1;
    660 
    661     *instructionID = decode(insn->opcodeType,
    662                             instructionClass,
    663                             insn->opcode,
    664                             insn->modRM);
    665   } else {
    666     *instructionID = decode(insn->opcodeType,
    667                             instructionClass,
    668                             insn->opcode,
    669                             0);
    670   }
    671 
    672   return 0;
    673 }
    674 
    675 /*
    676  * is16BitEquivalent - Determines whether two instruction names refer to
    677  * equivalent instructions but one is 16-bit whereas the other is not.
    678  *
    679  * @param orig  - The instruction that is not 16-bit
    680  * @param equiv - The instruction that is 16-bit
    681  */
    682 static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
    683   off_t i;
    684 
    685   for (i = 0;; i++) {
    686     if (orig[i] == '\0' && equiv[i] == '\0')
    687       return TRUE;
    688     if (orig[i] == '\0' || equiv[i] == '\0')
    689       return FALSE;
    690     if (orig[i] != equiv[i]) {
    691       if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
    692         continue;
    693       if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
    694         continue;
    695       if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
    696         continue;
    697       return FALSE;
    698     }
    699   }
    700 }
    701 
    702 /*
    703  * is64BitEquivalent - Determines whether two instruction names refer to
    704  * equivalent instructions but one is 64-bit whereas the other is not.
    705  *
    706  * @param orig  - The instruction that is not 64-bit
    707  * @param equiv - The instruction that is 64-bit
    708  */
    709 static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
    710   off_t i;
    711 
    712   for (i = 0;; i++) {
    713     if (orig[i] == '\0' && equiv[i] == '\0')
    714       return TRUE;
    715     if (orig[i] == '\0' || equiv[i] == '\0')
    716       return FALSE;
    717     if (orig[i] != equiv[i]) {
    718       if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
    719         continue;
    720       if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
    721         continue;
    722       if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
    723         continue;
    724       return FALSE;
    725     }
    726   }
    727 }
    728 
    729 
    730 /*
    731  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
    732  *   appropriate for extended and escape opcodes.  Determines the attributes and
    733  *   context for the instruction before doing so.
    734  *
    735  * @param insn  - The instruction whose ID is to be determined.
    736  * @return      - 0 if the ModR/M could be read when needed or was not needed;
    737  *                nonzero otherwise.
    738  */
    739 static int getID(struct InternalInstruction* insn) {
    740   uint8_t attrMask;
    741   uint16_t instructionID;
    742 
    743   dbgprintf(insn, "getID()");
    744 
    745   attrMask = ATTR_NONE;
    746 
    747   if (insn->mode == MODE_64BIT)
    748     attrMask |= ATTR_64BIT;
    749 
    750   if (insn->vexSize) {
    751     attrMask |= ATTR_VEX;
    752 
    753     if (insn->vexSize == 3) {
    754       switch (ppFromVEX3of3(insn->vexPrefix[2])) {
    755       case VEX_PREFIX_66:
    756         attrMask |= ATTR_OPSIZE;
    757         break;
    758       case VEX_PREFIX_F3:
    759         attrMask |= ATTR_XS;
    760         break;
    761       case VEX_PREFIX_F2:
    762         attrMask |= ATTR_XD;
    763         break;
    764       }
    765 
    766       if (wFromVEX3of3(insn->vexPrefix[2]))
    767         attrMask |= ATTR_REXW;
    768       if (lFromVEX3of3(insn->vexPrefix[2]))
    769         attrMask |= ATTR_VEXL;
    770     }
    771     else if (insn->vexSize == 2) {
    772       switch (ppFromVEX2of2(insn->vexPrefix[1])) {
    773       case VEX_PREFIX_66:
    774         attrMask |= ATTR_OPSIZE;
    775         break;
    776       case VEX_PREFIX_F3:
    777         attrMask |= ATTR_XS;
    778         break;
    779       case VEX_PREFIX_F2:
    780         attrMask |= ATTR_XD;
    781         break;
    782       }
    783 
    784       if (lFromVEX2of2(insn->vexPrefix[1]))
    785         attrMask |= ATTR_VEXL;
    786     }
    787     else {
    788       return -1;
    789     }
    790   }
    791   else {
    792     if (insn->rexPrefix & 0x08)
    793       attrMask |= ATTR_REXW;
    794 
    795     if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
    796       attrMask |= ATTR_OPSIZE;
    797     else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
    798       attrMask |= ATTR_XS;
    799     else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
    800       attrMask |= ATTR_XD;
    801 
    802   }
    803 
    804   if (getIDWithAttrMask(&instructionID, insn, attrMask))
    805     return -1;
    806 
    807   /* The following clauses compensate for limitations of the tables. */
    808 
    809   if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
    810     /*
    811      * Although for SSE instructions it is usually necessary to treat REX.W+F2
    812      * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
    813      * an occasional instruction where F2 is incidental and REX.W is the more
    814      * significant.  If the decoded instruction is 32-bit and adding REX.W
    815      * instead of F2 changes a 32 to a 64, we adopt the new encoding.
    816      */
    817 
    818     const struct InstructionSpecifier *spec;
    819     uint16_t instructionIDWithREXw;
    820     const struct InstructionSpecifier *specWithREXw;
    821 
    822     spec = specifierForUID(instructionID);
    823 
    824     if (getIDWithAttrMask(&instructionIDWithREXw,
    825                           insn,
    826                           attrMask & (~ATTR_XD))) {
    827       /*
    828        * Decoding with REX.w would yield nothing; give up and return original
    829        * decode.
    830        */
    831 
    832       insn->instructionID = instructionID;
    833       insn->spec = spec;
    834       return 0;
    835     }
    836 
    837     specWithREXw = specifierForUID(instructionIDWithREXw);
    838 
    839     if (is64BitEquivalent(spec->name, specWithREXw->name)) {
    840       insn->instructionID = instructionIDWithREXw;
    841       insn->spec = specWithREXw;
    842     } else {
    843       insn->instructionID = instructionID;
    844       insn->spec = spec;
    845     }
    846     return 0;
    847   }
    848 
    849   if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
    850     /*
    851      * The instruction tables make no distinction between instructions that
    852      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    853      * particular spot (i.e., many MMX operations).  In general we're
    854      * conservative, but in the specific case where OpSize is present but not
    855      * in the right place we check if there's a 16-bit operation.
    856      */
    857 
    858     const struct InstructionSpecifier *spec;
    859     uint16_t instructionIDWithOpsize;
    860     const struct InstructionSpecifier *specWithOpsize;
    861 
    862     spec = specifierForUID(instructionID);
    863 
    864     if (getIDWithAttrMask(&instructionIDWithOpsize,
    865                           insn,
    866                           attrMask | ATTR_OPSIZE)) {
    867       /*
    868        * ModRM required with OpSize but not present; give up and return version
    869        * without OpSize set
    870        */
    871 
    872       insn->instructionID = instructionID;
    873       insn->spec = spec;
    874       return 0;
    875     }
    876 
    877     specWithOpsize = specifierForUID(instructionIDWithOpsize);
    878 
    879     if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
    880       insn->instructionID = instructionIDWithOpsize;
    881       insn->spec = specWithOpsize;
    882     } else {
    883       insn->instructionID = instructionID;
    884       insn->spec = spec;
    885     }
    886     return 0;
    887   }
    888 
    889   insn->instructionID = instructionID;
    890   insn->spec = specifierForUID(insn->instructionID);
    891 
    892   return 0;
    893 }
    894 
    895 /*
    896  * readSIB - Consumes the SIB byte to determine addressing information for an
    897  *   instruction.
    898  *
    899  * @param insn  - The instruction whose SIB byte is to be read.
    900  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
    901  */
    902 static int readSIB(struct InternalInstruction* insn) {
    903   SIBIndex sibIndexBase = 0;
    904   SIBBase sibBaseBase = 0;
    905   uint8_t index, base;
    906 
    907   dbgprintf(insn, "readSIB()");
    908 
    909   if (insn->consumedSIB)
    910     return 0;
    911 
    912   insn->consumedSIB = TRUE;
    913 
    914   switch (insn->addressSize) {
    915   case 2:
    916     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
    917     return -1;
    918     break;
    919   case 4:
    920     sibIndexBase = SIB_INDEX_EAX;
    921     sibBaseBase = SIB_BASE_EAX;
    922     break;
    923   case 8:
    924     sibIndexBase = SIB_INDEX_RAX;
    925     sibBaseBase = SIB_BASE_RAX;
    926     break;
    927   }
    928 
    929   if (consumeByte(insn, &insn->sib))
    930     return -1;
    931 
    932   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
    933 
    934   switch (index) {
    935   case 0x4:
    936     insn->sibIndex = SIB_INDEX_NONE;
    937     break;
    938   default:
    939     insn->sibIndex = (SIBIndex)(sibIndexBase + index);
    940     if (insn->sibIndex == SIB_INDEX_sib ||
    941         insn->sibIndex == SIB_INDEX_sib64)
    942       insn->sibIndex = SIB_INDEX_NONE;
    943     break;
    944   }
    945 
    946   switch (scaleFromSIB(insn->sib)) {
    947   case 0:
    948     insn->sibScale = 1;
    949     break;
    950   case 1:
    951     insn->sibScale = 2;
    952     break;
    953   case 2:
    954     insn->sibScale = 4;
    955     break;
    956   case 3:
    957     insn->sibScale = 8;
    958     break;
    959   }
    960 
    961   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
    962 
    963   switch (base) {
    964   case 0x5:
    965     switch (modFromModRM(insn->modRM)) {
    966     case 0x0:
    967       insn->eaDisplacement = EA_DISP_32;
    968       insn->sibBase = SIB_BASE_NONE;
    969       break;
    970     case 0x1:
    971       insn->eaDisplacement = EA_DISP_8;
    972       insn->sibBase = (insn->addressSize == 4 ?
    973                        SIB_BASE_EBP : SIB_BASE_RBP);
    974       break;
    975     case 0x2:
    976       insn->eaDisplacement = EA_DISP_32;
    977       insn->sibBase = (insn->addressSize == 4 ?
    978                        SIB_BASE_EBP : SIB_BASE_RBP);
    979       break;
    980     case 0x3:
    981       debug("Cannot have Mod = 0b11 and a SIB byte");
    982       return -1;
    983     }
    984     break;
    985   default:
    986     insn->sibBase = (SIBBase)(sibBaseBase + base);
    987     break;
    988   }
    989 
    990   return 0;
    991 }
    992 
    993 /*
    994  * readDisplacement - Consumes the displacement of an instruction.
    995  *
    996  * @param insn  - The instruction whose displacement is to be read.
    997  * @return      - 0 if the displacement byte was successfully read; nonzero
    998  *                otherwise.
    999  */
   1000 static int readDisplacement(struct InternalInstruction* insn) {
   1001   int8_t d8;
   1002   int16_t d16;
   1003   int32_t d32;
   1004 
   1005   dbgprintf(insn, "readDisplacement()");
   1006 
   1007   if (insn->consumedDisplacement)
   1008     return 0;
   1009 
   1010   insn->consumedDisplacement = TRUE;
   1011 
   1012   switch (insn->eaDisplacement) {
   1013   case EA_DISP_NONE:
   1014     insn->consumedDisplacement = FALSE;
   1015     break;
   1016   case EA_DISP_8:
   1017     if (consumeInt8(insn, &d8))
   1018       return -1;
   1019     insn->displacement = d8;
   1020     break;
   1021   case EA_DISP_16:
   1022     if (consumeInt16(insn, &d16))
   1023       return -1;
   1024     insn->displacement = d16;
   1025     break;
   1026   case EA_DISP_32:
   1027     if (consumeInt32(insn, &d32))
   1028       return -1;
   1029     insn->displacement = d32;
   1030     break;
   1031   }
   1032 
   1033   insn->consumedDisplacement = TRUE;
   1034   return 0;
   1035 }
   1036 
   1037 /*
   1038  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
   1039  *   displacement) for an instruction and interprets it.
   1040  *
   1041  * @param insn  - The instruction whose addressing information is to be read.
   1042  * @return      - 0 if the information was successfully read; nonzero otherwise.
   1043  */
   1044 static int readModRM(struct InternalInstruction* insn) {
   1045   uint8_t mod, rm, reg;
   1046 
   1047   dbgprintf(insn, "readModRM()");
   1048 
   1049   if (insn->consumedModRM)
   1050     return 0;
   1051 
   1052   if (consumeByte(insn, &insn->modRM))
   1053     return -1;
   1054   insn->consumedModRM = TRUE;
   1055 
   1056   mod     = modFromModRM(insn->modRM);
   1057   rm      = rmFromModRM(insn->modRM);
   1058   reg     = regFromModRM(insn->modRM);
   1059 
   1060   /*
   1061    * This goes by insn->registerSize to pick the correct register, which messes
   1062    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
   1063    * fixupReg().
   1064    */
   1065   switch (insn->registerSize) {
   1066   case 2:
   1067     insn->regBase = MODRM_REG_AX;
   1068     insn->eaRegBase = EA_REG_AX;
   1069     break;
   1070   case 4:
   1071     insn->regBase = MODRM_REG_EAX;
   1072     insn->eaRegBase = EA_REG_EAX;
   1073     break;
   1074   case 8:
   1075     insn->regBase = MODRM_REG_RAX;
   1076     insn->eaRegBase = EA_REG_RAX;
   1077     break;
   1078   }
   1079 
   1080   reg |= rFromREX(insn->rexPrefix) << 3;
   1081   rm  |= bFromREX(insn->rexPrefix) << 3;
   1082 
   1083   insn->reg = (Reg)(insn->regBase + reg);
   1084 
   1085   switch (insn->addressSize) {
   1086   case 2:
   1087     insn->eaBaseBase = EA_BASE_BX_SI;
   1088 
   1089     switch (mod) {
   1090     case 0x0:
   1091       if (rm == 0x6) {
   1092         insn->eaBase = EA_BASE_NONE;
   1093         insn->eaDisplacement = EA_DISP_16;
   1094         if (readDisplacement(insn))
   1095           return -1;
   1096       } else {
   1097         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1098         insn->eaDisplacement = EA_DISP_NONE;
   1099       }
   1100       break;
   1101     case 0x1:
   1102       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1103       insn->eaDisplacement = EA_DISP_8;
   1104       if (readDisplacement(insn))
   1105         return -1;
   1106       break;
   1107     case 0x2:
   1108       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1109       insn->eaDisplacement = EA_DISP_16;
   1110       if (readDisplacement(insn))
   1111         return -1;
   1112       break;
   1113     case 0x3:
   1114       insn->eaBase = (EABase)(insn->eaRegBase + rm);
   1115       if (readDisplacement(insn))
   1116         return -1;
   1117       break;
   1118     }
   1119     break;
   1120   case 4:
   1121   case 8:
   1122     insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
   1123 
   1124     switch (mod) {
   1125     case 0x0:
   1126       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
   1127       switch (rm) {
   1128       case 0x4:
   1129       case 0xc:   /* in case REXW.b is set */
   1130         insn->eaBase = (insn->addressSize == 4 ?
   1131                         EA_BASE_sib : EA_BASE_sib64);
   1132         readSIB(insn);
   1133         if (readDisplacement(insn))
   1134           return -1;
   1135         break;
   1136       case 0x5:
   1137         insn->eaBase = EA_BASE_NONE;
   1138         insn->eaDisplacement = EA_DISP_32;
   1139         if (readDisplacement(insn))
   1140           return -1;
   1141         break;
   1142       default:
   1143         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1144         break;
   1145       }
   1146       break;
   1147     case 0x1:
   1148     case 0x2:
   1149       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
   1150       switch (rm) {
   1151       case 0x4:
   1152       case 0xc:   /* in case REXW.b is set */
   1153         insn->eaBase = EA_BASE_sib;
   1154         readSIB(insn);
   1155         if (readDisplacement(insn))
   1156           return -1;
   1157         break;
   1158       default:
   1159         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
   1160         if (readDisplacement(insn))
   1161           return -1;
   1162         break;
   1163       }
   1164       break;
   1165     case 0x3:
   1166       insn->eaDisplacement = EA_DISP_NONE;
   1167       insn->eaBase = (EABase)(insn->eaRegBase + rm);
   1168       break;
   1169     }
   1170     break;
   1171   } /* switch (insn->addressSize) */
   1172 
   1173   return 0;
   1174 }
   1175 
   1176 #define GENERIC_FIXUP_FUNC(name, base, prefix)            \
   1177   static uint8_t name(struct InternalInstruction *insn,   \
   1178                       OperandType type,                   \
   1179                       uint8_t index,                      \
   1180                       uint8_t *valid) {                   \
   1181     *valid = 1;                                           \
   1182     switch (type) {                                       \
   1183     default:                                              \
   1184       debug("Unhandled register type");                   \
   1185       *valid = 0;                                         \
   1186       return 0;                                           \
   1187     case TYPE_Rv:                                         \
   1188       return base + index;                                \
   1189     case TYPE_R8:                                         \
   1190       if (insn->rexPrefix &&                              \
   1191          index >= 4 && index <= 7) {                      \
   1192         return prefix##_SPL + (index - 4);                \
   1193       } else {                                            \
   1194         return prefix##_AL + index;                       \
   1195       }                                                   \
   1196     case TYPE_R16:                                        \
   1197       return prefix##_AX + index;                         \
   1198     case TYPE_R32:                                        \
   1199       return prefix##_EAX + index;                        \
   1200     case TYPE_R64:                                        \
   1201       return prefix##_RAX + index;                        \
   1202     case TYPE_XMM256:                                     \
   1203       return prefix##_YMM0 + index;                       \
   1204     case TYPE_XMM128:                                     \
   1205     case TYPE_XMM64:                                      \
   1206     case TYPE_XMM32:                                      \
   1207     case TYPE_XMM:                                        \
   1208       return prefix##_XMM0 + index;                       \
   1209     case TYPE_MM64:                                       \
   1210     case TYPE_MM32:                                       \
   1211     case TYPE_MM:                                         \
   1212       if (index > 7)                                      \
   1213         *valid = 0;                                       \
   1214       return prefix##_MM0 + index;                        \
   1215     case TYPE_SEGMENTREG:                                 \
   1216       if (index > 5)                                      \
   1217         *valid = 0;                                       \
   1218       return prefix##_ES + index;                         \
   1219     case TYPE_DEBUGREG:                                   \
   1220       if (index > 7)                                      \
   1221         *valid = 0;                                       \
   1222       return prefix##_DR0 + index;                        \
   1223     case TYPE_CONTROLREG:                                 \
   1224       if (index > 8)                                      \
   1225         *valid = 0;                                       \
   1226       return prefix##_CR0 + index;                        \
   1227     }                                                     \
   1228   }
   1229 
   1230 /*
   1231  * fixup*Value - Consults an operand type to determine the meaning of the
   1232  *   reg or R/M field.  If the operand is an XMM operand, for example, an
   1233  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
   1234  *   misinterpret it as.
   1235  *
   1236  * @param insn  - The instruction containing the operand.
   1237  * @param type  - The operand type.
   1238  * @param index - The existing value of the field as reported by readModRM().
   1239  * @param valid - The address of a uint8_t.  The target is set to 1 if the
   1240  *                field is valid for the register class; 0 if not.
   1241  * @return      - The proper value.
   1242  */
   1243 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
   1244 GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
   1245 
   1246 /*
   1247  * fixupReg - Consults an operand specifier to determine which of the
   1248  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
   1249  *
   1250  * @param insn  - See fixup*Value().
   1251  * @param op    - The operand specifier.
   1252  * @return      - 0 if fixup was successful; -1 if the register returned was
   1253  *                invalid for its class.
   1254  */
   1255 static int fixupReg(struct InternalInstruction *insn,
   1256                     const struct OperandSpecifier *op) {
   1257   uint8_t valid;
   1258 
   1259   dbgprintf(insn, "fixupReg()");
   1260 
   1261   switch ((OperandEncoding)op->encoding) {
   1262   default:
   1263     debug("Expected a REG or R/M encoding in fixupReg");
   1264     return -1;
   1265   case ENCODING_VVVV:
   1266     insn->vvvv = (Reg)fixupRegValue(insn,
   1267                                     (OperandType)op->type,
   1268                                     insn->vvvv,
   1269                                     &valid);
   1270     if (!valid)
   1271       return -1;
   1272     break;
   1273   case ENCODING_REG:
   1274     insn->reg = (Reg)fixupRegValue(insn,
   1275                                    (OperandType)op->type,
   1276                                    insn->reg - insn->regBase,
   1277                                    &valid);
   1278     if (!valid)
   1279       return -1;
   1280     break;
   1281   case ENCODING_RM:
   1282     if (insn->eaBase >= insn->eaRegBase) {
   1283       insn->eaBase = (EABase)fixupRMValue(insn,
   1284                                           (OperandType)op->type,
   1285                                           insn->eaBase - insn->eaRegBase,
   1286                                           &valid);
   1287       if (!valid)
   1288         return -1;
   1289     }
   1290     break;
   1291   }
   1292 
   1293   return 0;
   1294 }
   1295 
   1296 /*
   1297  * readOpcodeModifier - Reads an operand from the opcode field of an
   1298  *   instruction.  Handles AddRegFrm instructions.
   1299  *
   1300  * @param insn    - The instruction whose opcode field is to be read.
   1301  * @param inModRM - Indicates that the opcode field is to be read from the
   1302  *                  ModR/M extension; useful for escape opcodes
   1303  * @return        - 0 on success; nonzero otherwise.
   1304  */
   1305 static int readOpcodeModifier(struct InternalInstruction* insn) {
   1306   dbgprintf(insn, "readOpcodeModifier()");
   1307 
   1308   if (insn->consumedOpcodeModifier)
   1309     return 0;
   1310 
   1311   insn->consumedOpcodeModifier = TRUE;
   1312 
   1313   switch (insn->spec->modifierType) {
   1314   default:
   1315     debug("Unknown modifier type.");
   1316     return -1;
   1317   case MODIFIER_NONE:
   1318     debug("No modifier but an operand expects one.");
   1319     return -1;
   1320   case MODIFIER_OPCODE:
   1321     insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
   1322     return 0;
   1323   case MODIFIER_MODRM:
   1324     insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
   1325     return 0;
   1326   }
   1327 }
   1328 
   1329 /*
   1330  * readOpcodeRegister - Reads an operand from the opcode field of an
   1331  *   instruction and interprets it appropriately given the operand width.
   1332  *   Handles AddRegFrm instructions.
   1333  *
   1334  * @param insn  - See readOpcodeModifier().
   1335  * @param size  - The width (in bytes) of the register being specified.
   1336  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
   1337  *                RAX.
   1338  * @return      - 0 on success; nonzero otherwise.
   1339  */
   1340 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
   1341   dbgprintf(insn, "readOpcodeRegister()");
   1342 
   1343   if (readOpcodeModifier(insn))
   1344     return -1;
   1345 
   1346   if (size == 0)
   1347     size = insn->registerSize;
   1348 
   1349   switch (size) {
   1350   case 1:
   1351     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
   1352                                                   | insn->opcodeModifier));
   1353     if (insn->rexPrefix &&
   1354         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
   1355         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
   1356       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
   1357                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
   1358     }
   1359 
   1360     break;
   1361   case 2:
   1362     insn->opcodeRegister = (Reg)(MODRM_REG_AX
   1363                                  + ((bFromREX(insn->rexPrefix) << 3)
   1364                                     | insn->opcodeModifier));
   1365     break;
   1366   case 4:
   1367     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
   1368                                  + ((bFromREX(insn->rexPrefix) << 3)
   1369                                     | insn->opcodeModifier));
   1370     break;
   1371   case 8:
   1372     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
   1373                                  + ((bFromREX(insn->rexPrefix) << 3)
   1374                                     | insn->opcodeModifier));
   1375     break;
   1376   }
   1377 
   1378   return 0;
   1379 }
   1380 
   1381 /*
   1382  * readImmediate - Consumes an immediate operand from an instruction, given the
   1383  *   desired operand size.
   1384  *
   1385  * @param insn  - The instruction whose operand is to be read.
   1386  * @param size  - The width (in bytes) of the operand.
   1387  * @return      - 0 if the immediate was successfully consumed; nonzero
   1388  *                otherwise.
   1389  */
   1390 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
   1391   uint8_t imm8;
   1392   uint16_t imm16;
   1393   uint32_t imm32;
   1394   uint64_t imm64;
   1395 
   1396   dbgprintf(insn, "readImmediate()");
   1397 
   1398   if (insn->numImmediatesConsumed == 2) {
   1399     debug("Already consumed two immediates");
   1400     return -1;
   1401   }
   1402 
   1403   if (size == 0)
   1404     size = insn->immediateSize;
   1405   else
   1406     insn->immediateSize = size;
   1407 
   1408   switch (size) {
   1409   case 1:
   1410     if (consumeByte(insn, &imm8))
   1411       return -1;
   1412     insn->immediates[insn->numImmediatesConsumed] = imm8;
   1413     break;
   1414   case 2:
   1415     if (consumeUInt16(insn, &imm16))
   1416       return -1;
   1417     insn->immediates[insn->numImmediatesConsumed] = imm16;
   1418     break;
   1419   case 4:
   1420     if (consumeUInt32(insn, &imm32))
   1421       return -1;
   1422     insn->immediates[insn->numImmediatesConsumed] = imm32;
   1423     break;
   1424   case 8:
   1425     if (consumeUInt64(insn, &imm64))
   1426       return -1;
   1427     insn->immediates[insn->numImmediatesConsumed] = imm64;
   1428     break;
   1429   }
   1430 
   1431   insn->numImmediatesConsumed++;
   1432 
   1433   return 0;
   1434 }
   1435 
   1436 /*
   1437  * readVVVV - Consumes an immediate operand from an instruction, given the
   1438  *   desired operand size.
   1439  *
   1440  * @param insn  - The instruction whose operand is to be read.
   1441  * @return      - 0 if the immediate was successfully consumed; nonzero
   1442  *                otherwise.
   1443  */
   1444 static int readVVVV(struct InternalInstruction* insn) {
   1445   dbgprintf(insn, "readVVVV()");
   1446 
   1447   if (insn->vexSize == 3)
   1448     insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
   1449   else if (insn->vexSize == 2)
   1450     insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
   1451   else
   1452     return -1;
   1453 
   1454   return 0;
   1455 }
   1456 
   1457 /*
   1458  * readOperands - Consults the specifier for an instruction and consumes all
   1459  *   operands for that instruction, interpreting them as it goes.
   1460  *
   1461  * @param insn  - The instruction whose operands are to be read and interpreted.
   1462  * @return      - 0 if all operands could be read; nonzero otherwise.
   1463  */
   1464 static int readOperands(struct InternalInstruction* insn) {
   1465   int index;
   1466 
   1467   dbgprintf(insn, "readOperands()");
   1468 
   1469   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
   1470     switch (insn->spec->operands[index].encoding) {
   1471     case ENCODING_NONE:
   1472       break;
   1473     case ENCODING_REG:
   1474     case ENCODING_RM:
   1475       if (readModRM(insn))
   1476         return -1;
   1477       if (fixupReg(insn, &insn->spec->operands[index]))
   1478         return -1;
   1479       break;
   1480     case ENCODING_CB:
   1481     case ENCODING_CW:
   1482     case ENCODING_CD:
   1483     case ENCODING_CP:
   1484     case ENCODING_CO:
   1485     case ENCODING_CT:
   1486       dbgprintf(insn, "We currently don't hande code-offset encodings");
   1487       return -1;
   1488     case ENCODING_IB:
   1489       if (readImmediate(insn, 1))
   1490         return -1;
   1491       if (insn->spec->operands[index].type == TYPE_IMM3 &&
   1492           insn->immediates[insn->numImmediatesConsumed - 1] > 7)
   1493         return -1;
   1494       break;
   1495     case ENCODING_IW:
   1496       if (readImmediate(insn, 2))
   1497         return -1;
   1498       break;
   1499     case ENCODING_ID:
   1500       if (readImmediate(insn, 4))
   1501         return -1;
   1502       break;
   1503     case ENCODING_IO:
   1504       if (readImmediate(insn, 8))
   1505         return -1;
   1506       break;
   1507     case ENCODING_Iv:
   1508       if (readImmediate(insn, insn->immediateSize))
   1509         return -1;
   1510       break;
   1511     case ENCODING_Ia:
   1512       if (readImmediate(insn, insn->addressSize))
   1513         return -1;
   1514       break;
   1515     case ENCODING_RB:
   1516       if (readOpcodeRegister(insn, 1))
   1517         return -1;
   1518       break;
   1519     case ENCODING_RW:
   1520       if (readOpcodeRegister(insn, 2))
   1521         return -1;
   1522       break;
   1523     case ENCODING_RD:
   1524       if (readOpcodeRegister(insn, 4))
   1525         return -1;
   1526       break;
   1527     case ENCODING_RO:
   1528       if (readOpcodeRegister(insn, 8))
   1529         return -1;
   1530       break;
   1531     case ENCODING_Rv:
   1532       if (readOpcodeRegister(insn, 0))
   1533         return -1;
   1534       break;
   1535     case ENCODING_I:
   1536       if (readOpcodeModifier(insn))
   1537         return -1;
   1538       break;
   1539     case ENCODING_VVVV:
   1540       if (readVVVV(insn))
   1541         return -1;
   1542       if (fixupReg(insn, &insn->spec->operands[index]))
   1543         return -1;
   1544       break;
   1545     case ENCODING_DUP:
   1546       break;
   1547     default:
   1548       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
   1549       return -1;
   1550     }
   1551   }
   1552 
   1553   return 0;
   1554 }
   1555 
   1556 /*
   1557  * decodeInstruction - Reads and interprets a full instruction provided by the
   1558  *   user.
   1559  *
   1560  * @param insn      - A pointer to the instruction to be populated.  Must be
   1561  *                    pre-allocated.
   1562  * @param reader    - The function to be used to read the instruction's bytes.
   1563  * @param readerArg - A generic argument to be passed to the reader to store
   1564  *                    any internal state.
   1565  * @param logger    - If non-NULL, the function to be used to write log messages
   1566  *                    and warnings.
   1567  * @param loggerArg - A generic argument to be passed to the logger to store
   1568  *                    any internal state.
   1569  * @param startLoc  - The address (in the reader's address space) of the first
   1570  *                    byte in the instruction.
   1571  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
   1572  *                    decode the instruction in.
   1573  * @return          - 0 if the instruction's memory could be read; nonzero if
   1574  *                    not.
   1575  */
   1576 int decodeInstruction(struct InternalInstruction* insn,
   1577                       byteReader_t reader,
   1578                       void* readerArg,
   1579                       dlog_t logger,
   1580                       void* loggerArg,
   1581                       uint64_t startLoc,
   1582                       DisassemblerMode mode) {
   1583   memset(insn, 0, sizeof(struct InternalInstruction));
   1584 
   1585   insn->reader = reader;
   1586   insn->readerArg = readerArg;
   1587   insn->dlog = logger;
   1588   insn->dlogArg = loggerArg;
   1589   insn->startLocation = startLoc;
   1590   insn->readerCursor = startLoc;
   1591   insn->mode = mode;
   1592   insn->numImmediatesConsumed = 0;
   1593 
   1594   if (readPrefixes(insn)       ||
   1595       readOpcode(insn)         ||
   1596       getID(insn)              ||
   1597       insn->instructionID == 0 ||
   1598       readOperands(insn))
   1599     return -1;
   1600 
   1601   insn->length = insn->readerCursor - insn->startLocation;
   1602 
   1603   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
   1604             startLoc, insn->readerCursor, insn->length);
   1605 
   1606   if (insn->length > 15)
   1607     dbgprintf(insn, "Instruction exceeds 15-byte limit");
   1608 
   1609   return 0;
   1610 }
   1611