Home | History | Annotate | Download | only in windows
      1 /* Copyright (c) 2007, Google Inc.
      2  * All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  *
     30  * ---
     31  * Author: Joi Sigurdsson
     32  *
     33  * Implementation of MiniDisassembler.
     34  */
     35 
     36 #include "mini_disassembler.h"
     37 
     38 namespace sidestep {
     39 
     40 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
     41                                    bool address_default_is_32_bits)
     42     : operand_default_is_32_bits_(operand_default_is_32_bits),
     43       address_default_is_32_bits_(address_default_is_32_bits) {
     44   Initialize();
     45 }
     46 
     47 MiniDisassembler::MiniDisassembler()
     48     : operand_default_is_32_bits_(true),
     49       address_default_is_32_bits_(true) {
     50   Initialize();
     51 }
     52 
     53 InstructionType MiniDisassembler::Disassemble(
     54     unsigned char* start_byte,
     55     unsigned int& instruction_bytes) {
     56   // Clean up any state from previous invocations.
     57   Initialize();
     58 
     59   // Start by processing any prefixes.
     60   unsigned char* current_byte = start_byte;
     61   unsigned int size = 0;
     62   InstructionType instruction_type = ProcessPrefixes(current_byte, size);
     63 
     64   if (IT_UNKNOWN == instruction_type)
     65     return instruction_type;
     66 
     67   current_byte += size;
     68   size = 0;
     69 
     70   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
     71   // and address_is_32_bits_ flags are correctly set.
     72 
     73   instruction_type = ProcessOpcode(current_byte, 0, size);
     74 
     75   // Check for error processing instruction
     76   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
     77     return IT_UNKNOWN;
     78   }
     79 
     80   current_byte += size;
     81 
     82   // Invariant: operand_bytes_ indicates the total size of operands
     83   // specified by the opcode and/or ModR/M byte and/or SIB byte.
     84   // pCurrentByte points to the first byte after the ModR/M byte, or after
     85   // the SIB byte if it is present (i.e. the first byte of any operands
     86   // encoded in the instruction).
     87 
     88   // We get the total length of any prefixes, the opcode, and the ModR/M and
     89   // SIB bytes if present, by taking the difference of the original starting
     90   // address and the current byte (which points to the first byte of the
     91   // operands if present, or to the first byte of the next instruction if
     92   // they are not).  Adding the count of bytes in the operands encoded in
     93   // the instruction gives us the full length of the instruction in bytes.
     94   instruction_bytes += operand_bytes_ + (current_byte - start_byte);
     95 
     96   // Return the instruction type, which was set by ProcessOpcode().
     97   return instruction_type_;
     98 }
     99 
    100 void MiniDisassembler::Initialize() {
    101   operand_is_32_bits_ = operand_default_is_32_bits_;
    102   address_is_32_bits_ = address_default_is_32_bits_;
    103 #ifdef _M_X64
    104   operand_default_support_64_bits_ = true;
    105 #else
    106   operand_default_support_64_bits_ = false;
    107 #endif
    108   operand_is_64_bits_ = false;
    109   operand_bytes_ = 0;
    110   have_modrm_ = false;
    111   should_decode_modrm_ = false;
    112   instruction_type_ = IT_UNKNOWN;
    113   got_f2_prefix_ = false;
    114   got_f3_prefix_ = false;
    115   got_66_prefix_ = false;
    116 }
    117 
    118 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
    119                                                   unsigned int& size) {
    120   InstructionType instruction_type = IT_GENERIC;
    121   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
    122 
    123   switch (opcode.type_) {
    124     case IT_PREFIX_ADDRESS:
    125       address_is_32_bits_ = !address_default_is_32_bits_;
    126       goto nochangeoperand;
    127     case IT_PREFIX_OPERAND:
    128       operand_is_32_bits_ = !operand_default_is_32_bits_;
    129       nochangeoperand:
    130     case IT_PREFIX:
    131 
    132       if (0xF2 == (*start_byte))
    133         got_f2_prefix_ = true;
    134       else if (0xF3 == (*start_byte))
    135         got_f3_prefix_ = true;
    136       else if (0x66 == (*start_byte))
    137         got_66_prefix_ = true;
    138       else if (operand_default_support_64_bits_ && (*start_byte) & 0x48)
    139         operand_is_64_bits_ = true;
    140 
    141       instruction_type = opcode.type_;
    142       size ++;
    143       // we got a prefix, so add one and check next byte
    144       ProcessPrefixes(start_byte + 1, size);
    145     default:
    146       break;   // not a prefix byte
    147   }
    148 
    149   return instruction_type;
    150 }
    151 
    152 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
    153                                                 unsigned int table_index,
    154                                                 unsigned int& size) {
    155   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
    156   unsigned char current_byte = (*start_byte) >> table.shift_;
    157   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
    158 
    159   // Check whether the byte we have is inside the table we have.
    160   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
    161     instruction_type_ = IT_UNKNOWN;
    162     return instruction_type_;
    163   }
    164 
    165   const Opcode& opcode = table.table_[current_byte];
    166   if (IT_UNUSED == opcode.type_) {
    167     // This instruction is not used by the IA-32 ISA, so we indicate
    168     // this to the user.  Probably means that we were pointed to
    169     // a byte in memory that was not the start of an instruction.
    170     instruction_type_ = IT_UNUSED;
    171     return instruction_type_;
    172   } else if (IT_REFERENCE == opcode.type_) {
    173     // We are looking at an opcode that has more bytes (or is continued
    174     // in the ModR/M byte).  Recursively find the opcode definition in
    175     // the table for the opcode's next byte.
    176     size++;
    177     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
    178     return instruction_type_;
    179   }
    180 
    181   const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
    182   if (opcode.is_prefix_dependent_) {
    183     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
    184       specific_opcode = &opcode.opcode_if_f2_prefix_;
    185     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
    186       specific_opcode = &opcode.opcode_if_f3_prefix_;
    187     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
    188       specific_opcode = &opcode.opcode_if_66_prefix_;
    189     }
    190   }
    191 
    192   // Inv: The opcode type is known.
    193   instruction_type_ = specific_opcode->type_;
    194 
    195   // Let's process the operand types to see if we have any immediate
    196   // operands, and/or a ModR/M byte.
    197 
    198   ProcessOperand(specific_opcode->flag_dest_);
    199   ProcessOperand(specific_opcode->flag_source_);
    200   ProcessOperand(specific_opcode->flag_aux_);
    201 
    202   // Inv: We have processed the opcode and incremented operand_bytes_
    203   // by the number of bytes of any operands specified by the opcode
    204   // that are stored in the instruction (not registers etc.).  Now
    205   // we need to return the total number of bytes for the opcode and
    206   // for the ModR/M or SIB bytes if they are present.
    207 
    208   if (table.mask_ != 0xff) {
    209     if (have_modrm_) {
    210       // we're looking at a ModR/M byte so we're not going to
    211       // count that into the opcode size
    212       ProcessModrm(start_byte, size);
    213       return IT_GENERIC;
    214     } else {
    215       // need to count the ModR/M byte even if it's just being
    216       // used for opcode extension
    217       size++;
    218       return IT_GENERIC;
    219     }
    220   } else {
    221     if (have_modrm_) {
    222       // The ModR/M byte is the next byte.
    223       size++;
    224       ProcessModrm(start_byte + 1, size);
    225       return IT_GENERIC;
    226     } else {
    227       size++;
    228       return IT_GENERIC;
    229     }
    230   }
    231 }
    232 
    233 bool MiniDisassembler::ProcessOperand(int flag_operand) {
    234   bool succeeded = true;
    235   if (AM_NOT_USED == flag_operand)
    236     return succeeded;
    237 
    238   // Decide what to do based on the addressing mode.
    239   switch (flag_operand & AM_MASK) {
    240     // No ModR/M byte indicated by these addressing modes, and no
    241     // additional (e.g. immediate) parameters.
    242     case AM_A: // Direct address
    243     case AM_F: // EFLAGS register
    244     case AM_X: // Memory addressed by the DS:SI register pair
    245     case AM_Y: // Memory addressed by the ES:DI register pair
    246     case AM_IMPLICIT: // Parameter is implicit, occupies no space in
    247                        // instruction
    248       break;
    249 
    250     // There is a ModR/M byte but it does not necessarily need
    251     // to be decoded.
    252     case AM_C: // reg field of ModR/M selects a control register
    253     case AM_D: // reg field of ModR/M selects a debug register
    254     case AM_G: // reg field of ModR/M selects a general register
    255     case AM_P: // reg field of ModR/M selects an MMX register
    256     case AM_R: // mod field of ModR/M may refer only to a general register
    257     case AM_S: // reg field of ModR/M selects a segment register
    258     case AM_T: // reg field of ModR/M selects a test register
    259     case AM_V: // reg field of ModR/M selects a 128-bit XMM register
    260       have_modrm_ = true;
    261       break;
    262 
    263     // In these addressing modes, there is a ModR/M byte and it needs to be
    264     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
    265     case AM_E: // Operand is either a general-purpose register or memory,
    266                  // specified by ModR/M byte
    267     case AM_M: // ModR/M byte will refer only to memory
    268     case AM_Q: // Operand is either an MMX register or memory (complex
    269                  // evaluation), specified by ModR/M byte
    270     case AM_W: // Operand is either a 128-bit XMM register or memory (complex
    271                  // eval), specified by ModR/M byte
    272       have_modrm_ = true;
    273       should_decode_modrm_ = true;
    274       break;
    275 
    276     // These addressing modes specify an immediate or an offset value
    277     // directly, so we need to look at the operand type to see how many
    278     // bytes.
    279     case AM_I: // Immediate data.
    280     case AM_J: // Jump to offset.
    281     case AM_O: // Operand is at offset.
    282       switch (flag_operand & OT_MASK) {
    283         case OT_B: // Byte regardless of operand-size attribute.
    284           operand_bytes_ += OS_BYTE;
    285           break;
    286         case OT_C: // Byte or word, depending on operand-size attribute.
    287           if (operand_is_32_bits_)
    288             operand_bytes_ += OS_WORD;
    289           else
    290             operand_bytes_ += OS_BYTE;
    291           break;
    292         case OT_D: // Doubleword, regardless of operand-size attribute.
    293           operand_bytes_ += OS_DOUBLE_WORD;
    294           break;
    295         case OT_DQ: // Double-quadword, regardless of operand-size attribute.
    296           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
    297           break;
    298         case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
    299                      // attribute.
    300           if (operand_is_32_bits_)
    301             operand_bytes_ += OS_48_BIT_POINTER;
    302           else
    303             operand_bytes_ += OS_32_BIT_POINTER;
    304           break;
    305         case OT_PS: // 128-bit packed single-precision floating-point data.
    306           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
    307           break;
    308         case OT_Q: // Quadword, regardless of operand-size attribute.
    309           operand_bytes_ += OS_QUAD_WORD;
    310           break;
    311         case OT_S: // 6-byte pseudo-descriptor.
    312           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
    313           break;
    314         case OT_SD: // Scalar Double-Precision Floating-Point Value
    315         case OT_PD: // Unaligned packed double-precision floating point value
    316           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
    317           break;
    318         case OT_SS:
    319           // Scalar element of a 128-bit packed single-precision
    320           // floating data.
    321           // We simply return enItUnknown since we don't have to support
    322           // floating point
    323           succeeded = false;
    324           break;
    325         case OT_V: // Word, doubleword or quadword, depending on operand-size
    326                    // attribute.
    327           if (operand_is_64_bits_ && flag_operand & AM_I &&
    328               flag_operand & IOS_64)
    329             operand_bytes_ += OS_QUAD_WORD;
    330           else if (operand_is_32_bits_)
    331             operand_bytes_ += OS_DOUBLE_WORD;
    332           else
    333             operand_bytes_ += OS_WORD;
    334           break;
    335         case OT_W: // Word, regardless of operand-size attribute.
    336           operand_bytes_ += OS_WORD;
    337           break;
    338 
    339         // Can safely ignore these.
    340         case OT_A: // Two one-word operands in memory or two double-word
    341                      // operands in memory
    342         case OT_PI: // Quadword MMX technology register (e.g. mm0)
    343         case OT_SI: // Doubleword integer register (e.g., eax)
    344           break;
    345 
    346         default:
    347           break;
    348       }
    349       break;
    350 
    351     default:
    352       break;
    353   }
    354 
    355   return succeeded;
    356 }
    357 
    358 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
    359                                     unsigned int& size) {
    360   // If we don't need to decode, we just return the size of the ModR/M
    361   // byte (there is never a SIB byte in this case).
    362   if (!should_decode_modrm_) {
    363     size++;
    364     return true;
    365   }
    366 
    367   // We never care about the reg field, only the combination of the mod
    368   // and r/m fields, so let's start by packing those fields together into
    369   // 5 bits.
    370   unsigned char modrm = (*start_byte);
    371   unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
    372   modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
    373   mod = mod >> 3; // shift the mod field to the right place
    374   modrm = mod | modrm; // combine the r/m and mod fields as discussed
    375   mod = mod >> 3; // shift the mod field to bits 2..0
    376 
    377   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
    378   // in bits 2..0, and mod contains the mod field in bits 2..0
    379 
    380   const ModrmEntry* modrm_entry = 0;
    381   if (address_is_32_bits_)
    382     modrm_entry = &s_ia32_modrm_map_[modrm];
    383   else
    384     modrm_entry = &s_ia16_modrm_map_[modrm];
    385 
    386   // Invariant: modrm_entry points to information that we need to decode
    387   // the ModR/M byte.
    388 
    389   // Add to the count of operand bytes, if the ModR/M byte indicates
    390   // that some operands are encoded in the instruction.
    391   if (modrm_entry->is_encoded_in_instruction_)
    392     operand_bytes_ += modrm_entry->operand_size_;
    393 
    394   // Process the SIB byte if necessary, and return the count
    395   // of ModR/M and SIB bytes.
    396   if (modrm_entry->use_sib_byte_) {
    397     size++;
    398     return ProcessSib(start_byte + 1, mod, size);
    399   } else {
    400     size++;
    401     return true;
    402   }
    403 }
    404 
    405 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
    406                                   unsigned char mod,
    407                                   unsigned int& size) {
    408   // get the mod field from the 2..0 bits of the SIB byte
    409   unsigned char sib_base = (*start_byte) & 0x07;
    410   if (0x05 == sib_base) {
    411     switch (mod) {
    412     case 0x00: // mod == 00
    413     case 0x02: // mod == 10
    414       operand_bytes_ += OS_DOUBLE_WORD;
    415       break;
    416     case 0x01: // mod == 01
    417       operand_bytes_ += OS_BYTE;
    418       break;
    419     case 0x03: // mod == 11
    420       // According to the IA-32 docs, there does not seem to be a disp
    421       // value for this value of mod
    422     default:
    423       break;
    424     }
    425   }
    426 
    427   size++;
    428   return true;
    429 }
    430 
    431 };  // namespace sidestep
    432