Home | History | Annotate | Download | only in sidestep
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Implementation of MiniDisassembler.
      6 
      7 #ifdef _WIN64
      8 #error The code in this file should not be used on 64-bit Windows.
      9 #endif
     10 
     11 #include "sandbox/win/src/sidestep/mini_disassembler.h"
     12 
     13 namespace sidestep {
     14 
     15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
     16                                    bool address_default_is_32_bits)
     17     : operand_default_is_32_bits_(operand_default_is_32_bits),
     18       address_default_is_32_bits_(address_default_is_32_bits) {
     19   Initialize();
     20 }
     21 
     22 MiniDisassembler::MiniDisassembler()
     23     : operand_default_is_32_bits_(true),
     24       address_default_is_32_bits_(true) {
     25   Initialize();
     26 }
     27 
     28 InstructionType MiniDisassembler::Disassemble(
     29     unsigned char* start_byte,
     30     unsigned int* instruction_bytes) {
     31   // Clean up any state from previous invocations.
     32   Initialize();
     33 
     34   // Start by processing any prefixes.
     35   unsigned char* current_byte = start_byte;
     36   unsigned int size = 0;
     37   InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
     38 
     39   if (IT_UNKNOWN == instruction_type)
     40     return instruction_type;
     41 
     42   current_byte += size;
     43   size = 0;
     44 
     45   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
     46   // and address_is_32_bits_ flags are correctly set.
     47 
     48   instruction_type = ProcessOpcode(current_byte, 0, &size);
     49 
     50   // Check for error processing instruction
     51   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
     52     return IT_UNKNOWN;
     53   }
     54 
     55   current_byte += size;
     56 
     57   // Invariant: operand_bytes_ indicates the total size of operands
     58   // specified by the opcode and/or ModR/M byte and/or SIB byte.
     59   // pCurrentByte points to the first byte after the ModR/M byte, or after
     60   // the SIB byte if it is present (i.e. the first byte of any operands
     61   // encoded in the instruction).
     62 
     63   // We get the total length of any prefixes, the opcode, and the ModR/M and
     64   // SIB bytes if present, by taking the difference of the original starting
     65   // address and the current byte (which points to the first byte of the
     66   // operands if present, or to the first byte of the next instruction if
     67   // they are not).  Adding the count of bytes in the operands encoded in
     68   // the instruction gives us the full length of the instruction in bytes.
     69   *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
     70 
     71   // Return the instruction type, which was set by ProcessOpcode().
     72   return instruction_type_;
     73 }
     74 
     75 void MiniDisassembler::Initialize() {
     76   operand_is_32_bits_ = operand_default_is_32_bits_;
     77   address_is_32_bits_ = address_default_is_32_bits_;
     78   operand_bytes_ = 0;
     79   have_modrm_ = false;
     80   should_decode_modrm_ = false;
     81   instruction_type_ = IT_UNKNOWN;
     82   got_f2_prefix_ = false;
     83   got_f3_prefix_ = false;
     84   got_66_prefix_ = false;
     85 }
     86 
     87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
     88                                                   unsigned int* size) {
     89   InstructionType instruction_type = IT_GENERIC;
     90   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
     91 
     92   switch (opcode.type_) {
     93     case IT_PREFIX_ADDRESS:
     94       address_is_32_bits_ = !address_default_is_32_bits_;
     95       goto nochangeoperand;
     96     case IT_PREFIX_OPERAND:
     97       operand_is_32_bits_ = !operand_default_is_32_bits_;
     98       nochangeoperand:
     99     case IT_PREFIX:
    100 
    101       if (0xF2 == (*start_byte))
    102         got_f2_prefix_ = true;
    103       else if (0xF3 == (*start_byte))
    104         got_f3_prefix_ = true;
    105       else if (0x66 == (*start_byte))
    106         got_66_prefix_ = true;
    107 
    108       instruction_type = opcode.type_;
    109       (*size)++;
    110       // we got a prefix, so add one and check next byte
    111       ProcessPrefixes(start_byte + 1, size);
    112     default:
    113       break;   // not a prefix byte
    114   }
    115 
    116   return instruction_type;
    117 }
    118 
    119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
    120                                                 unsigned int table_index,
    121                                                 unsigned int* size) {
    122   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
    123   unsigned char current_byte = (*start_byte) >> table.shift_;
    124   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
    125 
    126   // Check whether the byte we have is inside the table we have.
    127   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
    128     instruction_type_ = IT_UNKNOWN;
    129     return instruction_type_;
    130   }
    131 
    132   const Opcode& opcode = table.table_[current_byte];
    133   if (IT_UNUSED == opcode.type_) {
    134     // This instruction is not used by the IA-32 ISA, so we indicate
    135     // this to the user.  Probably means that we were pointed to
    136     // a byte in memory that was not the start of an instruction.
    137     instruction_type_ = IT_UNUSED;
    138     return instruction_type_;
    139   } else if (IT_REFERENCE == opcode.type_) {
    140     // We are looking at an opcode that has more bytes (or is continued
    141     // in the ModR/M byte).  Recursively find the opcode definition in
    142     // the table for the opcode's next byte.
    143     (*size)++;
    144     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
    145     return instruction_type_;
    146   }
    147 
    148   const SpecificOpcode* specific_opcode = reinterpret_cast<
    149                                               const SpecificOpcode*>(&opcode);
    150   if (opcode.is_prefix_dependent_) {
    151     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
    152       specific_opcode = &opcode.opcode_if_f2_prefix_;
    153     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
    154       specific_opcode = &opcode.opcode_if_f3_prefix_;
    155     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
    156       specific_opcode = &opcode.opcode_if_66_prefix_;
    157     }
    158   }
    159 
    160   // Inv: The opcode type is known.
    161   instruction_type_ = specific_opcode->type_;
    162 
    163   // Let's process the operand types to see if we have any immediate
    164   // operands, and/or a ModR/M byte.
    165 
    166   ProcessOperand(specific_opcode->flag_dest_);
    167   ProcessOperand(specific_opcode->flag_source_);
    168   ProcessOperand(specific_opcode->flag_aux_);
    169 
    170   // Inv: We have processed the opcode and incremented operand_bytes_
    171   // by the number of bytes of any operands specified by the opcode
    172   // that are stored in the instruction (not registers etc.).  Now
    173   // we need to return the total number of bytes for the opcode and
    174   // for the ModR/M or SIB bytes if they are present.
    175 
    176   if (table.mask_ != 0xff) {
    177     if (have_modrm_) {
    178       // we're looking at a ModR/M byte so we're not going to
    179       // count that into the opcode size
    180       ProcessModrm(start_byte, size);
    181       return IT_GENERIC;
    182     } else {
    183       // need to count the ModR/M byte even if it's just being
    184       // used for opcode extension
    185       (*size)++;
    186       return IT_GENERIC;
    187     }
    188   } else {
    189     if (have_modrm_) {
    190       // The ModR/M byte is the next byte.
    191       (*size)++;
    192       ProcessModrm(start_byte + 1, size);
    193       return IT_GENERIC;
    194     } else {
    195       (*size)++;
    196       return IT_GENERIC;
    197     }
    198   }
    199 }
    200 
    201 bool MiniDisassembler::ProcessOperand(int flag_operand) {
    202   bool succeeded = true;
    203   if (AM_NOT_USED == flag_operand)
    204     return succeeded;
    205 
    206   // Decide what to do based on the addressing mode.
    207   switch (flag_operand & AM_MASK) {
    208     // No ModR/M byte indicated by these addressing modes, and no
    209     // additional (e.g. immediate) parameters.
    210     case AM_A:  // Direct address
    211     case AM_F:  // EFLAGS register
    212     case AM_X:  // Memory addressed by the DS:SI register pair
    213     case AM_Y:  // Memory addressed by the ES:DI register pair
    214     case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
    215                        // instruction
    216       break;
    217 
    218     // There is a ModR/M byte but it does not necessarily need
    219     // to be decoded.
    220     case AM_C:  // reg field of ModR/M selects a control register
    221     case AM_D:  // reg field of ModR/M selects a debug register
    222     case AM_G:  // reg field of ModR/M selects a general register
    223     case AM_P:  // reg field of ModR/M selects an MMX register
    224     case AM_R:  // mod field of ModR/M may refer only to a general register
    225     case AM_S:  // reg field of ModR/M selects a segment register
    226     case AM_T:  // reg field of ModR/M selects a test register
    227     case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
    228       have_modrm_ = true;
    229       break;
    230 
    231     // In these addressing modes, there is a ModR/M byte and it needs to be
    232     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
    233     case AM_E:  // Operand is either a general-purpose register or memory,
    234                 // specified by ModR/M byte
    235     case AM_M:  // ModR/M byte will refer only to memory
    236     case AM_Q:  // Operand is either an MMX register or memory (complex
    237                 // evaluation), specified by ModR/M byte
    238     case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
    239                 // eval), specified by ModR/M byte
    240       have_modrm_ = true;
    241       should_decode_modrm_ = true;
    242       break;
    243 
    244     // These addressing modes specify an immediate or an offset value
    245     // directly, so we need to look at the operand type to see how many
    246     // bytes.
    247     case AM_I:  // Immediate data.
    248     case AM_J:  // Jump to offset.
    249     case AM_O:  // Operand is at offset.
    250       switch (flag_operand & OT_MASK) {
    251         case OT_B:  // Byte regardless of operand-size attribute.
    252           operand_bytes_ += OS_BYTE;
    253           break;
    254         case OT_C:  // Byte or word, depending on operand-size attribute.
    255           if (operand_is_32_bits_)
    256             operand_bytes_ += OS_WORD;
    257           else
    258             operand_bytes_ += OS_BYTE;
    259           break;
    260         case OT_D:  // Doubleword, regardless of operand-size attribute.
    261           operand_bytes_ += OS_DOUBLE_WORD;
    262           break;
    263         case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
    264           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
    265           break;
    266         case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
    267                     // attribute.
    268           if (operand_is_32_bits_)
    269             operand_bytes_ += OS_48_BIT_POINTER;
    270           else
    271             operand_bytes_ += OS_32_BIT_POINTER;
    272           break;
    273         case OT_PS:  // 128-bit packed single-precision floating-point data.
    274           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
    275           break;
    276         case OT_Q:  // Quadword, regardless of operand-size attribute.
    277           operand_bytes_ += OS_QUAD_WORD;
    278           break;
    279         case OT_S:  // 6-byte pseudo-descriptor.
    280           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
    281           break;
    282         case OT_SD:  // Scalar Double-Precision Floating-Point Value
    283         case OT_PD:  // Unaligned packed double-precision floating point value
    284           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
    285           break;
    286         case OT_SS:
    287           // Scalar element of a 128-bit packed single-precision
    288           // floating data.
    289           // We simply return enItUnknown since we don't have to support
    290           // floating point
    291           succeeded = false;
    292           break;
    293         case OT_V:  // Word or doubleword, depending on operand-size attribute.
    294           if (operand_is_32_bits_)
    295             operand_bytes_ += OS_DOUBLE_WORD;
    296           else
    297             operand_bytes_ += OS_WORD;
    298           break;
    299         case OT_W:  // Word, regardless of operand-size attribute.
    300           operand_bytes_ += OS_WORD;
    301           break;
    302 
    303         // Can safely ignore these.
    304         case OT_A:  // Two one-word operands in memory or two double-word
    305                     // operands in memory
    306         case OT_PI:  // Quadword MMX technology register (e.g. mm0)
    307         case OT_SI:  // Doubleword integer register (e.g., eax)
    308           break;
    309 
    310         default:
    311           break;
    312       }
    313       break;
    314 
    315     default:
    316       break;
    317   }
    318 
    319   return succeeded;
    320 }
    321 
    322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
    323                                     unsigned int* size) {
    324   // If we don't need to decode, we just return the size of the ModR/M
    325   // byte (there is never a SIB byte in this case).
    326   if (!should_decode_modrm_) {
    327     (*size)++;
    328     return true;
    329   }
    330 
    331   // We never care about the reg field, only the combination of the mod
    332   // and r/m fields, so let's start by packing those fields together into
    333   // 5 bits.
    334   unsigned char modrm = (*start_byte);
    335   unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
    336   modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
    337   mod = mod >> 3;  // shift the mod field to the right place
    338   modrm = mod | modrm;  // combine the r/m and mod fields as discussed
    339   mod = mod >> 3;  // shift the mod field to bits 2..0
    340 
    341   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
    342   // in bits 2..0, and mod contains the mod field in bits 2..0
    343 
    344   const ModrmEntry* modrm_entry = 0;
    345   if (address_is_32_bits_)
    346     modrm_entry = &s_ia32_modrm_map_[modrm];
    347   else
    348     modrm_entry = &s_ia16_modrm_map_[modrm];
    349 
    350   // Invariant: modrm_entry points to information that we need to decode
    351   // the ModR/M byte.
    352 
    353   // Add to the count of operand bytes, if the ModR/M byte indicates
    354   // that some operands are encoded in the instruction.
    355   if (modrm_entry->is_encoded_in_instruction_)
    356     operand_bytes_ += modrm_entry->operand_size_;
    357 
    358   // Process the SIB byte if necessary, and return the count
    359   // of ModR/M and SIB bytes.
    360   if (modrm_entry->use_sib_byte_) {
    361     (*size)++;
    362     return ProcessSib(start_byte + 1, mod, size);
    363   } else {
    364     (*size)++;
    365     return true;
    366   }
    367 }
    368 
    369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
    370                                   unsigned char mod,
    371                                   unsigned int* size) {
    372   // get the mod field from the 2..0 bits of the SIB byte
    373   unsigned char sib_base = (*start_byte) & 0x07;
    374   if (0x05 == sib_base) {
    375     switch (mod) {
    376       case 0x00:  // mod == 00
    377       case 0x02:  // mod == 10
    378         operand_bytes_ += OS_DOUBLE_WORD;
    379         break;
    380       case 0x01:  // mod == 01
    381         operand_bytes_ += OS_BYTE;
    382         break;
    383       case 0x03:  // mod == 11
    384         // According to the IA-32 docs, there does not seem to be a disp
    385         // value for this value of mod
    386       default:
    387         break;
    388     }
    389   }
    390 
    391   (*size)++;
    392   return true;
    393 }
    394 
    395 };  // namespace sidestep
    396