Home | History | Annotate | Download | only in memory_watcher
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 /*
      6  * Implementation of MiniDisassembler.
      7  */
      8 
      9 #include "mini_disassembler.h"
     10 
     11 namespace sidestep {
     12 
     13 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
     14                                    bool address_default_is_32_bits)
     15     : operand_default_is_32_bits_(operand_default_is_32_bits),
     16       address_default_is_32_bits_(address_default_is_32_bits) {
     17   Initialize();
     18 }
     19 
     20 MiniDisassembler::MiniDisassembler()
     21     : operand_default_is_32_bits_(true),
     22       address_default_is_32_bits_(true) {
     23   Initialize();
     24 }
     25 
     26 InstructionType MiniDisassembler::Disassemble(
     27     unsigned char* start_byte,
     28     unsigned int& instruction_bytes) {
     29   // Clean up any state from previous invocations.
     30   Initialize();
     31 
     32   // Start by processing any prefixes.
     33   unsigned char* current_byte = start_byte;
     34   unsigned int size = 0;
     35   InstructionType instruction_type = ProcessPrefixes(current_byte, size);
     36 
     37   if (IT_UNKNOWN == instruction_type)
     38     return instruction_type;
     39 
     40   current_byte += size;
     41   size = 0;
     42 
     43   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
     44   // and address_is_32_bits_ flags are correctly set.
     45 
     46   instruction_type = ProcessOpcode(current_byte, 0, size);
     47 
     48   // Check for error processing instruction
     49   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
     50     return IT_UNKNOWN;
     51   }
     52 
     53   current_byte += size;
     54 
     55   // Invariant: operand_bytes_ indicates the total size of operands
     56   // specified by the opcode and/or ModR/M byte and/or SIB byte.
     57   // pCurrentByte points to the first byte after the ModR/M byte, or after
     58   // the SIB byte if it is present (i.e. the first byte of any operands
     59   // encoded in the instruction).
     60 
     61   // We get the total length of any prefixes, the opcode, and the ModR/M and
     62   // SIB bytes if present, by taking the difference of the original starting
     63   // address and the current byte (which points to the first byte of the
     64   // operands if present, or to the first byte of the next instruction if
     65   // they are not).  Adding the count of bytes in the operands encoded in
     66   // the instruction gives us the full length of the instruction in bytes.
     67   instruction_bytes += operand_bytes_ + (current_byte - start_byte);
     68 
     69   // Return the instruction type, which was set by ProcessOpcode().
     70   return instruction_type_;
     71 }
     72 
     73 void MiniDisassembler::Initialize() {
     74   operand_is_32_bits_ = operand_default_is_32_bits_;
     75   address_is_32_bits_ = address_default_is_32_bits_;
     76   operand_bytes_ = 0;
     77   have_modrm_ = false;
     78   should_decode_modrm_ = false;
     79   instruction_type_ = IT_UNKNOWN;
     80   got_f2_prefix_ = false;
     81   got_f3_prefix_ = false;
     82   got_66_prefix_ = false;
     83 }
     84 
     85 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
     86                                                   unsigned int& size) {
     87   InstructionType instruction_type = IT_GENERIC;
     88   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
     89 
     90   switch (opcode.type_) {
     91     case IT_PREFIX_ADDRESS:
     92       address_is_32_bits_ = !address_default_is_32_bits_;
     93       goto nochangeoperand;
     94     case IT_PREFIX_OPERAND:
     95       operand_is_32_bits_ = !operand_default_is_32_bits_;
     96       nochangeoperand:
     97     case IT_PREFIX:
     98 
     99       if (0xF2 == (*start_byte))
    100         got_f2_prefix_ = true;
    101       else if (0xF3 == (*start_byte))
    102         got_f3_prefix_ = true;
    103       else if (0x66 == (*start_byte))
    104         got_66_prefix_ = true;
    105 
    106       instruction_type = opcode.type_;
    107       size ++;
    108       // we got a prefix, so add one and check next byte
    109       ProcessPrefixes(start_byte + 1, size);
    110     default:
    111       break;   // not a prefix byte
    112   }
    113 
    114   return instruction_type;
    115 }
    116 
    117 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
    118                                                 unsigned int table_index,
    119                                                 unsigned int& size) {
    120   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
    121   unsigned char current_byte = (*start_byte) >> table.shift_;
    122   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
    123 
    124   // Check whether the byte we have is inside the table we have.
    125   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
    126     instruction_type_ = IT_UNKNOWN;
    127     return instruction_type_;
    128   }
    129 
    130   const Opcode& opcode = table.table_[current_byte];
    131   if (IT_UNUSED == opcode.type_) {
    132     // This instruction is not used by the IA-32 ISA, so we indicate
    133     // this to the user.  Probably means that we were pointed to
    134     // a byte in memory that was not the start of an instruction.
    135     instruction_type_ = IT_UNUSED;
    136     return instruction_type_;
    137   } else if (IT_REFERENCE == opcode.type_) {
    138     // We are looking at an opcode that has more bytes (or is continued
    139     // in the ModR/M byte).  Recursively find the opcode definition in
    140     // the table for the opcode's next byte.
    141     size++;
    142     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
    143     return instruction_type_;
    144   }
    145 
    146   const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
    147   if (opcode.is_prefix_dependent_) {
    148     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
    149       specific_opcode = &opcode.opcode_if_f2_prefix_;
    150     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
    151       specific_opcode = &opcode.opcode_if_f3_prefix_;
    152     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
    153       specific_opcode = &opcode.opcode_if_66_prefix_;
    154     }
    155   }
    156 
    157   // Inv: The opcode type is known.
    158   instruction_type_ = specific_opcode->type_;
    159 
    160   // Let's process the operand types to see if we have any immediate
    161   // operands, and/or a ModR/M byte.
    162 
    163   ProcessOperand(specific_opcode->flag_dest_);
    164   ProcessOperand(specific_opcode->flag_source_);
    165   ProcessOperand(specific_opcode->flag_aux_);
    166 
    167   // Inv: We have processed the opcode and incremented operand_bytes_
    168   // by the number of bytes of any operands specified by the opcode
    169   // that are stored in the instruction (not registers etc.).  Now
    170   // we need to return the total number of bytes for the opcode and
    171   // for the ModR/M or SIB bytes if they are present.
    172 
    173   if (table.mask_ != 0xff) {
    174     if (have_modrm_) {
    175       // we're looking at a ModR/M byte so we're not going to
    176       // count that into the opcode size
    177       ProcessModrm(start_byte, size);
    178       return IT_GENERIC;
    179     } else {
    180       // need to count the ModR/M byte even if it's just being
    181       // used for opcode extension
    182       size++;
    183       return IT_GENERIC;
    184     }
    185   } else {
    186     if (have_modrm_) {
    187       // The ModR/M byte is the next byte.
    188       size++;
    189       ProcessModrm(start_byte + 1, size);
    190       return IT_GENERIC;
    191     } else {
    192       size++;
    193       return IT_GENERIC;
    194     }
    195   }
    196 }
    197 
    198 bool MiniDisassembler::ProcessOperand(int flag_operand) {
    199   bool succeeded = true;
    200   if (AM_NOT_USED == flag_operand)
    201     return succeeded;
    202 
    203   // Decide what to do based on the addressing mode.
    204   switch (flag_operand & AM_MASK) {
    205     // No ModR/M byte indicated by these addressing modes, and no
    206     // additional (e.g. immediate) parameters.
    207     case AM_A: // Direct address
    208     case AM_F: // EFLAGS register
    209     case AM_X: // Memory addressed by the DS:SI register pair
    210     case AM_Y: // Memory addressed by the ES:DI register pair
    211     case AM_IMPLICIT: // Parameter is implicit, occupies no space in
    212                        // instruction
    213       break;
    214 
    215     // There is a ModR/M byte but it does not necessarily need
    216     // to be decoded.
    217     case AM_C: // reg field of ModR/M selects a control register
    218     case AM_D: // reg field of ModR/M selects a debug register
    219     case AM_G: // reg field of ModR/M selects a general register
    220     case AM_P: // reg field of ModR/M selects an MMX register
    221     case AM_R: // mod field of ModR/M may refer only to a general register
    222     case AM_S: // reg field of ModR/M selects a segment register
    223     case AM_T: // reg field of ModR/M selects a test register
    224     case AM_V: // reg field of ModR/M selects a 128-bit XMM register
    225       have_modrm_ = true;
    226       break;
    227 
    228     // In these addressing modes, there is a ModR/M byte and it needs to be
    229     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
    230     case AM_E: // Operand is either a general-purpose register or memory,
    231                  // specified by ModR/M byte
    232     case AM_M: // ModR/M byte will refer only to memory
    233     case AM_Q: // Operand is either an MMX register or memory (complex
    234                  // evaluation), specified by ModR/M byte
    235     case AM_W: // Operand is either a 128-bit XMM register or memory (complex
    236                  // eval), specified by ModR/M byte
    237       have_modrm_ = true;
    238       should_decode_modrm_ = true;
    239       break;
    240 
    241     // These addressing modes specify an immediate or an offset value
    242     // directly, so we need to look at the operand type to see how many
    243     // bytes.
    244     case AM_I: // Immediate data.
    245     case AM_J: // Jump to offset.
    246     case AM_O: // Operand is at offset.
    247       switch (flag_operand & OT_MASK) {
    248         case OT_B: // Byte regardless of operand-size attribute.
    249           operand_bytes_ += OS_BYTE;
    250           break;
    251         case OT_C: // Byte or word, depending on operand-size attribute.
    252           if (operand_is_32_bits_)
    253             operand_bytes_ += OS_WORD;
    254           else
    255             operand_bytes_ += OS_BYTE;
    256           break;
    257         case OT_D: // Doubleword, regardless of operand-size attribute.
    258           operand_bytes_ += OS_DOUBLE_WORD;
    259           break;
    260         case OT_DQ: // Double-quadword, regardless of operand-size attribute.
    261           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
    262           break;
    263         case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
    264                      // attribute.
    265           if (operand_is_32_bits_)
    266             operand_bytes_ += OS_48_BIT_POINTER;
    267           else
    268             operand_bytes_ += OS_32_BIT_POINTER;
    269           break;
    270         case OT_PS: // 128-bit packed single-precision floating-point data.
    271           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
    272           break;
    273         case OT_Q: // Quadword, regardless of operand-size attribute.
    274           operand_bytes_ += OS_QUAD_WORD;
    275           break;
    276         case OT_S: // 6-byte pseudo-descriptor.
    277           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
    278           break;
    279         case OT_SD: // Scalar Double-Precision Floating-Point Value
    280         case OT_PD: // Unaligned packed double-precision floating point value
    281           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
    282           break;
    283         case OT_SS:
    284           // Scalar element of a 128-bit packed single-precision
    285           // floating data.
    286           // We simply return enItUnknown since we don't have to support
    287           // floating point
    288           succeeded = false;
    289           break;
    290         case OT_V: // Word or doubleword, depending on operand-size attribute.
    291           if (operand_is_32_bits_)
    292             operand_bytes_ += OS_DOUBLE_WORD;
    293           else
    294             operand_bytes_ += OS_WORD;
    295           break;
    296         case OT_W: // Word, regardless of operand-size attribute.
    297           operand_bytes_ += OS_WORD;
    298           break;
    299 
    300         // Can safely ignore these.
    301         case OT_A: // Two one-word operands in memory or two double-word
    302                      // operands in memory
    303         case OT_PI: // Quadword MMX technology register (e.g. mm0)
    304         case OT_SI: // Doubleword integer register (e.g., eax)
    305           break;
    306 
    307         default:
    308           break;
    309       }
    310       break;
    311 
    312     default:
    313       break;
    314   }
    315 
    316   return succeeded;
    317 }
    318 
    319 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
    320                                     unsigned int& size) {
    321   // If we don't need to decode, we just return the size of the ModR/M
    322   // byte (there is never a SIB byte in this case).
    323   if (!should_decode_modrm_) {
    324     size++;
    325     return true;
    326   }
    327 
    328   // We never care about the reg field, only the combination of the mod
    329   // and r/m fields, so let's start by packing those fields together into
    330   // 5 bits.
    331   unsigned char modrm = (*start_byte);
    332   unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
    333   modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
    334   mod = mod >> 3; // shift the mod field to the right place
    335   modrm = mod | modrm; // combine the r/m and mod fields as discussed
    336   mod = mod >> 3; // shift the mod field to bits 2..0
    337 
    338   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
    339   // in bits 2..0, and mod contains the mod field in bits 2..0
    340 
    341   const ModrmEntry* modrm_entry = 0;
    342   if (address_is_32_bits_)
    343     modrm_entry = &s_ia32_modrm_map_[modrm];
    344   else
    345     modrm_entry = &s_ia16_modrm_map_[modrm];
    346 
    347   // Invariant: modrm_entry points to information that we need to decode
    348   // the ModR/M byte.
    349 
    350   // Add to the count of operand bytes, if the ModR/M byte indicates
    351   // that some operands are encoded in the instruction.
    352   if (modrm_entry->is_encoded_in_instruction_)
    353     operand_bytes_ += modrm_entry->operand_size_;
    354 
    355   // Process the SIB byte if necessary, and return the count
    356   // of ModR/M and SIB bytes.
    357   if (modrm_entry->use_sib_byte_) {
    358     size++;
    359     return ProcessSib(start_byte + 1, mod, size);
    360   } else {
    361     size++;
    362     return true;
    363   }
    364 }
    365 
    366 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
    367                                   unsigned char mod,
    368                                   unsigned int& size) {
    369   // get the mod field from the 2..0 bits of the SIB byte
    370   unsigned char sib_base = (*start_byte) & 0x07;
    371   if (0x05 == sib_base) {
    372     switch (mod) {
    373     case 0x00: // mod == 00
    374     case 0x02: // mod == 10
    375       operand_bytes_ += OS_DOUBLE_WORD;
    376       break;
    377     case 0x01: // mod == 01
    378       operand_bytes_ += OS_BYTE;
    379       break;
    380     case 0x03: // mod == 11
    381       // According to the IA-32 docs, there does not seem to be a disp
    382       // value for this value of mod
    383     default:
    384       break;
    385     }
    386   }
    387 
    388   size++;
    389   return true;
    390 }
    391 
    392 };  // namespace sidestep
    393