1 /* Copyright (c) 2007, Google Inc. 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 * 30 * --- 31 * Author: Joi Sigurdsson 32 * 33 * Implementation of MiniDisassembler. 34 */ 35 36 #include "mini_disassembler.h" 37 38 namespace sidestep { 39 40 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, 41 bool address_default_is_32_bits) 42 : operand_default_is_32_bits_(operand_default_is_32_bits), 43 address_default_is_32_bits_(address_default_is_32_bits) { 44 Initialize(); 45 } 46 47 MiniDisassembler::MiniDisassembler() 48 : operand_default_is_32_bits_(true), 49 address_default_is_32_bits_(true) { 50 Initialize(); 51 } 52 53 InstructionType MiniDisassembler::Disassemble( 54 unsigned char* start_byte, 55 unsigned int& instruction_bytes) { 56 // Clean up any state from previous invocations. 57 Initialize(); 58 59 // Start by processing any prefixes. 60 unsigned char* current_byte = start_byte; 61 unsigned int size = 0; 62 InstructionType instruction_type = ProcessPrefixes(current_byte, size); 63 64 if (IT_UNKNOWN == instruction_type) 65 return instruction_type; 66 67 current_byte += size; 68 size = 0; 69 70 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ 71 // and address_is_32_bits_ flags are correctly set. 72 73 instruction_type = ProcessOpcode(current_byte, 0, size); 74 75 // Check for error processing instruction 76 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { 77 return IT_UNKNOWN; 78 } 79 80 current_byte += size; 81 82 // Invariant: operand_bytes_ indicates the total size of operands 83 // specified by the opcode and/or ModR/M byte and/or SIB byte. 84 // pCurrentByte points to the first byte after the ModR/M byte, or after 85 // the SIB byte if it is present (i.e. the first byte of any operands 86 // encoded in the instruction). 87 88 // We get the total length of any prefixes, the opcode, and the ModR/M and 89 // SIB bytes if present, by taking the difference of the original starting 90 // address and the current byte (which points to the first byte of the 91 // operands if present, or to the first byte of the next instruction if 92 // they are not). Adding the count of bytes in the operands encoded in 93 // the instruction gives us the full length of the instruction in bytes. 94 instruction_bytes += operand_bytes_ + (current_byte - start_byte); 95 96 // Return the instruction type, which was set by ProcessOpcode(). 97 return instruction_type_; 98 } 99 100 void MiniDisassembler::Initialize() { 101 operand_is_32_bits_ = operand_default_is_32_bits_; 102 address_is_32_bits_ = address_default_is_32_bits_; 103 #ifdef _M_X64 104 operand_default_support_64_bits_ = true; 105 #else 106 operand_default_support_64_bits_ = false; 107 #endif 108 operand_is_64_bits_ = false; 109 operand_bytes_ = 0; 110 have_modrm_ = false; 111 should_decode_modrm_ = false; 112 instruction_type_ = IT_UNKNOWN; 113 got_f2_prefix_ = false; 114 got_f3_prefix_ = false; 115 got_66_prefix_ = false; 116 } 117 118 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, 119 unsigned int& size) { 120 InstructionType instruction_type = IT_GENERIC; 121 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; 122 123 switch (opcode.type_) { 124 case IT_PREFIX_ADDRESS: 125 address_is_32_bits_ = !address_default_is_32_bits_; 126 goto nochangeoperand; 127 case IT_PREFIX_OPERAND: 128 operand_is_32_bits_ = !operand_default_is_32_bits_; 129 nochangeoperand: 130 case IT_PREFIX: 131 132 if (0xF2 == (*start_byte)) 133 got_f2_prefix_ = true; 134 else if (0xF3 == (*start_byte)) 135 got_f3_prefix_ = true; 136 else if (0x66 == (*start_byte)) 137 got_66_prefix_ = true; 138 else if (operand_default_support_64_bits_ && (*start_byte) & 0x48) 139 operand_is_64_bits_ = true; 140 141 instruction_type = opcode.type_; 142 size ++; 143 // we got a prefix, so add one and check next byte 144 ProcessPrefixes(start_byte + 1, size); 145 default: 146 break; // not a prefix byte 147 } 148 149 return instruction_type; 150 } 151 152 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, 153 unsigned int table_index, 154 unsigned int& size) { 155 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table 156 unsigned char current_byte = (*start_byte) >> table.shift_; 157 current_byte = current_byte & table.mask_; // Mask out the bits we will use 158 159 // Check whether the byte we have is inside the table we have. 160 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { 161 instruction_type_ = IT_UNKNOWN; 162 return instruction_type_; 163 } 164 165 const Opcode& opcode = table.table_[current_byte]; 166 if (IT_UNUSED == opcode.type_) { 167 // This instruction is not used by the IA-32 ISA, so we indicate 168 // this to the user. Probably means that we were pointed to 169 // a byte in memory that was not the start of an instruction. 170 instruction_type_ = IT_UNUSED; 171 return instruction_type_; 172 } else if (IT_REFERENCE == opcode.type_) { 173 // We are looking at an opcode that has more bytes (or is continued 174 // in the ModR/M byte). Recursively find the opcode definition in 175 // the table for the opcode's next byte. 176 size++; 177 ProcessOpcode(start_byte + 1, opcode.table_index_, size); 178 return instruction_type_; 179 } 180 181 const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode; 182 if (opcode.is_prefix_dependent_) { 183 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { 184 specific_opcode = &opcode.opcode_if_f2_prefix_; 185 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { 186 specific_opcode = &opcode.opcode_if_f3_prefix_; 187 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { 188 specific_opcode = &opcode.opcode_if_66_prefix_; 189 } 190 } 191 192 // Inv: The opcode type is known. 193 instruction_type_ = specific_opcode->type_; 194 195 // Let's process the operand types to see if we have any immediate 196 // operands, and/or a ModR/M byte. 197 198 ProcessOperand(specific_opcode->flag_dest_); 199 ProcessOperand(specific_opcode->flag_source_); 200 ProcessOperand(specific_opcode->flag_aux_); 201 202 // Inv: We have processed the opcode and incremented operand_bytes_ 203 // by the number of bytes of any operands specified by the opcode 204 // that are stored in the instruction (not registers etc.). Now 205 // we need to return the total number of bytes for the opcode and 206 // for the ModR/M or SIB bytes if they are present. 207 208 if (table.mask_ != 0xff) { 209 if (have_modrm_) { 210 // we're looking at a ModR/M byte so we're not going to 211 // count that into the opcode size 212 ProcessModrm(start_byte, size); 213 return IT_GENERIC; 214 } else { 215 // need to count the ModR/M byte even if it's just being 216 // used for opcode extension 217 size++; 218 return IT_GENERIC; 219 } 220 } else { 221 if (have_modrm_) { 222 // The ModR/M byte is the next byte. 223 size++; 224 ProcessModrm(start_byte + 1, size); 225 return IT_GENERIC; 226 } else { 227 size++; 228 return IT_GENERIC; 229 } 230 } 231 } 232 233 bool MiniDisassembler::ProcessOperand(int flag_operand) { 234 bool succeeded = true; 235 if (AM_NOT_USED == flag_operand) 236 return succeeded; 237 238 // Decide what to do based on the addressing mode. 239 switch (flag_operand & AM_MASK) { 240 // No ModR/M byte indicated by these addressing modes, and no 241 // additional (e.g. immediate) parameters. 242 case AM_A: // Direct address 243 case AM_F: // EFLAGS register 244 case AM_X: // Memory addressed by the DS:SI register pair 245 case AM_Y: // Memory addressed by the ES:DI register pair 246 case AM_IMPLICIT: // Parameter is implicit, occupies no space in 247 // instruction 248 break; 249 250 // There is a ModR/M byte but it does not necessarily need 251 // to be decoded. 252 case AM_C: // reg field of ModR/M selects a control register 253 case AM_D: // reg field of ModR/M selects a debug register 254 case AM_G: // reg field of ModR/M selects a general register 255 case AM_P: // reg field of ModR/M selects an MMX register 256 case AM_R: // mod field of ModR/M may refer only to a general register 257 case AM_S: // reg field of ModR/M selects a segment register 258 case AM_T: // reg field of ModR/M selects a test register 259 case AM_V: // reg field of ModR/M selects a 128-bit XMM register 260 have_modrm_ = true; 261 break; 262 263 // In these addressing modes, there is a ModR/M byte and it needs to be 264 // decoded. No other (e.g. immediate) params than indicated in ModR/M. 265 case AM_E: // Operand is either a general-purpose register or memory, 266 // specified by ModR/M byte 267 case AM_M: // ModR/M byte will refer only to memory 268 case AM_Q: // Operand is either an MMX register or memory (complex 269 // evaluation), specified by ModR/M byte 270 case AM_W: // Operand is either a 128-bit XMM register or memory (complex 271 // eval), specified by ModR/M byte 272 have_modrm_ = true; 273 should_decode_modrm_ = true; 274 break; 275 276 // These addressing modes specify an immediate or an offset value 277 // directly, so we need to look at the operand type to see how many 278 // bytes. 279 case AM_I: // Immediate data. 280 case AM_J: // Jump to offset. 281 case AM_O: // Operand is at offset. 282 switch (flag_operand & OT_MASK) { 283 case OT_B: // Byte regardless of operand-size attribute. 284 operand_bytes_ += OS_BYTE; 285 break; 286 case OT_C: // Byte or word, depending on operand-size attribute. 287 if (operand_is_32_bits_) 288 operand_bytes_ += OS_WORD; 289 else 290 operand_bytes_ += OS_BYTE; 291 break; 292 case OT_D: // Doubleword, regardless of operand-size attribute. 293 operand_bytes_ += OS_DOUBLE_WORD; 294 break; 295 case OT_DQ: // Double-quadword, regardless of operand-size attribute. 296 operand_bytes_ += OS_DOUBLE_QUAD_WORD; 297 break; 298 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size 299 // attribute. 300 if (operand_is_32_bits_) 301 operand_bytes_ += OS_48_BIT_POINTER; 302 else 303 operand_bytes_ += OS_32_BIT_POINTER; 304 break; 305 case OT_PS: // 128-bit packed single-precision floating-point data. 306 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; 307 break; 308 case OT_Q: // Quadword, regardless of operand-size attribute. 309 operand_bytes_ += OS_QUAD_WORD; 310 break; 311 case OT_S: // 6-byte pseudo-descriptor. 312 operand_bytes_ += OS_PSEUDO_DESCRIPTOR; 313 break; 314 case OT_SD: // Scalar Double-Precision Floating-Point Value 315 case OT_PD: // Unaligned packed double-precision floating point value 316 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; 317 break; 318 case OT_SS: 319 // Scalar element of a 128-bit packed single-precision 320 // floating data. 321 // We simply return enItUnknown since we don't have to support 322 // floating point 323 succeeded = false; 324 break; 325 case OT_V: // Word, doubleword or quadword, depending on operand-size 326 // attribute. 327 if (operand_is_64_bits_ && flag_operand & AM_I && 328 flag_operand & IOS_64) 329 operand_bytes_ += OS_QUAD_WORD; 330 else if (operand_is_32_bits_) 331 operand_bytes_ += OS_DOUBLE_WORD; 332 else 333 operand_bytes_ += OS_WORD; 334 break; 335 case OT_W: // Word, regardless of operand-size attribute. 336 operand_bytes_ += OS_WORD; 337 break; 338 339 // Can safely ignore these. 340 case OT_A: // Two one-word operands in memory or two double-word 341 // operands in memory 342 case OT_PI: // Quadword MMX technology register (e.g. mm0) 343 case OT_SI: // Doubleword integer register (e.g., eax) 344 break; 345 346 default: 347 break; 348 } 349 break; 350 351 default: 352 break; 353 } 354 355 return succeeded; 356 } 357 358 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, 359 unsigned int& size) { 360 // If we don't need to decode, we just return the size of the ModR/M 361 // byte (there is never a SIB byte in this case). 362 if (!should_decode_modrm_) { 363 size++; 364 return true; 365 } 366 367 // We never care about the reg field, only the combination of the mod 368 // and r/m fields, so let's start by packing those fields together into 369 // 5 bits. 370 unsigned char modrm = (*start_byte); 371 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field 372 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field 373 mod = mod >> 3; // shift the mod field to the right place 374 modrm = mod | modrm; // combine the r/m and mod fields as discussed 375 mod = mod >> 3; // shift the mod field to bits 2..0 376 377 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field 378 // in bits 2..0, and mod contains the mod field in bits 2..0 379 380 const ModrmEntry* modrm_entry = 0; 381 if (address_is_32_bits_) 382 modrm_entry = &s_ia32_modrm_map_[modrm]; 383 else 384 modrm_entry = &s_ia16_modrm_map_[modrm]; 385 386 // Invariant: modrm_entry points to information that we need to decode 387 // the ModR/M byte. 388 389 // Add to the count of operand bytes, if the ModR/M byte indicates 390 // that some operands are encoded in the instruction. 391 if (modrm_entry->is_encoded_in_instruction_) 392 operand_bytes_ += modrm_entry->operand_size_; 393 394 // Process the SIB byte if necessary, and return the count 395 // of ModR/M and SIB bytes. 396 if (modrm_entry->use_sib_byte_) { 397 size++; 398 return ProcessSib(start_byte + 1, mod, size); 399 } else { 400 size++; 401 return true; 402 } 403 } 404 405 bool MiniDisassembler::ProcessSib(unsigned char* start_byte, 406 unsigned char mod, 407 unsigned int& size) { 408 // get the mod field from the 2..0 bits of the SIB byte 409 unsigned char sib_base = (*start_byte) & 0x07; 410 if (0x05 == sib_base) { 411 switch (mod) { 412 case 0x00: // mod == 00 413 case 0x02: // mod == 10 414 operand_bytes_ += OS_DOUBLE_WORD; 415 break; 416 case 0x01: // mod == 01 417 operand_bytes_ += OS_BYTE; 418 break; 419 case 0x03: // mod == 11 420 // According to the IA-32 docs, there does not seem to be a disp 421 // value for this value of mod 422 default: 423 break; 424 } 425 } 426 427 size++; 428 return true; 429 } 430 431 }; // namespace sidestep 432