1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 /* 6 * Implementation of MiniDisassembler. 7 */ 8 9 #include "mini_disassembler.h" 10 11 namespace sidestep { 12 13 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, 14 bool address_default_is_32_bits) 15 : operand_default_is_32_bits_(operand_default_is_32_bits), 16 address_default_is_32_bits_(address_default_is_32_bits) { 17 Initialize(); 18 } 19 20 MiniDisassembler::MiniDisassembler() 21 : operand_default_is_32_bits_(true), 22 address_default_is_32_bits_(true) { 23 Initialize(); 24 } 25 26 InstructionType MiniDisassembler::Disassemble( 27 unsigned char* start_byte, 28 unsigned int& instruction_bytes) { 29 // Clean up any state from previous invocations. 30 Initialize(); 31 32 // Start by processing any prefixes. 33 unsigned char* current_byte = start_byte; 34 unsigned int size = 0; 35 InstructionType instruction_type = ProcessPrefixes(current_byte, size); 36 37 if (IT_UNKNOWN == instruction_type) 38 return instruction_type; 39 40 current_byte += size; 41 size = 0; 42 43 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ 44 // and address_is_32_bits_ flags are correctly set. 45 46 instruction_type = ProcessOpcode(current_byte, 0, size); 47 48 // Check for error processing instruction 49 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { 50 return IT_UNKNOWN; 51 } 52 53 current_byte += size; 54 55 // Invariant: operand_bytes_ indicates the total size of operands 56 // specified by the opcode and/or ModR/M byte and/or SIB byte. 57 // pCurrentByte points to the first byte after the ModR/M byte, or after 58 // the SIB byte if it is present (i.e. the first byte of any operands 59 // encoded in the instruction). 60 61 // We get the total length of any prefixes, the opcode, and the ModR/M and 62 // SIB bytes if present, by taking the difference of the original starting 63 // address and the current byte (which points to the first byte of the 64 // operands if present, or to the first byte of the next instruction if 65 // they are not). Adding the count of bytes in the operands encoded in 66 // the instruction gives us the full length of the instruction in bytes. 67 instruction_bytes += operand_bytes_ + (current_byte - start_byte); 68 69 // Return the instruction type, which was set by ProcessOpcode(). 70 return instruction_type_; 71 } 72 73 void MiniDisassembler::Initialize() { 74 operand_is_32_bits_ = operand_default_is_32_bits_; 75 address_is_32_bits_ = address_default_is_32_bits_; 76 operand_bytes_ = 0; 77 have_modrm_ = false; 78 should_decode_modrm_ = false; 79 instruction_type_ = IT_UNKNOWN; 80 got_f2_prefix_ = false; 81 got_f3_prefix_ = false; 82 got_66_prefix_ = false; 83 } 84 85 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, 86 unsigned int& size) { 87 InstructionType instruction_type = IT_GENERIC; 88 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; 89 90 switch (opcode.type_) { 91 case IT_PREFIX_ADDRESS: 92 address_is_32_bits_ = !address_default_is_32_bits_; 93 goto nochangeoperand; 94 case IT_PREFIX_OPERAND: 95 operand_is_32_bits_ = !operand_default_is_32_bits_; 96 nochangeoperand: 97 case IT_PREFIX: 98 99 if (0xF2 == (*start_byte)) 100 got_f2_prefix_ = true; 101 else if (0xF3 == (*start_byte)) 102 got_f3_prefix_ = true; 103 else if (0x66 == (*start_byte)) 104 got_66_prefix_ = true; 105 106 instruction_type = opcode.type_; 107 size ++; 108 // we got a prefix, so add one and check next byte 109 ProcessPrefixes(start_byte + 1, size); 110 default: 111 break; // not a prefix byte 112 } 113 114 return instruction_type; 115 } 116 117 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, 118 unsigned int table_index, 119 unsigned int& size) { 120 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table 121 unsigned char current_byte = (*start_byte) >> table.shift_; 122 current_byte = current_byte & table.mask_; // Mask out the bits we will use 123 124 // Check whether the byte we have is inside the table we have. 125 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { 126 instruction_type_ = IT_UNKNOWN; 127 return instruction_type_; 128 } 129 130 const Opcode& opcode = table.table_[current_byte]; 131 if (IT_UNUSED == opcode.type_) { 132 // This instruction is not used by the IA-32 ISA, so we indicate 133 // this to the user. Probably means that we were pointed to 134 // a byte in memory that was not the start of an instruction. 135 instruction_type_ = IT_UNUSED; 136 return instruction_type_; 137 } else if (IT_REFERENCE == opcode.type_) { 138 // We are looking at an opcode that has more bytes (or is continued 139 // in the ModR/M byte). Recursively find the opcode definition in 140 // the table for the opcode's next byte. 141 size++; 142 ProcessOpcode(start_byte + 1, opcode.table_index_, size); 143 return instruction_type_; 144 } 145 146 const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode; 147 if (opcode.is_prefix_dependent_) { 148 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { 149 specific_opcode = &opcode.opcode_if_f2_prefix_; 150 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { 151 specific_opcode = &opcode.opcode_if_f3_prefix_; 152 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { 153 specific_opcode = &opcode.opcode_if_66_prefix_; 154 } 155 } 156 157 // Inv: The opcode type is known. 158 instruction_type_ = specific_opcode->type_; 159 160 // Let's process the operand types to see if we have any immediate 161 // operands, and/or a ModR/M byte. 162 163 ProcessOperand(specific_opcode->flag_dest_); 164 ProcessOperand(specific_opcode->flag_source_); 165 ProcessOperand(specific_opcode->flag_aux_); 166 167 // Inv: We have processed the opcode and incremented operand_bytes_ 168 // by the number of bytes of any operands specified by the opcode 169 // that are stored in the instruction (not registers etc.). Now 170 // we need to return the total number of bytes for the opcode and 171 // for the ModR/M or SIB bytes if they are present. 172 173 if (table.mask_ != 0xff) { 174 if (have_modrm_) { 175 // we're looking at a ModR/M byte so we're not going to 176 // count that into the opcode size 177 ProcessModrm(start_byte, size); 178 return IT_GENERIC; 179 } else { 180 // need to count the ModR/M byte even if it's just being 181 // used for opcode extension 182 size++; 183 return IT_GENERIC; 184 } 185 } else { 186 if (have_modrm_) { 187 // The ModR/M byte is the next byte. 188 size++; 189 ProcessModrm(start_byte + 1, size); 190 return IT_GENERIC; 191 } else { 192 size++; 193 return IT_GENERIC; 194 } 195 } 196 } 197 198 bool MiniDisassembler::ProcessOperand(int flag_operand) { 199 bool succeeded = true; 200 if (AM_NOT_USED == flag_operand) 201 return succeeded; 202 203 // Decide what to do based on the addressing mode. 204 switch (flag_operand & AM_MASK) { 205 // No ModR/M byte indicated by these addressing modes, and no 206 // additional (e.g. immediate) parameters. 207 case AM_A: // Direct address 208 case AM_F: // EFLAGS register 209 case AM_X: // Memory addressed by the DS:SI register pair 210 case AM_Y: // Memory addressed by the ES:DI register pair 211 case AM_IMPLICIT: // Parameter is implicit, occupies no space in 212 // instruction 213 break; 214 215 // There is a ModR/M byte but it does not necessarily need 216 // to be decoded. 217 case AM_C: // reg field of ModR/M selects a control register 218 case AM_D: // reg field of ModR/M selects a debug register 219 case AM_G: // reg field of ModR/M selects a general register 220 case AM_P: // reg field of ModR/M selects an MMX register 221 case AM_R: // mod field of ModR/M may refer only to a general register 222 case AM_S: // reg field of ModR/M selects a segment register 223 case AM_T: // reg field of ModR/M selects a test register 224 case AM_V: // reg field of ModR/M selects a 128-bit XMM register 225 have_modrm_ = true; 226 break; 227 228 // In these addressing modes, there is a ModR/M byte and it needs to be 229 // decoded. No other (e.g. immediate) params than indicated in ModR/M. 230 case AM_E: // Operand is either a general-purpose register or memory, 231 // specified by ModR/M byte 232 case AM_M: // ModR/M byte will refer only to memory 233 case AM_Q: // Operand is either an MMX register or memory (complex 234 // evaluation), specified by ModR/M byte 235 case AM_W: // Operand is either a 128-bit XMM register or memory (complex 236 // eval), specified by ModR/M byte 237 have_modrm_ = true; 238 should_decode_modrm_ = true; 239 break; 240 241 // These addressing modes specify an immediate or an offset value 242 // directly, so we need to look at the operand type to see how many 243 // bytes. 244 case AM_I: // Immediate data. 245 case AM_J: // Jump to offset. 246 case AM_O: // Operand is at offset. 247 switch (flag_operand & OT_MASK) { 248 case OT_B: // Byte regardless of operand-size attribute. 249 operand_bytes_ += OS_BYTE; 250 break; 251 case OT_C: // Byte or word, depending on operand-size attribute. 252 if (operand_is_32_bits_) 253 operand_bytes_ += OS_WORD; 254 else 255 operand_bytes_ += OS_BYTE; 256 break; 257 case OT_D: // Doubleword, regardless of operand-size attribute. 258 operand_bytes_ += OS_DOUBLE_WORD; 259 break; 260 case OT_DQ: // Double-quadword, regardless of operand-size attribute. 261 operand_bytes_ += OS_DOUBLE_QUAD_WORD; 262 break; 263 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size 264 // attribute. 265 if (operand_is_32_bits_) 266 operand_bytes_ += OS_48_BIT_POINTER; 267 else 268 operand_bytes_ += OS_32_BIT_POINTER; 269 break; 270 case OT_PS: // 128-bit packed single-precision floating-point data. 271 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; 272 break; 273 case OT_Q: // Quadword, regardless of operand-size attribute. 274 operand_bytes_ += OS_QUAD_WORD; 275 break; 276 case OT_S: // 6-byte pseudo-descriptor. 277 operand_bytes_ += OS_PSEUDO_DESCRIPTOR; 278 break; 279 case OT_SD: // Scalar Double-Precision Floating-Point Value 280 case OT_PD: // Unaligned packed double-precision floating point value 281 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; 282 break; 283 case OT_SS: 284 // Scalar element of a 128-bit packed single-precision 285 // floating data. 286 // We simply return enItUnknown since we don't have to support 287 // floating point 288 succeeded = false; 289 break; 290 case OT_V: // Word or doubleword, depending on operand-size attribute. 291 if (operand_is_32_bits_) 292 operand_bytes_ += OS_DOUBLE_WORD; 293 else 294 operand_bytes_ += OS_WORD; 295 break; 296 case OT_W: // Word, regardless of operand-size attribute. 297 operand_bytes_ += OS_WORD; 298 break; 299 300 // Can safely ignore these. 301 case OT_A: // Two one-word operands in memory or two double-word 302 // operands in memory 303 case OT_PI: // Quadword MMX technology register (e.g. mm0) 304 case OT_SI: // Doubleword integer register (e.g., eax) 305 break; 306 307 default: 308 break; 309 } 310 break; 311 312 default: 313 break; 314 } 315 316 return succeeded; 317 } 318 319 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, 320 unsigned int& size) { 321 // If we don't need to decode, we just return the size of the ModR/M 322 // byte (there is never a SIB byte in this case). 323 if (!should_decode_modrm_) { 324 size++; 325 return true; 326 } 327 328 // We never care about the reg field, only the combination of the mod 329 // and r/m fields, so let's start by packing those fields together into 330 // 5 bits. 331 unsigned char modrm = (*start_byte); 332 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field 333 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field 334 mod = mod >> 3; // shift the mod field to the right place 335 modrm = mod | modrm; // combine the r/m and mod fields as discussed 336 mod = mod >> 3; // shift the mod field to bits 2..0 337 338 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field 339 // in bits 2..0, and mod contains the mod field in bits 2..0 340 341 const ModrmEntry* modrm_entry = 0; 342 if (address_is_32_bits_) 343 modrm_entry = &s_ia32_modrm_map_[modrm]; 344 else 345 modrm_entry = &s_ia16_modrm_map_[modrm]; 346 347 // Invariant: modrm_entry points to information that we need to decode 348 // the ModR/M byte. 349 350 // Add to the count of operand bytes, if the ModR/M byte indicates 351 // that some operands are encoded in the instruction. 352 if (modrm_entry->is_encoded_in_instruction_) 353 operand_bytes_ += modrm_entry->operand_size_; 354 355 // Process the SIB byte if necessary, and return the count 356 // of ModR/M and SIB bytes. 357 if (modrm_entry->use_sib_byte_) { 358 size++; 359 return ProcessSib(start_byte + 1, mod, size); 360 } else { 361 size++; 362 return true; 363 } 364 } 365 366 bool MiniDisassembler::ProcessSib(unsigned char* start_byte, 367 unsigned char mod, 368 unsigned int& size) { 369 // get the mod field from the 2..0 bits of the SIB byte 370 unsigned char sib_base = (*start_byte) & 0x07; 371 if (0x05 == sib_base) { 372 switch (mod) { 373 case 0x00: // mod == 00 374 case 0x02: // mod == 10 375 operand_bytes_ += OS_DOUBLE_WORD; 376 break; 377 case 0x01: // mod == 01 378 operand_bytes_ += OS_BYTE; 379 break; 380 case 0x03: // mod == 11 381 // According to the IA-32 docs, there does not seem to be a disp 382 // value for this value of mod 383 default: 384 break; 385 } 386 } 387 388 size++; 389 return true; 390 } 391 392 }; // namespace sidestep 393