1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Implementation of MiniDisassembler. 6 7 #ifdef _WIN64 8 #error The code in this file should not be used on 64-bit Windows. 9 #endif 10 11 #include "sandbox/win/src/sidestep/mini_disassembler.h" 12 13 namespace sidestep { 14 15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, 16 bool address_default_is_32_bits) 17 : operand_default_is_32_bits_(operand_default_is_32_bits), 18 address_default_is_32_bits_(address_default_is_32_bits) { 19 Initialize(); 20 } 21 22 MiniDisassembler::MiniDisassembler() 23 : operand_default_is_32_bits_(true), 24 address_default_is_32_bits_(true) { 25 Initialize(); 26 } 27 28 InstructionType MiniDisassembler::Disassemble( 29 unsigned char* start_byte, 30 unsigned int* instruction_bytes) { 31 // Clean up any state from previous invocations. 32 Initialize(); 33 34 // Start by processing any prefixes. 35 unsigned char* current_byte = start_byte; 36 unsigned int size = 0; 37 InstructionType instruction_type = ProcessPrefixes(current_byte, &size); 38 39 if (IT_UNKNOWN == instruction_type) 40 return instruction_type; 41 42 current_byte += size; 43 size = 0; 44 45 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ 46 // and address_is_32_bits_ flags are correctly set. 47 48 instruction_type = ProcessOpcode(current_byte, 0, &size); 49 50 // Check for error processing instruction 51 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { 52 return IT_UNKNOWN; 53 } 54 55 current_byte += size; 56 57 // Invariant: operand_bytes_ indicates the total size of operands 58 // specified by the opcode and/or ModR/M byte and/or SIB byte. 59 // pCurrentByte points to the first byte after the ModR/M byte, or after 60 // the SIB byte if it is present (i.e. the first byte of any operands 61 // encoded in the instruction). 62 63 // We get the total length of any prefixes, the opcode, and the ModR/M and 64 // SIB bytes if present, by taking the difference of the original starting 65 // address and the current byte (which points to the first byte of the 66 // operands if present, or to the first byte of the next instruction if 67 // they are not). Adding the count of bytes in the operands encoded in 68 // the instruction gives us the full length of the instruction in bytes. 69 *instruction_bytes += operand_bytes_ + (current_byte - start_byte); 70 71 // Return the instruction type, which was set by ProcessOpcode(). 72 return instruction_type_; 73 } 74 75 void MiniDisassembler::Initialize() { 76 operand_is_32_bits_ = operand_default_is_32_bits_; 77 address_is_32_bits_ = address_default_is_32_bits_; 78 operand_bytes_ = 0; 79 have_modrm_ = false; 80 should_decode_modrm_ = false; 81 instruction_type_ = IT_UNKNOWN; 82 got_f2_prefix_ = false; 83 got_f3_prefix_ = false; 84 got_66_prefix_ = false; 85 } 86 87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, 88 unsigned int* size) { 89 InstructionType instruction_type = IT_GENERIC; 90 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; 91 92 switch (opcode.type_) { 93 case IT_PREFIX_ADDRESS: 94 address_is_32_bits_ = !address_default_is_32_bits_; 95 goto nochangeoperand; 96 case IT_PREFIX_OPERAND: 97 operand_is_32_bits_ = !operand_default_is_32_bits_; 98 nochangeoperand: 99 case IT_PREFIX: 100 101 if (0xF2 == (*start_byte)) 102 got_f2_prefix_ = true; 103 else if (0xF3 == (*start_byte)) 104 got_f3_prefix_ = true; 105 else if (0x66 == (*start_byte)) 106 got_66_prefix_ = true; 107 108 instruction_type = opcode.type_; 109 (*size)++; 110 // we got a prefix, so add one and check next byte 111 ProcessPrefixes(start_byte + 1, size); 112 default: 113 break; // not a prefix byte 114 } 115 116 return instruction_type; 117 } 118 119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, 120 unsigned int table_index, 121 unsigned int* size) { 122 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table 123 unsigned char current_byte = (*start_byte) >> table.shift_; 124 current_byte = current_byte & table.mask_; // Mask out the bits we will use 125 126 // Check whether the byte we have is inside the table we have. 127 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { 128 instruction_type_ = IT_UNKNOWN; 129 return instruction_type_; 130 } 131 132 const Opcode& opcode = table.table_[current_byte]; 133 if (IT_UNUSED == opcode.type_) { 134 // This instruction is not used by the IA-32 ISA, so we indicate 135 // this to the user. Probably means that we were pointed to 136 // a byte in memory that was not the start of an instruction. 137 instruction_type_ = IT_UNUSED; 138 return instruction_type_; 139 } else if (IT_REFERENCE == opcode.type_) { 140 // We are looking at an opcode that has more bytes (or is continued 141 // in the ModR/M byte). Recursively find the opcode definition in 142 // the table for the opcode's next byte. 143 (*size)++; 144 ProcessOpcode(start_byte + 1, opcode.table_index_, size); 145 return instruction_type_; 146 } 147 148 const SpecificOpcode* specific_opcode = reinterpret_cast< 149 const SpecificOpcode*>(&opcode); 150 if (opcode.is_prefix_dependent_) { 151 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { 152 specific_opcode = &opcode.opcode_if_f2_prefix_; 153 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { 154 specific_opcode = &opcode.opcode_if_f3_prefix_; 155 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { 156 specific_opcode = &opcode.opcode_if_66_prefix_; 157 } 158 } 159 160 // Inv: The opcode type is known. 161 instruction_type_ = specific_opcode->type_; 162 163 // Let's process the operand types to see if we have any immediate 164 // operands, and/or a ModR/M byte. 165 166 ProcessOperand(specific_opcode->flag_dest_); 167 ProcessOperand(specific_opcode->flag_source_); 168 ProcessOperand(specific_opcode->flag_aux_); 169 170 // Inv: We have processed the opcode and incremented operand_bytes_ 171 // by the number of bytes of any operands specified by the opcode 172 // that are stored in the instruction (not registers etc.). Now 173 // we need to return the total number of bytes for the opcode and 174 // for the ModR/M or SIB bytes if they are present. 175 176 if (table.mask_ != 0xff) { 177 if (have_modrm_) { 178 // we're looking at a ModR/M byte so we're not going to 179 // count that into the opcode size 180 ProcessModrm(start_byte, size); 181 return IT_GENERIC; 182 } else { 183 // need to count the ModR/M byte even if it's just being 184 // used for opcode extension 185 (*size)++; 186 return IT_GENERIC; 187 } 188 } else { 189 if (have_modrm_) { 190 // The ModR/M byte is the next byte. 191 (*size)++; 192 ProcessModrm(start_byte + 1, size); 193 return IT_GENERIC; 194 } else { 195 (*size)++; 196 return IT_GENERIC; 197 } 198 } 199 } 200 201 bool MiniDisassembler::ProcessOperand(int flag_operand) { 202 bool succeeded = true; 203 if (AM_NOT_USED == flag_operand) 204 return succeeded; 205 206 // Decide what to do based on the addressing mode. 207 switch (flag_operand & AM_MASK) { 208 // No ModR/M byte indicated by these addressing modes, and no 209 // additional (e.g. immediate) parameters. 210 case AM_A: // Direct address 211 case AM_F: // EFLAGS register 212 case AM_X: // Memory addressed by the DS:SI register pair 213 case AM_Y: // Memory addressed by the ES:DI register pair 214 case AM_IMPLICIT: // Parameter is implicit, occupies no space in 215 // instruction 216 break; 217 218 // There is a ModR/M byte but it does not necessarily need 219 // to be decoded. 220 case AM_C: // reg field of ModR/M selects a control register 221 case AM_D: // reg field of ModR/M selects a debug register 222 case AM_G: // reg field of ModR/M selects a general register 223 case AM_P: // reg field of ModR/M selects an MMX register 224 case AM_R: // mod field of ModR/M may refer only to a general register 225 case AM_S: // reg field of ModR/M selects a segment register 226 case AM_T: // reg field of ModR/M selects a test register 227 case AM_V: // reg field of ModR/M selects a 128-bit XMM register 228 have_modrm_ = true; 229 break; 230 231 // In these addressing modes, there is a ModR/M byte and it needs to be 232 // decoded. No other (e.g. immediate) params than indicated in ModR/M. 233 case AM_E: // Operand is either a general-purpose register or memory, 234 // specified by ModR/M byte 235 case AM_M: // ModR/M byte will refer only to memory 236 case AM_Q: // Operand is either an MMX register or memory (complex 237 // evaluation), specified by ModR/M byte 238 case AM_W: // Operand is either a 128-bit XMM register or memory (complex 239 // eval), specified by ModR/M byte 240 have_modrm_ = true; 241 should_decode_modrm_ = true; 242 break; 243 244 // These addressing modes specify an immediate or an offset value 245 // directly, so we need to look at the operand type to see how many 246 // bytes. 247 case AM_I: // Immediate data. 248 case AM_J: // Jump to offset. 249 case AM_O: // Operand is at offset. 250 switch (flag_operand & OT_MASK) { 251 case OT_B: // Byte regardless of operand-size attribute. 252 operand_bytes_ += OS_BYTE; 253 break; 254 case OT_C: // Byte or word, depending on operand-size attribute. 255 if (operand_is_32_bits_) 256 operand_bytes_ += OS_WORD; 257 else 258 operand_bytes_ += OS_BYTE; 259 break; 260 case OT_D: // Doubleword, regardless of operand-size attribute. 261 operand_bytes_ += OS_DOUBLE_WORD; 262 break; 263 case OT_DQ: // Double-quadword, regardless of operand-size attribute. 264 operand_bytes_ += OS_DOUBLE_QUAD_WORD; 265 break; 266 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size 267 // attribute. 268 if (operand_is_32_bits_) 269 operand_bytes_ += OS_48_BIT_POINTER; 270 else 271 operand_bytes_ += OS_32_BIT_POINTER; 272 break; 273 case OT_PS: // 128-bit packed single-precision floating-point data. 274 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; 275 break; 276 case OT_Q: // Quadword, regardless of operand-size attribute. 277 operand_bytes_ += OS_QUAD_WORD; 278 break; 279 case OT_S: // 6-byte pseudo-descriptor. 280 operand_bytes_ += OS_PSEUDO_DESCRIPTOR; 281 break; 282 case OT_SD: // Scalar Double-Precision Floating-Point Value 283 case OT_PD: // Unaligned packed double-precision floating point value 284 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; 285 break; 286 case OT_SS: 287 // Scalar element of a 128-bit packed single-precision 288 // floating data. 289 // We simply return enItUnknown since we don't have to support 290 // floating point 291 succeeded = false; 292 break; 293 case OT_V: // Word or doubleword, depending on operand-size attribute. 294 if (operand_is_32_bits_) 295 operand_bytes_ += OS_DOUBLE_WORD; 296 else 297 operand_bytes_ += OS_WORD; 298 break; 299 case OT_W: // Word, regardless of operand-size attribute. 300 operand_bytes_ += OS_WORD; 301 break; 302 303 // Can safely ignore these. 304 case OT_A: // Two one-word operands in memory or two double-word 305 // operands in memory 306 case OT_PI: // Quadword MMX technology register (e.g. mm0) 307 case OT_SI: // Doubleword integer register (e.g., eax) 308 break; 309 310 default: 311 break; 312 } 313 break; 314 315 default: 316 break; 317 } 318 319 return succeeded; 320 } 321 322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, 323 unsigned int* size) { 324 // If we don't need to decode, we just return the size of the ModR/M 325 // byte (there is never a SIB byte in this case). 326 if (!should_decode_modrm_) { 327 (*size)++; 328 return true; 329 } 330 331 // We never care about the reg field, only the combination of the mod 332 // and r/m fields, so let's start by packing those fields together into 333 // 5 bits. 334 unsigned char modrm = (*start_byte); 335 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field 336 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field 337 mod = mod >> 3; // shift the mod field to the right place 338 modrm = mod | modrm; // combine the r/m and mod fields as discussed 339 mod = mod >> 3; // shift the mod field to bits 2..0 340 341 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field 342 // in bits 2..0, and mod contains the mod field in bits 2..0 343 344 const ModrmEntry* modrm_entry = 0; 345 if (address_is_32_bits_) 346 modrm_entry = &s_ia32_modrm_map_[modrm]; 347 else 348 modrm_entry = &s_ia16_modrm_map_[modrm]; 349 350 // Invariant: modrm_entry points to information that we need to decode 351 // the ModR/M byte. 352 353 // Add to the count of operand bytes, if the ModR/M byte indicates 354 // that some operands are encoded in the instruction. 355 if (modrm_entry->is_encoded_in_instruction_) 356 operand_bytes_ += modrm_entry->operand_size_; 357 358 // Process the SIB byte if necessary, and return the count 359 // of ModR/M and SIB bytes. 360 if (modrm_entry->use_sib_byte_) { 361 (*size)++; 362 return ProcessSib(start_byte + 1, mod, size); 363 } else { 364 (*size)++; 365 return true; 366 } 367 } 368 369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte, 370 unsigned char mod, 371 unsigned int* size) { 372 // get the mod field from the 2..0 bits of the SIB byte 373 unsigned char sib_base = (*start_byte) & 0x07; 374 if (0x05 == sib_base) { 375 switch (mod) { 376 case 0x00: // mod == 00 377 case 0x02: // mod == 10 378 operand_bytes_ += OS_DOUBLE_WORD; 379 break; 380 case 0x01: // mod == 01 381 operand_bytes_ += OS_BYTE; 382 break; 383 case 0x03: // mod == 11 384 // According to the IA-32 docs, there does not seem to be a disp 385 // value for this value of mod 386 default: 387 break; 388 } 389 } 390 391 (*size)++; 392 return true; 393 } 394 395 }; // namespace sidestep 396