1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 5 #include "libdis.h" 6 #include "ia32_insn.h" 7 #include "ia32_operand.h" 8 #include "ia32_modrm.h" 9 #include "ia32_reg.h" 10 #include "x86_imm.h" 11 #include "x86_operand_list.h" 12 13 14 15 /* apply segment override to memory operand in insn */ 16 static void apply_seg( x86_op_t *op, unsigned int prefixes ) { 17 if (! prefixes ) return; 18 19 /* apply overrides from prefix */ 20 switch ( prefixes & PREFIX_REG_MASK ) { 21 case PREFIX_CS: 22 op->flags |= op_cs_seg; break; 23 case PREFIX_SS: 24 op->flags |= op_ss_seg; break; 25 case PREFIX_DS: 26 op->flags |= op_ds_seg; break; 27 case PREFIX_ES: 28 op->flags |= op_es_seg; break; 29 case PREFIX_FS: 30 op->flags |= op_fs_seg; break; 31 case PREFIX_GS: 32 op->flags |= op_gs_seg; break; 33 } 34 35 return; 36 } 37 38 static size_t decode_operand_value( unsigned char *buf, size_t buf_len, 39 x86_op_t *op, x86_insn_t *insn, 40 unsigned int addr_meth, size_t op_size, 41 unsigned int op_value, unsigned char modrm, 42 size_t gen_regs ) { 43 size_t size = 0; 44 45 /* ++ Do Operand Addressing Method / Decode operand ++ */ 46 switch (addr_meth) { 47 /* This sets the operand Size based on the Intel Opcode Map 48 * (Vol 2, Appendix A). Letter encodings are from section 49 * A.1.1, 'Codes for Addressing Method' */ 50 51 /* ---------------------- Addressing Method -------------- */ 52 /* Note that decoding mod ModR/M operand adjusts the size of 53 * the instruction, but decoding the reg operand does not. 54 * This should not cause any problems, as every 'reg' operand 55 * has an associated 'mod' operand. 56 * Goddamn-Intel-Note: 57 * Some Intel addressing methods [M, R] specify that modR/M 58 * byte may only refer to a memory address/may only refer to 59 * a register -- however Intel provides no clues on what to do 60 * if, say, the modR/M for an M opcode decodes to a register 61 * rather than a memory address ... returning 0 is out of the 62 * question, as this would be an Immediate or a RelOffset, so 63 * instead these modR/Ms are decoded with total disregard to 64 * the M, R constraints. */ 65 66 /* MODRM -- mod operand. sets size to at least 1! */ 67 case ADDRMETH_E: /* ModR/M present, Gen reg or memory */ 68 size = ia32_modrm_decode( buf, buf_len, op, insn, 69 gen_regs ); 70 break; 71 case ADDRMETH_M: /* ModR/M only refers to memory */ 72 size = ia32_modrm_decode( buf, buf_len, op, insn, 73 gen_regs ); 74 break; 75 case ADDRMETH_Q: /* ModR/M present, MMX or Memory */ 76 size = ia32_modrm_decode( buf, buf_len, op, insn, 77 REG_MMX_OFFSET ); 78 break; 79 case ADDRMETH_R: /* ModR/M mod == gen reg */ 80 size = ia32_modrm_decode( buf, buf_len, op, insn, 81 gen_regs ); 82 break; 83 case ADDRMETH_W: /* ModR/M present, mem or SIMD reg */ 84 size = ia32_modrm_decode( buf, buf_len, op, insn, 85 REG_SIMD_OFFSET ); 86 break; 87 88 /* MODRM -- reg operand. does not effect size! */ 89 case ADDRMETH_C: /* ModR/M reg == control reg */ 90 ia32_reg_decode( modrm, op, REG_CTRL_OFFSET ); 91 break; 92 case ADDRMETH_D: /* ModR/M reg == debug reg */ 93 ia32_reg_decode( modrm, op, REG_DEBUG_OFFSET ); 94 break; 95 case ADDRMETH_G: /* ModR/M reg == gen-purpose reg */ 96 ia32_reg_decode( modrm, op, gen_regs ); 97 break; 98 case ADDRMETH_P: /* ModR/M reg == qword MMX reg */ 99 ia32_reg_decode( modrm, op, REG_MMX_OFFSET ); 100 break; 101 case ADDRMETH_S: /* ModR/M reg == segment reg */ 102 ia32_reg_decode( modrm, op, REG_SEG_OFFSET ); 103 break; 104 case ADDRMETH_T: /* ModR/M reg == test reg */ 105 ia32_reg_decode( modrm, op, REG_TEST_OFFSET ); 106 break; 107 case ADDRMETH_V: /* ModR/M reg == SIMD reg */ 108 ia32_reg_decode( modrm, op, REG_SIMD_OFFSET ); 109 break; 110 111 /* No MODRM : note these set operand type explicitly */ 112 case ADDRMETH_A: /* No modR/M -- direct addr */ 113 op->type = op_absolute; 114 115 /* segment:offset address used in far calls */ 116 x86_imm_sized( buf, buf_len, 117 &op->data.absolute.segment, 2 ); 118 if ( insn->addr_size == 4 ) { 119 x86_imm_sized( buf, buf_len, 120 &op->data.absolute.offset.off32, 4 ); 121 size = 6; 122 } else { 123 x86_imm_sized( buf, buf_len, 124 &op->data.absolute.offset.off16, 2 ); 125 size = 4; 126 } 127 128 break; 129 case ADDRMETH_I: /* Immediate val */ 130 op->type = op_immediate; 131 /* if it ever becomes legal to have imm as dest and 132 * there is a src ModR/M operand, we are screwed! */ 133 if ( op->flags & op_signed ) { 134 x86_imm_signsized(buf, buf_len, &op->data.byte, 135 op_size); 136 } else { 137 x86_imm_sized(buf, buf_len, &op->data.byte, 138 op_size); 139 } 140 size = op_size; 141 break; 142 case ADDRMETH_J: /* Rel offset to add to IP [jmp] */ 143 /* this fills op->data.near_offset or 144 op->data.far_offset depending on the size of 145 the operand */ 146 op->flags |= op_signed; 147 if ( op_size == 1 ) { 148 /* one-byte near offset */ 149 op->type = op_relative_near; 150 x86_imm_signsized(buf, buf_len, 151 &op->data.relative_near, 1); 152 } else { 153 /* far offset...is this truly signed? */ 154 op->type = op_relative_far; 155 x86_imm_signsized(buf, buf_len, 156 &op->data.relative_far, op_size ); 157 } 158 size = op_size; 159 break; 160 case ADDRMETH_O: /* No ModR/M; op is word/dword offset */ 161 /* NOTE: these are actually RVAs not offsets to seg!! */ 162 /* note bene: 'O' ADDR_METH uses addr_size to 163 determine operand size */ 164 op->type = op_offset; 165 op->flags |= op_pointer; 166 x86_imm_sized( buf, buf_len, &op->data.offset, 167 insn->addr_size ); 168 169 size = insn->addr_size; 170 break; 171 172 /* Hard-coded: these are specified in the insn definition */ 173 case ADDRMETH_F: /* EFLAGS register */ 174 op->type = op_register; 175 op->flags |= op_hardcode; 176 ia32_handle_register( &op->data.reg, REG_FLAGS_INDEX ); 177 break; 178 case ADDRMETH_X: /* Memory addressed by DS:SI [string] */ 179 op->type = op_expression; 180 op->flags |= op_hardcode; 181 op->flags |= op_ds_seg | op_pointer | op_string; 182 ia32_handle_register( &op->data.expression.base, 183 REG_DWORD_OFFSET + 6 ); 184 break; 185 case ADDRMETH_Y: /* Memory addressed by ES:DI [string] */ 186 op->type = op_expression; 187 op->flags |= op_hardcode; 188 op->flags |= op_es_seg | op_pointer | op_string; 189 ia32_handle_register( &op->data.expression.base, 190 REG_DWORD_OFFSET + 7 ); 191 break; 192 case ADDRMETH_RR: /* Gen Register hard-coded in opcode */ 193 op->type = op_register; 194 op->flags |= op_hardcode; 195 ia32_handle_register( &op->data.reg, 196 op_value + gen_regs ); 197 break; 198 case ADDRMETH_RS: /* Seg Register hard-coded in opcode */ 199 op->type = op_register; 200 op->flags |= op_hardcode; 201 ia32_handle_register( &op->data.reg, 202 op_value + REG_SEG_OFFSET ); 203 break; 204 case ADDRMETH_RF: /* FPU Register hard-coded in opcode */ 205 op->type = op_register; 206 op->flags |= op_hardcode; 207 ia32_handle_register( &op->data.reg, 208 op_value + REG_FPU_OFFSET ); 209 break; 210 case ADDRMETH_RT: /* TST Register hard-coded in opcode */ 211 op->type = op_register; 212 op->flags |= op_hardcode; 213 ia32_handle_register( &op->data.reg, 214 op_value + REG_TEST_OFFSET ); 215 break; 216 case ADDRMETH_II: /* Immediate hard-coded in opcode */ 217 op->type = op_immediate; 218 op->data.dword = op_value; 219 op->flags |= op_hardcode; 220 break; 221 222 case 0: /* Operand is not used */ 223 default: 224 /* ignore -- operand not used in this insn */ 225 op->type = op_unused; /* this shouldn't happen! */ 226 break; 227 } 228 229 return size; 230 } 231 232 static size_t decode_operand_size( unsigned int op_type, x86_insn_t *insn, 233 x86_op_t *op ){ 234 size_t size; 235 236 /* ++ Do Operand Type ++ */ 237 switch (op_type) { 238 /* This sets the operand Size based on the Intel Opcode Map 239 * (Vol 2, Appendix A). Letter encodings are from section 240 * A.1.2, 'Codes for Operand Type' */ 241 /* NOTE: in this routines, 'size' refers to the size 242 * of the operand in the raw (encoded) instruction; 243 * 'datatype' stores the actual size and datatype 244 * of the operand */ 245 246 /* ------------------------ Operand Type ----------------- */ 247 case OPTYPE_c: /* byte or word [op size attr] */ 248 size = (insn->op_size == 4) ? 2 : 1; 249 op->datatype = (size == 4) ? op_word : op_byte; 250 break; 251 case OPTYPE_a: /* 2 word or 2 dword [op size attr] */ 252 /* pointer to a 16:16 or 32:32 BOUNDS operand */ 253 size = (insn->op_size == 4) ? 8 : 4; 254 op->datatype = (size == 4) ? op_bounds32 : op_bounds16; 255 break; 256 case OPTYPE_v: /* word or dword [op size attr] */ 257 size = (insn->op_size == 4) ? 4 : 2; 258 op->datatype = (size == 4) ? op_dword : op_word; 259 break; 260 case OPTYPE_p: /* 32/48-bit ptr [op size attr] */ 261 /* technically these flags are not accurate: the 262 * value s a 16:16 pointer or a 16:32 pointer, where 263 * the first '16' is a segment */ 264 size = (insn->addr_size == 4) ? 6 : 4; 265 op->datatype = (size == 4) ? op_descr32 : op_descr16; 266 break; 267 case OPTYPE_b: /* byte, ignore op-size */ 268 size = 1; 269 op->datatype = op_byte; 270 break; 271 case OPTYPE_w: /* word, ignore op-size */ 272 size = 2; 273 op->datatype = op_word; 274 break; 275 case OPTYPE_d: /* dword , ignore op-size */ 276 size = 4; 277 op->datatype = op_dword; 278 break; 279 case OPTYPE_s: /* 6-byte psuedo-descriptor */ 280 /* ptr to 6-byte value which is 32:16 in 32-bit 281 * mode, or 8:24:16 in 16-bit mode. The high byte 282 * is ignored in 16-bit mode. */ 283 size = 6; 284 op->datatype = (insn->addr_size == 4) ? 285 op_pdescr32 : op_pdescr16; 286 break; 287 case OPTYPE_q: /* qword, ignore op-size */ 288 size = 8; 289 op->datatype = op_qword; 290 break; 291 case OPTYPE_dq: /* d-qword, ignore op-size */ 292 size = 16; 293 op->datatype = op_dqword; 294 break; 295 case OPTYPE_ps: /* 128-bit FP data */ 296 size = 16; 297 /* really this is 4 packed SP FP values */ 298 op->datatype = op_ssimd; 299 break; 300 case OPTYPE_pd: /* 128-bit FP data */ 301 size = 16; 302 /* really this is 2 packed DP FP values */ 303 op->datatype = op_dsimd; 304 break; 305 case OPTYPE_ss: /* Scalar elem of 128-bit FP data */ 306 size = 16; 307 /* this only looks at the low dword (4 bytes) 308 * of the xmmm register passed as a param. 309 * This is a 16-byte register where only 4 bytes 310 * are used in the insn. Painful, ain't it? */ 311 op->datatype = op_sssimd; 312 break; 313 case OPTYPE_sd: /* Scalar elem of 128-bit FP data */ 314 size = 16; 315 /* this only looks at the low qword (8 bytes) 316 * of the xmmm register passed as a param. 317 * This is a 16-byte register where only 8 bytes 318 * are used in the insn. Painful, again... */ 319 op->datatype = op_sdsimd; 320 break; 321 case OPTYPE_pi: /* qword mmx register */ 322 size = 8; 323 op->datatype = op_qword; 324 break; 325 case OPTYPE_si: /* dword integer register */ 326 size = 4; 327 op->datatype = op_dword; 328 break; 329 case OPTYPE_fs: /* single-real */ 330 size = 4; 331 op->datatype = op_sreal; 332 break; 333 case OPTYPE_fd: /* double real */ 334 size = 8; 335 op->datatype = op_dreal; 336 break; 337 case OPTYPE_fe: /* extended real */ 338 size = 10; 339 op->datatype = op_extreal; 340 break; 341 case OPTYPE_fb: /* packed BCD */ 342 size = 10; 343 op->datatype = op_bcd; 344 break; 345 case OPTYPE_fv: /* pointer to FPU env: 14 or 28-bytes */ 346 size = (insn->addr_size == 4)? 28 : 14; 347 op->datatype = (size == 28)? op_fpuenv32: op_fpuenv16; 348 break; 349 case OPTYPE_ft: /* pointer to FPU env: 94 or 108 bytes */ 350 size = (insn->addr_size == 4)? 108 : 94; 351 op->datatype = (size == 108)? 352 op_fpustate32: op_fpustate16; 353 break; 354 case OPTYPE_fx: /* 512-byte register stack */ 355 size = 512; 356 op->datatype = op_fpregset; 357 break; 358 case OPTYPE_fp: /* floating point register */ 359 size = 10; /* double extended precision */ 360 op->datatype = op_fpreg; 361 break; 362 case OPTYPE_m: /* fake operand type used for "lea Gv, M" */ 363 size = insn->addr_size; 364 op->datatype = (size == 4) ? op_dword : op_word; 365 break; 366 case OPTYPE_none: /* handle weird instructions that have no encoding but use a dword datatype, like invlpg */ 367 size = 0; 368 op->datatype = op_none; 369 break; 370 case 0: 371 default: 372 size = insn->op_size; 373 op->datatype = (size == 4) ? op_dword : op_word; 374 break; 375 } 376 return size; 377 } 378 379 size_t ia32_decode_operand( unsigned char *buf, size_t buf_len, 380 x86_insn_t *insn, unsigned int raw_op, 381 unsigned int raw_flags, unsigned int prefixes, 382 unsigned char modrm ) { 383 unsigned int addr_meth, op_type, op_size, gen_regs; 384 x86_op_t *op; 385 size_t size; 386 387 /* ++ Yank optype and addr mode out of operand flags */ 388 addr_meth = raw_flags & ADDRMETH_MASK; 389 op_type = raw_flags & OPTYPE_MASK; 390 391 if ( raw_flags == ARG_NONE ) { 392 /* operand is not used in this instruction */ 393 return 0; 394 } 395 396 /* allocate a new operand */ 397 op = x86_operand_new( insn ); 398 399 /* ++ Copy flags from opcode table to x86_insn_t */ 400 op->access = (enum x86_op_access) OP_PERM(raw_flags); 401 op->flags = (enum x86_op_flags) (OP_FLAGS(raw_flags) >> 12); 402 403 /* Get size (for decoding) and datatype of operand */ 404 op_size = decode_operand_size(op_type, insn, op); 405 406 /* override default register set based on Operand Type */ 407 /* this allows mixing of 8, 16, and 32 bit regs in insn */ 408 if (op_size == 1) { 409 gen_regs = REG_BYTE_OFFSET; 410 } else if (op_size == 2) { 411 gen_regs = REG_WORD_OFFSET; 412 } else { 413 gen_regs = REG_DWORD_OFFSET; 414 } 415 416 size = decode_operand_value( buf, buf_len, op, insn, addr_meth, 417 op_size, raw_op, modrm, gen_regs ); 418 419 /* if operand is an address, apply any segment override prefixes */ 420 if ( op->type == op_expression || op->type == op_offset ) { 421 apply_seg(op, prefixes); 422 } 423 424 return size; /* return number of bytes in instruction */ 425 } 426