#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "qword.h"

#include "ia32_insn.h"
#include "ia32_opcode_tables.h"

#include "ia32_reg.h"
#include "ia32_operand.h"
#include "ia32_implicit.h"
#include "ia32_settings.h"

#include "libdis.h"

extern ia32_table_desc_t ia32_tables[];
extern ia32_settings_t ia32_settings;

#define IS_SP( op )  (op->type == op_register &&                \
                      (op->data.reg.id == REG_ESP_INDEX ||      \
                       op->data.reg.alias == REG_ESP_INDEX) )
#define IS_IMM( op ) (op->type == op_immediate )

#ifdef WIN32
#  define INLINE
#else
#  define INLINE inline
#endif

/* for calculating stack modification based on an operand */
static INLINE int32_t long_from_operand( x86_op_t *op ) {

        if (! op || ! IS_IMM(op) ) {
                return 0L;
        }

        switch ( op->datatype ) {
                case op_byte:
                        return (int32_t) op->data.sbyte;
                case op_word:
                        return (int32_t) op->data.sword;
                case op_qword:
                        return (int32_t) op->data.sqword;
                case op_dword:
                        return op->data.sdword;
                default:
                        /* these datatypes are not used in stack insns */
                        break;
        }

        return 0L;
}
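
/* Illustrative example (compiled out): why the signed fields above matter.
 * 'add esp, -4' is normally encoded with a one-byte immediate (83 C4 FC);
 * the 0xFC byte must sign-extend to -4, not zero-extend to 252, or the
 * computed stack delta would be wildly wrong. Plain C; nothing here is
 * part of the decoder. */
#if 0
#include <assert.h>
#include <stdint.h>
static void sign_extension_demo( void ) {
        unsigned char imm = 0xFC;               /* byte from the insn stream */
        int32_t wrong = (int32_t) imm;          /* zero-extends to 252 */
        int32_t right = (int32_t)(int8_t) imm;  /* sign-extends to -4 */
        assert( wrong == 252 && right == -4 );
}
#endif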

/* determine what this insn does to the stack */
static void ia32_stack_mod(x86_insn_t *insn) {
        x86_op_t *dest, *src = NULL;

        if (! insn || ! insn->operands ) {
                return;
        }

        dest = &insn->operands->op;
        if ( insn->operands->next ) {
                src = &insn->operands->next->op;
        }

        insn->stack_mod = 0;
        insn->stack_mod_val = 0;

        switch ( insn->type ) {
                case insn_call:
                case insn_callcc:
                case insn_push:
                        insn->stack_mod = 1;
                        insn->stack_mod_val = insn->addr_size * -1;
                        break;
                case insn_return:
                        insn->stack_mod = 1;
                        insn->stack_mod_val = insn->addr_size;
                        break;
                case insn_int: case insn_intcc:
                case insn_iret:
                        break;
                case insn_pop:
                        insn->stack_mod = 1;
                        if (! IS_SP( dest ) ) {
                                insn->stack_mod_val = insn->op_size;
                        } /* else we don't know the stack change in a pop esp */
                        break;
                case insn_enter:
                case insn_leave:
                case insn_pushregs:
                case insn_popregs:
                case insn_pushflags:
                case insn_popflags:
                        insn->stack_mod = 1;
                        insn->stack_mod_val = 0;        /* TODO : FIX */
                        break;
                case insn_add:
                        if ( IS_SP( dest ) ) {
                                insn->stack_mod = 1;
                                insn->stack_mod_val = long_from_operand( src );
                        }
                        break;
                case insn_sub:
                        if ( IS_SP( dest ) ) {
                                insn->stack_mod = 1;
                                insn->stack_mod_val = long_from_operand( src );
                                insn->stack_mod_val *= -1;
                        }
                        break;
                case insn_inc:
                        if ( IS_SP( dest ) ) {
                                insn->stack_mod = 1;
                                insn->stack_mod_val = 1;
                        }
                        break;
                case insn_dec:
                        if ( IS_SP( dest ) ) {
                                insn->stack_mod = 1;
                                /* 'dec esp' shrinks the stack pointer */
                                insn->stack_mod_val = -1;
                        }
                        break;
                case insn_mov: case insn_movcc:
                case insn_xchg: case insn_xchgcc:
                case insn_mul: case insn_div:
                case insn_shl: case insn_shr:
                case insn_rol: case insn_ror:
                case insn_and: case insn_or:
                case insn_not: case insn_neg:
                case insn_xor:
                        /* for these we leave stack_mod_val at 0:
                         * the stack change cannot be determined */
                        if ( IS_SP( dest ) ) {
                                insn->stack_mod = 1;
                        }
                        break;
                default:
                        break;
        }

        /* enter and leave always modify the stack, however they decoded */
        if (! strcmp("enter", insn->mnemonic) ||
            ! strcmp("leave", insn->mnemonic) ) {
                insn->stack_mod = 1;
        }

        return;
}
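
/* Illustrative example (compiled out): accumulating the net ESP change over
 * a straight-line run of instructions using the stack_mod and stack_mod_val
 * fields set above. The fields are the real libdis ones; the walker itself
 * is hypothetical. */
#if 0
static int32_t net_stack_delta( x86_insn_t *insns, size_t count ) {
        int32_t delta = 0;
        size_t i;
        for ( i = 0; i < count; i++ ) {
                /* stack_mod set with stack_mod_val == 0 means the insn
                 * changes the stack by an unknown amount, e.g.
                 * 'mov esp, eax' -- give up rather than mislead */
                if ( insns[i].stack_mod && ! insns[i].stack_mod_val ) {
                        return 0;
                }
                delta += insns[i].stack_mod_val;
        }
        return delta;
}
#endif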

/* get the cpu details for this insn from the cpu flags int */
static void ia32_handle_cpu( x86_insn_t *insn, unsigned int cpu ) {
        insn->cpu = (enum x86_insn_cpu) CPU_MODEL(cpu);
        insn->isa = (enum x86_insn_isa) (ISA_SUBSET(cpu) >> 16);
        return;
}

/* handle mnemonic type and group */
static void ia32_handle_mnemtype(x86_insn_t *insn, unsigned int mnemtype) {
        unsigned int type = mnemtype & ~INS_FLAG_MASK;
        insn->group = (enum x86_insn_group) (INS_GROUP(type) >> 12);
        insn->type = (enum x86_insn_type) INS_TYPE(type);

        return;
}

static void ia32_handle_notes(x86_insn_t *insn, unsigned int notes) {
        insn->note = (enum x86_insn_note) notes;
        return;
}

static void ia32_handle_eflags( x86_insn_t *insn, unsigned int eflags) {
        unsigned int flags;

        /* handle flags affected */
        flags = INS_FLAGS_TEST(eflags);
        /* handle the weird OR cases: these are either
         * JLE (ZF | SF<>OF) or JBE (CF | ZF) */
        if (flags & INS_TEST_OR) {
                flags &= ~INS_TEST_OR;
                if ( flags & INS_TEST_ZERO ) {
                        flags &= ~INS_TEST_ZERO;
                        if ( flags & INS_TEST_CARRY ) {
                                flags &= ~INS_TEST_CARRY;
                                flags |= (int) insn_carry_or_zero_set;
                        } else if ( flags & INS_TEST_SFNEOF ) {
                                flags &= ~INS_TEST_SFNEOF;
                                flags |= (int) insn_zero_set_or_sign_ne_oflow;
                        }
                }
        }
        insn->flags_tested = (enum x86_flag_status) flags;

        insn->flags_set = (enum x86_flag_status) (INS_FLAGS_SET(eflags) >> 16);

        return;
}

static void ia32_handle_prefix( x86_insn_t *insn, unsigned int prefixes ) {

        insn->prefix = (enum x86_insn_prefix) (prefixes & PREFIX_MASK);
        if (! (insn->prefix & PREFIX_PRINT_MASK) ) {
                /* no printable prefixes */
                insn->prefix = insn_no_prefix;
        }

        /* concatenate all prefix strings; the '- 1' leaves room for
         * the NUL that strncat appends */
        if ( (unsigned int) insn->prefix & PREFIX_LOCK ) {
                strncat(insn->prefix_string, "lock ",
                        32 - strlen(insn->prefix_string) - 1);
        }

        if ( (unsigned int) insn->prefix & PREFIX_REPNZ ) {
                strncat(insn->prefix_string, "repnz ",
                        32 - strlen(insn->prefix_string) - 1);
        } else if ( (unsigned int) insn->prefix & PREFIX_REPZ ) {
                strncat(insn->prefix_string, "repz ",
                        32 - strlen(insn->prefix_string) - 1);
        }

        return;
}


static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) {

        /* if this is a 32-bit register and it is a general register ... */
        if ( op->type == op_register && op->data.reg.size == 4 &&
             (op->data.reg.type & reg_gen) ) {
                /* ... replace it with its 16-bit counterpart:
                 * WORD registers are 8 indices off from DWORD registers */
                ia32_handle_register( &(op->data.reg),
                                op->data.reg.id + 8 );
        }
}

static void handle_insn_metadata( x86_insn_t *insn, ia32_insn_t *raw_insn ) {
        ia32_handle_mnemtype( insn, raw_insn->mnem_flag );
        ia32_handle_notes( insn, raw_insn->notes );
        ia32_handle_eflags( insn, raw_insn->flags_effected );
        ia32_handle_cpu( insn, raw_insn->cpu );
        ia32_stack_mod( insn );
}
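
/* Illustrative example (compiled out): consuming the combined flag values
 * produced by ia32_handle_eflags() above. insn_jcc and
 * insn_carry_or_zero_set are the real libdis enums; the predicate itself
 * is hypothetical. */
#if 0
static int is_unsigned_le_branch( x86_insn_t *insn ) {
        /* JBE/JNA tests 'CF or ZF', which the decoder folds into the
         * single pseudo-flag insn_carry_or_zero_set */
        return insn->type == insn_jcc &&
               ( insn->flags_tested & insn_carry_or_zero_set ) != 0;
}
#endif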

static size_t ia32_decode_insn( unsigned char *buf, size_t buf_len,
                                ia32_insn_t *raw_insn, x86_insn_t *insn,
                                unsigned int prefixes ) {
        size_t size, op_size;
        unsigned char modrm;

        /* this should never happen, but just in case... */
        if ( raw_insn->mnem_flag == INS_INVALID ) {
                return 0;
        }

        /* operand and address sizes default to the CPU mode and are
         * toggled by the 0x66 and 0x67 prefixes, respectively */
        if (ia32_settings.options & opt_16_bit) {
                insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2;
                insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2;
        } else {
                insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4;
                insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4;
        }


        /* ++++ 1. Copy the mnemonic and mnemonic flags to the insn struct */
        if ((ia32_settings.options & opt_att_mnemonics) &&
             raw_insn->mnemonic_att[0]) {
                strncpy( insn->mnemonic, raw_insn->mnemonic_att, 16 );
        }
        else {
                strncpy( insn->mnemonic, raw_insn->mnemonic, 16 );
        }
        ia32_handle_prefix( insn, prefixes );

        handle_insn_metadata( insn, raw_insn );

        /* prefetch the next byte in case it is a ModR/M byte -- this saves
         * worrying about whether the 'mod/rm' operand or the 'reg' operand
         * occurs first */
        modrm = GET_BYTE( buf, buf_len );

        /* ++++ 2. Decode Explicit Operands */
        /* Intel uses up to 3 explicit operands in its instructions:
         * the first is 'dest', the second is 'src', and the third
         * is an additional source value (usually an immediate value,
         * e.g. in the MUL instructions). These three explicit operands
         * are encoded in the opcode tables, even if they are not used
         * by the instruction. Additional implicit operands are stored
         * in a supplemental table and are handled later. */

        op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->dest,
                                raw_insn->dest_flag, prefixes, modrm );
        /* advance the buffer past the bytes the operand consumed */
        buf += op_size;
        buf_len -= op_size;
        size = op_size;

        op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->src,
                                raw_insn->src_flag, prefixes, modrm );
        buf += op_size;
        buf_len -= op_size;
        size += op_size;

        op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->aux,
                                raw_insn->aux_flag, prefixes, modrm );
        size += op_size;


        /* ++++ 3. Decode Implicit Operands */
        /* apply implicit operands */
        ia32_insn_implicit_ops( insn, raw_insn->implicit_ops );
        /* we have one small inelegant hack here, to deal with
         * the two prefixes that have implicit operands. If Intel
         * adds more, we'll change the algorithm to suit :) */
        if ( (prefixes & PREFIX_REPZ) || (prefixes & PREFIX_REPNZ) ) {
                ia32_insn_implicit_ops( insn, IDX_IMPLICIT_REP );
        }


        /* 16-bit hack: for each operand, if it is a 32-bit register,
         * replace it with the corresponding 16-bit register */
        if ( insn->op_size == 2 ) {
                x86_operand_foreach( insn, reg_32_to_16, NULL, op_any );
        }

        return size;
}
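
/* Illustrative example (compiled out): the operand-size computation at the
 * top of ia32_decode_insn() in isolation. The 0x66 prefix toggles between
 * the mode's native size and the other size: 32-bit mode shrinks to 16 bits,
 * 16-bit mode grows to 32. Hypothetical standalone helper. */
#if 0
static unsigned int effective_op_size( int mode_is_16_bit,
                                       int has_op_size_prefix ) {
        if ( mode_is_16_bit ) {
                return has_op_size_prefix ? 4 : 2;
        }
        return has_op_size_prefix ? 2 : 4;
}
#endif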


/* convenience routine */
#define USES_MOD_RM(flag) \
        (flag == ADDRMETH_E || flag == ADDRMETH_M || flag == ADDRMETH_Q || \
         flag == ADDRMETH_W || flag == ADDRMETH_R)

static int uses_modrm_flag( unsigned int flag ) {
        unsigned int meth;
        if ( flag == ARG_NONE ) {
                return 0;
        }
        meth = (flag & ADDRMETH_MASK);
        if ( USES_MOD_RM(meth) ) {
                return 1;
        }

        return 0;
}
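
/* Illustrative example (compiled out): the shift/mask "ModR/M trick" used
 * by ia32_table_lookup() below. Opcode-extension tables (e.g. the group
 * for opcodes 0x80-0x83) are indexed by the 3-bit 'reg' field of the
 * ModR/M byte, so their descriptors carry shift = 3 and mask = 0x07.
 * Hypothetical standalone helper; the descriptor values are assumptions
 * about the table data, not taken from this file. */
#if 0
static unsigned char ext_table_index( unsigned char modrm ) {
        return (unsigned char) ((modrm >> 3) & 0x07);   /* 0xC4 -> /0 slot */
}
#endif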

/* This routine performs the actual byte-by-byte opcode table lookup.
 * Originally it was pretty simple: get a byte, adjust it to a proper
 * index into the table, then check the table row at that index to
 * determine what to do next. But is anything that simple with Intel?
 * This is now a huge, convoluted mess, mostly of bitter comments. */
/* buf      : pointer to next byte to read from stream
 * buf_len  : length of buf
 * table    : index of table to use for lookups
 * raw_insn : output pointer that receives the opcode definition
 * prefixes : output integer that is encoded with the prefixes in the insn
 * returns  : number of bytes consumed from the stream during the lookup */
size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
                        unsigned int table, ia32_insn_t **raw_insn,
                        unsigned int *prefixes ) {
        unsigned char *next, op;        /* byte value -- 'opcode' */
        size_t size = 1, sub_size = 0, next_len;
        ia32_table_desc_t *table_desc;
        unsigned int subtable, prefix = 0, recurse_table = 0;

        table_desc = &ia32_tables[table];

        op = GET_BYTE( buf, buf_len );

        if ( table_desc->type == tbl_fpu && op > table_desc->maxlim) {
                /* one of the FPU tables, outside the 00-BF range.
                 * OK, this is a bit of a hack -- the proper way would
                 * have been to use subtables in the 00-BF FPU opcode
                 * tables, but that is rather wasteful of space... */
                table_desc = &ia32_tables[table + 1];
        }

        /* PERFORM TABLE LOOKUP */

        /* ModR/M trick: shift extension bits into lowest bits of byte */
        /* Note: non-ModR/M tables have a shift value of 0 */
        op >>= table_desc->shift;

        /* ModR/M trick: mask out high bits to turn extension into an index */
        /* Note: non-ModR/M tables have a mask value of 0xFF */
        op &= table_desc->mask;


        /* Sparse table trick: check that byte is <= max value */
        /* Note: full (256-entry) tables have a maxlim of 255 */
        if ( op > table_desc->maxlim ) {
                /* this is a partial table, truncated at the tail,
                   and op is out of range! */
                return INVALID_INSN;
        }

        /* Sparse table trick: check that byte is >= min value */
        /* Note: full (256-entry) tables have a minlim of 0 */
        if ( table_desc->minlim > op ) {
                /* this is a partial table, truncated at the head,
                   and op is out of range! */
                return INVALID_INSN;
        }
        /* adjust op to be an offset from table index 0 */
        op -= table_desc->minlim;

        /* Yay! 'op' is now fully adjusted to be an index into 'table' */
        *raw_insn = &(table_desc->table[op]);

        if ( (*raw_insn)->mnem_flag & INS_FLAG_PREFIX ) {
                prefix = (*raw_insn)->mnem_flag & PREFIX_MASK;
        }


        /* handle escape to a multibyte/coproc/extension/etc table */
        /* NOTE: if insn is a prefix and has a subtable, then we
         *       only recurse if this is the first prefix byte --
         *       that is, if *prefixes is 0.
         * NOTE also that suffix tables are handled later */
        subtable = (*raw_insn)->table;

        if ( subtable && ia32_tables[subtable].type != tbl_suffix &&
             (! prefix || ! *prefixes) ) {

                if ( ia32_tables[subtable].type == tbl_ext_ext ||
                     ia32_tables[subtable].type == tbl_fpu_ext ) {
                        /* opcode extension: reuse current byte in buffer */
                        next = buf;
                        next_len = buf_len;
                } else {
                        /* "normal" opcode: advance to next byte in buffer */
                        if ( buf_len > 1 ) {
                                next = &buf[1];
                                next_len = buf_len - 1;
                        }
                        else {
                                /* buffer is truncated */
                                return INVALID_INSN;
                        }
                }
                /* we encountered a multibyte opcode: recurse using the
                 * table specified in the opcode definition */
                sub_size = ia32_table_lookup( next, next_len, subtable,
                                raw_insn, prefixes );

                /* SSE/prefix hack: if the original opcode def was a
                 * prefix that specified a subtable, and the subtable
                 * lookup returned a valid insn, then we have encountered
                 * an SSE opcode definition; otherwise, we pretend we
                 * never did the subtable lookup, and deal with the
                 * prefix normally later */
                if ( prefix && ( sub_size == INVALID_INSN ||
                     INS_TYPE((*raw_insn)->mnem_flag) == INS_INVALID ) ) {
                        /* this is a prefix, not an SSE insn:
                         * look up the next byte in the main table;
                         * sub_size will be reset during the
                         * main table lookup */
                        recurse_table = 1;
                } else {
                        /* this is either a subtable (two-byte) insn
                         * or an invalid insn: either way, clear the
                         * prefix and end the opcode lookup */
                        prefix = 0;
                        /* short-circuit lookup on invalid insn */
                        if (sub_size == INVALID_INSN) {
                                return INVALID_INSN;
                        }
                }
        } else if ( prefix ) {
                recurse_table = 1;
        }

        /* by default, we assume that we have the opcode definition,
         * and there is no need to recurse on the same table; but if
         * we do recurse, then a prefix was encountered... */
        if ( recurse_table ) {
                /* this must have been a prefix: use the same table for
                 * lookup of the next byte */
                sub_size = ia32_table_lookup( &buf[1], buf_len - 1, table,
                                raw_insn, prefixes );

                /* short-circuit lookup on invalid insn */
                if (sub_size == INVALID_INSN) {
                        return INVALID_INSN;
                }

                /* a bit of a hack for branch hints */
                if ( prefix & BRANCH_HINT_MASK ) {
                        if ( INS_GROUP((*raw_insn)->mnem_flag) == INS_EXEC ) {
                                /* segment override prefixes are invalid for
                                 * all branch instructions, so delete them */
                                prefix &= ~PREFIX_REG_MASK;
                        } else {
                                prefix &= ~BRANCH_HINT_MASK;
                        }
                }

                /* apply prefix to instruction */

                /* TODO: implement something enforcing prefix groups */
                (*prefixes) |= prefix;
        }

        /* if this lookup was in a ModR/M table, then an opcode byte is
         * NOT consumed: subtract accordingly. NOTE that if none of the
         * operands used the ModR/M, then we need to consume the byte
         * here, but ONLY in the 'top-level' opcode extension table */

        if ( table_desc->type == tbl_ext_ext ) {
                /* extensions-to-extensions never consume a byte */
                --size;
        } else if ( (table_desc->type == tbl_extension ||
                     table_desc->type == tbl_fpu ||
                     table_desc->type == tbl_fpu_ext ) &&
                    /* extensions that have an operand encoded in ModR/M
                     * never consume a byte */
                    (uses_modrm_flag((*raw_insn)->dest_flag) ||
                     uses_modrm_flag((*raw_insn)->src_flag) ) ) {
                --size;
        }

        size += sub_size;

        return size;
}
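
/* Worked example (compiled out) of the recursion above: for the stream
 * F3 90 ('pause', which decodes as 'repz nop' on pre-SSE2 CPUs), the first
 * lookup lands on the F3 prefix entry, sets recurse_table, and re-enters
 * the same table at 0x90; the call should return 2 with PREFIX_REPZ OR'd
 * into *prefixes. The driver function is hypothetical. */
#if 0
static size_t lookup_demo( void ) {
        unsigned char stream[] = { 0xF3, 0x90 };
        ia32_insn_t *def = NULL;
        unsigned int prefixes = 0;
        return ia32_table_lookup( stream, sizeof(stream), idx_Main,
                        &def, &prefixes );
}
#endif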

static size_t handle_insn_suffix( unsigned char *buf, size_t buf_len,
                           ia32_insn_t *raw_insn, x86_insn_t *insn ) {
        ia32_insn_t *sfx_insn;
        size_t size;
        unsigned int prefixes = 0;

        size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn,
                        &prefixes );
        if (size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) {
                return 0;
        }

        strncpy( insn->mnemonic, sfx_insn->mnemonic, 16 );
        handle_insn_metadata( insn, sfx_insn );

        /* the suffix opcode is a single byte */
        return 1;
}

/* invalid instructions are handled by returning 0 [error] from the
 * function, setting the size of the insn to 1 byte, and copying
 * the byte at the start of the invalid insn into the x86_insn_t.
 * if the caller is saving the x86_insn_t for invalid instructions,
 * instead of discarding them, this will maintain a consistent
 * address space in the x86_insn_ts */

/* this function is called by the controlling disassembler, so its name and
 * calling convention cannot be changed */
/* buf  : points to the location of the current opcode (the start of the
 *        instruction) in the instruction stream. The instruction stream
 *        is assumed to be a buffer of bytes read directly from the file
 *        for the purpose of disassembly; a mem-mapped file is ideal for
 *        this.
 * insn : points to a code structure to be filled in by the decoder
 * returns : the size of the decoded instruction in bytes */
size_t ia32_disasm_addr( unsigned char * buf, size_t buf_len,
                x86_insn_t *insn ) {
        ia32_insn_t *raw_insn = NULL;
        unsigned int prefixes = 0;
        size_t size, sfx_size;

        if ( (ia32_settings.options & opt_ignore_nulls) && buf_len > 3 &&
             !buf[0] && !buf[1] && !buf[2] && !buf[3]) {
                /* IF IGNORE_NULLS is set AND
                 * the first 4 bytes in the instruction stream are NULL
                 * THEN return 0 (END_OF_DISASSEMBLY) */
                /* TODO: set errno */
                MAKE_INVALID( insn, buf );
                return 0;       /* four 00 bytes in a row? This isn't code! */
        }

        /* Perform recursive table lookup starting with main table (0) */
        size = ia32_table_lookup(buf, buf_len, idx_Main, &raw_insn,
                        &prefixes);
        if ( size == INVALID_INSN || size > buf_len ||
             raw_insn->mnem_flag == INS_INVALID ) {
                /* TODO: set errno */
                MAKE_INVALID( insn, buf );
                return 0;
        }

        /* We now have the opcode itself figured out: we can decode
         * the rest of the instruction. */
        size += ia32_decode_insn( &buf[size], buf_len - size, raw_insn, insn,
                        prefixes );
        if ( raw_insn->mnem_flag & INS_FLAG_SUFFIX ) {
                /* AMD 3DNow! suffix -- get the proper operand type here */
                sfx_size = handle_insn_suffix( &buf[size], buf_len - size,
                                raw_insn, insn );
                if (! sfx_size ) {
                        /* TODO: set errno */
                        MAKE_INVALID( insn, buf );
                        return 0;
                }

                size += sfx_size;
        }

        if (! size ) {
                /* invalid insn */
                MAKE_INVALID( insn, buf );
                return 0;
        }


        insn->size = size;
        return size;    /* return size of instruction in bytes */
}
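
/* Usage sketch (compiled out): driving ia32_disasm_addr() directly over a
 * raw byte buffer. Real clients normally go through the libdis x86_disasm()
 * wrapper; this minimal loop is hypothetical and assumes the caller zeroes
 * the x86_insn_t between iterations. */
#if 0
static void disasm_buffer( unsigned char *buf, size_t len ) {
        x86_insn_t insn;
        size_t pos = 0, size;
        while ( pos < len ) {
                memset( &insn, 0, sizeof(insn) );
                size = ia32_disasm_addr( buf + pos, len - pos, &insn );
                if (! size ) {
                        pos++;  /* skip the invalid byte and resync */
                        continue;
                }
                printf( "%08zx\t%s\n", pos, insn.mnemonic );
                pos += size;
        }
}
#endif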