1 /* udis86 - libudis86/decode.c 2 * 3 * Copyright (c) 2002-2009 Vivek Thampi 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without modification, 7 * are permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 #include "udint.h" 27 #include "types.h" 28 #include "input.h" 29 #include "decode.h" 30 31 #ifndef __UD_STANDALONE__ 32 # include <string.h> 33 #endif /* __UD_STANDALONE__ */ 34 35 /* The max number of prefixes to an instruction */ 36 #define MAX_PREFIXES 15 37 38 /* rex prefix bits */ 39 #define REX_W(r) ( ( 0xF & ( r ) ) >> 3 ) 40 #define REX_R(r) ( ( 0x7 & ( r ) ) >> 2 ) 41 #define REX_X(r) ( ( 0x3 & ( r ) ) >> 1 ) 42 #define REX_B(r) ( ( 0x1 & ( r ) ) >> 0 ) 43 #define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \ 44 ( P_REXR(n) << 2 ) | \ 45 ( P_REXX(n) << 1 ) | \ 46 ( P_REXB(n) << 0 ) ) 47 48 /* scable-index-base bits */ 49 #define SIB_S(b) ( ( b ) >> 6 ) 50 #define SIB_I(b) ( ( ( b ) >> 3 ) & 7 ) 51 #define SIB_B(b) ( ( b ) & 7 ) 52 53 /* modrm bits */ 54 #define MODRM_REG(b) ( ( ( b ) >> 3 ) & 7 ) 55 #define MODRM_NNN(b) ( ( ( b ) >> 3 ) & 7 ) 56 #define MODRM_MOD(b) ( ( ( b ) >> 6 ) & 3 ) 57 #define MODRM_RM(b) ( ( b ) & 7 ) 58 59 static int decode_ext(struct ud *u, uint16_t ptr); 60 61 enum reg_class { /* register classes */ 62 REGCLASS_NONE, 63 REGCLASS_GPR, 64 REGCLASS_MMX, 65 REGCLASS_CR, 66 REGCLASS_DB, 67 REGCLASS_SEG, 68 REGCLASS_XMM 69 }; 70 71 72 /* 73 * inp_uint8 74 * int_uint16 75 * int_uint32 76 * int_uint64 77 * Load little-endian values from input 78 */ 79 static uint8_t 80 inp_uint8(struct ud* u) 81 { 82 return ud_inp_next(u); 83 } 84 85 static uint16_t 86 inp_uint16(struct ud* u) 87 { 88 uint16_t r, ret; 89 90 ret = ud_inp_next(u); 91 r = ud_inp_next(u); 92 return ret | (r << 8); 93 } 94 95 static uint32_t 96 inp_uint32(struct ud* u) 97 { 98 uint32_t r, ret; 99 100 ret = ud_inp_next(u); 101 r = ud_inp_next(u); 102 ret = ret | (r << 8); 103 r = ud_inp_next(u); 104 ret = ret | (r << 16); 105 r = ud_inp_next(u); 106 return ret | (r << 24); 107 } 108 109 static uint64_t 110 inp_uint64(struct ud* u) 111 { 112 uint64_t r, ret; 113 114 ret = ud_inp_next(u); 115 r = ud_inp_next(u); 116 ret = ret | (r << 8); 117 r = ud_inp_next(u); 118 ret = ret | (r << 16); 119 r = ud_inp_next(u); 120 ret = ret | (r << 24); 121 r = ud_inp_next(u); 122 ret = ret | (r << 32); 123 r = ud_inp_next(u); 124 ret = ret | (r << 40); 125 r = ud_inp_next(u); 126 ret = ret | (r << 48); 127 r = ud_inp_next(u); 128 return ret | (r << 56); 129 } 130 131 132 static inline int 133 eff_opr_mode(int dis_mode, int rex_w, int pfx_opr) 134 { 135 if (dis_mode == 64) { 136 return rex_w ? 64 : (pfx_opr ? 16 : 32); 137 } else if (dis_mode == 32) { 138 return pfx_opr ? 16 : 32; 139 } else { 140 UD_ASSERT(dis_mode == 16); 141 return pfx_opr ? 32 : 16; 142 } 143 } 144 145 146 static inline int 147 eff_adr_mode(int dis_mode, int pfx_adr) 148 { 149 if (dis_mode == 64) { 150 return pfx_adr ? 32 : 64; 151 } else if (dis_mode == 32) { 152 return pfx_adr ? 16 : 32; 153 } else { 154 UD_ASSERT(dis_mode == 16); 155 return pfx_adr ? 32 : 16; 156 } 157 } 158 159 160 /* Looks up mnemonic code in the mnemonic string table 161 * Returns NULL if the mnemonic code is invalid 162 */ 163 const char* 164 ud_lookup_mnemonic(enum ud_mnemonic_code c) 165 { 166 if (c < UD_MAX_MNEMONIC_CODE) { 167 return ud_mnemonics_str[c]; 168 } else { 169 return NULL; 170 } 171 } 172 173 174 /* 175 * decode_prefixes 176 * 177 * Extracts instruction prefixes. 178 */ 179 static int 180 decode_prefixes(struct ud *u) 181 { 182 int done = 0; 183 uint8_t curr; 184 UD_RETURN_ON_ERROR(u); 185 186 do { 187 ud_inp_next(u); 188 UD_RETURN_ON_ERROR(u); 189 if (inp_len(u) == MAX_INSN_LENGTH) { 190 UD_RETURN_WITH_ERROR(u, "max instruction length"); 191 } 192 curr = inp_curr(u); 193 194 switch (curr) 195 { 196 case 0x2E : 197 u->pfx_seg = UD_R_CS; 198 break; 199 case 0x36 : 200 u->pfx_seg = UD_R_SS; 201 break; 202 case 0x3E : 203 u->pfx_seg = UD_R_DS; 204 break; 205 case 0x26 : 206 u->pfx_seg = UD_R_ES; 207 break; 208 case 0x64 : 209 u->pfx_seg = UD_R_FS; 210 break; 211 case 0x65 : 212 u->pfx_seg = UD_R_GS; 213 break; 214 case 0x67 : /* adress-size override prefix */ 215 u->pfx_adr = 0x67; 216 break; 217 case 0xF0 : 218 u->pfx_lock = 0xF0; 219 break; 220 case 0x66: 221 u->pfx_opr = 0x66; 222 break; 223 case 0xF2: 224 u->pfx_str = 0xf2; 225 break; 226 case 0xF3: 227 u->pfx_str = 0xf3; 228 break; 229 default: 230 done = 1; 231 break; 232 } 233 } while (!done); 234 235 if (u->dis_mode == 64 && (curr & 0xF0) == 0x40) { 236 /* rex prefixes in 64bit mode, must be the last prefix 237 */ 238 u->pfx_rex = curr; 239 } else { 240 /* rewind back one byte in stream, since the above loop 241 * stops with a non-prefix byte. 242 */ 243 inp_back(u); 244 } 245 return 0; 246 } 247 248 249 static inline unsigned int modrm( struct ud * u ) 250 { 251 if ( !u->have_modrm ) { 252 u->modrm = ud_inp_next( u ); 253 u->have_modrm = 1; 254 } 255 return u->modrm; 256 } 257 258 259 static unsigned int 260 resolve_operand_size( const struct ud * u, unsigned int s ) 261 { 262 switch ( s ) 263 { 264 case SZ_V: 265 return ( u->opr_mode ); 266 case SZ_Z: 267 return ( u->opr_mode == 16 ) ? 16 : 32; 268 case SZ_Y: 269 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; 270 case SZ_RDQ: 271 return ( u->dis_mode == 64 ) ? 64 : 32; 272 default: 273 return s; 274 } 275 } 276 277 278 static int resolve_mnemonic( struct ud* u ) 279 { 280 /* resolve 3dnow weirdness. */ 281 if ( u->mnemonic == UD_I3dnow ) { 282 u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic; 283 } 284 /* SWAPGS is only valid in 64bits mode */ 285 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { 286 UDERR(u, "swapgs invalid in 64bits mode"); 287 return -1; 288 } 289 290 if (u->mnemonic == UD_Ixchg) { 291 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && 292 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || 293 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && 294 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { 295 u->operand[0].type = UD_NONE; 296 u->operand[1].type = UD_NONE; 297 u->mnemonic = UD_Inop; 298 } 299 } 300 301 if (u->mnemonic == UD_Inop && u->pfx_repe) { 302 u->pfx_repe = 0; 303 u->mnemonic = UD_Ipause; 304 } 305 return 0; 306 } 307 308 309 /* ----------------------------------------------------------------------------- 310 * decode_a()- Decodes operands of the type seg:offset 311 * ----------------------------------------------------------------------------- 312 */ 313 static void 314 decode_a(struct ud* u, struct ud_operand *op) 315 { 316 if (u->opr_mode == 16) { 317 /* seg16:off16 */ 318 op->type = UD_OP_PTR; 319 op->size = 32; 320 op->lval.ptr.off = inp_uint16(u); 321 op->lval.ptr.seg = inp_uint16(u); 322 } else { 323 /* seg16:off32 */ 324 op->type = UD_OP_PTR; 325 op->size = 48; 326 op->lval.ptr.off = inp_uint32(u); 327 op->lval.ptr.seg = inp_uint16(u); 328 } 329 } 330 331 /* ----------------------------------------------------------------------------- 332 * decode_gpr() - Returns decoded General Purpose Register 333 * ----------------------------------------------------------------------------- 334 */ 335 static enum ud_type 336 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) 337 { 338 switch (s) { 339 case 64: 340 return UD_R_RAX + rm; 341 case 32: 342 return UD_R_EAX + rm; 343 case 16: 344 return UD_R_AX + rm; 345 case 8: 346 if (u->dis_mode == 64 && u->pfx_rex) { 347 if (rm >= 4) 348 return UD_R_SPL + (rm-4); 349 return UD_R_AL + rm; 350 } else return UD_R_AL + rm; 351 default: 352 UD_ASSERT(!"invalid operand size"); 353 return 0; 354 } 355 } 356 357 static void 358 decode_reg(struct ud *u, 359 struct ud_operand *opr, 360 int type, 361 int num, 362 int size) 363 { 364 int reg; 365 size = resolve_operand_size(u, size); 366 switch (type) { 367 case REGCLASS_GPR : reg = decode_gpr(u, size, num); break; 368 case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break; 369 case REGCLASS_XMM : reg = UD_R_XMM0 + num; break; 370 case REGCLASS_CR : reg = UD_R_CR0 + num; break; 371 case REGCLASS_DB : reg = UD_R_DR0 + num; break; 372 case REGCLASS_SEG : { 373 /* 374 * Only 6 segment registers, anything else is an error. 375 */ 376 if ((num & 7) > 5) { 377 UDERR(u, "invalid segment register value"); 378 return; 379 } else { 380 reg = UD_R_ES + (num & 7); 381 } 382 break; 383 } 384 default: 385 UD_ASSERT(!"invalid register type"); 386 break; 387 } 388 opr->type = UD_OP_REG; 389 opr->base = reg; 390 opr->size = size; 391 } 392 393 394 /* 395 * decode_imm 396 * 397 * Decode Immediate values. 398 */ 399 static void 400 decode_imm(struct ud* u, unsigned int size, struct ud_operand *op) 401 { 402 op->size = resolve_operand_size(u, size); 403 op->type = UD_OP_IMM; 404 405 switch (op->size) { 406 case 8: op->lval.sbyte = inp_uint8(u); break; 407 case 16: op->lval.uword = inp_uint16(u); break; 408 case 32: op->lval.udword = inp_uint32(u); break; 409 case 64: op->lval.uqword = inp_uint64(u); break; 410 default: return; 411 } 412 } 413 414 415 /* 416 * decode_mem_disp 417 * 418 * Decode mem address displacement. 419 */ 420 static void 421 decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op) 422 { 423 switch (size) { 424 case 8: 425 op->offset = 8; 426 op->lval.ubyte = inp_uint8(u); 427 break; 428 case 16: 429 op->offset = 16; 430 op->lval.uword = inp_uint16(u); 431 break; 432 case 32: 433 op->offset = 32; 434 op->lval.udword = inp_uint32(u); 435 break; 436 case 64: 437 op->offset = 64; 438 op->lval.uqword = inp_uint64(u); 439 break; 440 default: 441 return; 442 } 443 } 444 445 446 /* 447 * decode_modrm_reg 448 * 449 * Decodes reg field of mod/rm byte 450 * 451 */ 452 static inline void 453 decode_modrm_reg(struct ud *u, 454 struct ud_operand *operand, 455 unsigned int type, 456 unsigned int size) 457 { 458 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); 459 decode_reg(u, operand, type, reg, size); 460 } 461 462 463 /* 464 * decode_modrm_rm 465 * 466 * Decodes rm field of mod/rm byte 467 * 468 */ 469 static void 470 decode_modrm_rm(struct ud *u, 471 struct ud_operand *op, 472 unsigned char type, /* register type */ 473 unsigned int size) /* operand size */ 474 475 { 476 size_t offset = 0; 477 unsigned char mod, rm; 478 479 /* get mod, r/m and reg fields */ 480 mod = MODRM_MOD(modrm(u)); 481 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u)); 482 483 /* 484 * If mod is 11b, then the modrm.rm specifies a register. 485 * 486 */ 487 if (mod == 3) { 488 decode_reg(u, op, type, rm, size); 489 return; 490 } 491 492 /* 493 * !11b => Memory Address 494 */ 495 op->type = UD_OP_MEM; 496 op->size = resolve_operand_size(u, size); 497 498 if (u->adr_mode == 64) { 499 op->base = UD_R_RAX + rm; 500 if (mod == 1) { 501 offset = 8; 502 } else if (mod == 2) { 503 offset = 32; 504 } else if (mod == 0 && (rm & 7) == 5) { 505 op->base = UD_R_RIP; 506 offset = 32; 507 } else { 508 offset = 0; 509 } 510 /* 511 * Scale-Index-Base (SIB) 512 */ 513 if ((rm & 7) == 4) { 514 ud_inp_next(u); 515 516 op->scale = (1 << SIB_S(inp_curr(u))) & ~1; 517 op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); 518 op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); 519 520 /* special conditions for base reference */ 521 if (op->index == UD_R_RSP) { 522 op->index = UD_NONE; 523 op->scale = UD_NONE; 524 } 525 526 if (op->base == UD_R_RBP || op->base == UD_R_R13) { 527 if (mod == 0) { 528 op->base = UD_NONE; 529 } 530 if (mod == 1) { 531 offset = 8; 532 } else { 533 offset = 32; 534 } 535 } 536 } 537 } else if (u->adr_mode == 32) { 538 op->base = UD_R_EAX + rm; 539 if (mod == 1) { 540 offset = 8; 541 } else if (mod == 2) { 542 offset = 32; 543 } else if (mod == 0 && rm == 5) { 544 op->base = UD_NONE; 545 offset = 32; 546 } else { 547 offset = 0; 548 } 549 550 /* Scale-Index-Base (SIB) */ 551 if ((rm & 7) == 4) { 552 ud_inp_next(u); 553 554 op->scale = (1 << SIB_S(inp_curr(u))) & ~1; 555 op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); 556 op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); 557 558 if (op->index == UD_R_ESP) { 559 op->index = UD_NONE; 560 op->scale = UD_NONE; 561 } 562 563 /* special condition for base reference */ 564 if (op->base == UD_R_EBP) { 565 if (mod == 0) { 566 op->base = UD_NONE; 567 } 568 if (mod == 1) { 569 offset = 8; 570 } else { 571 offset = 32; 572 } 573 } 574 } 575 } else { 576 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, 577 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX }; 578 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI, 579 UD_NONE, UD_NONE, UD_NONE, UD_NONE }; 580 op->base = bases[rm & 7]; 581 op->index = indices[rm & 7]; 582 if (mod == 0 && rm == 6) { 583 offset = 16; 584 op->base = UD_NONE; 585 } else if (mod == 1) { 586 offset = 8; 587 } else if (mod == 2) { 588 offset = 16; 589 } 590 } 591 592 if (offset) { 593 decode_mem_disp(u, offset, op); 594 } 595 } 596 597 598 /* 599 * decode_moffset 600 * Decode offset-only memory operand 601 */ 602 static void 603 decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr) 604 { 605 opr->type = UD_OP_MEM; 606 opr->size = resolve_operand_size(u, size); 607 decode_mem_disp(u, u->adr_mode, opr); 608 } 609 610 611 /* ----------------------------------------------------------------------------- 612 * decode_operands() - Disassembles Operands. 613 * ----------------------------------------------------------------------------- 614 */ 615 static int 616 decode_operand(struct ud *u, 617 struct ud_operand *operand, 618 enum ud_operand_code type, 619 unsigned int size) 620 { 621 operand->_oprcode = type; 622 623 switch (type) { 624 case OP_A : 625 decode_a(u, operand); 626 break; 627 case OP_MR: 628 decode_modrm_rm(u, operand, REGCLASS_GPR, 629 MODRM_MOD(modrm(u)) == 3 ? 630 Mx_reg_size(size) : Mx_mem_size(size)); 631 break; 632 case OP_F: 633 u->br_far = 1; 634 /* intended fall through */ 635 case OP_M: 636 if (MODRM_MOD(modrm(u)) == 3) { 637 UDERR(u, "expected modrm.mod != 3"); 638 } 639 /* intended fall through */ 640 case OP_E: 641 decode_modrm_rm(u, operand, REGCLASS_GPR, size); 642 break; 643 case OP_G: 644 decode_modrm_reg(u, operand, REGCLASS_GPR, size); 645 break; 646 case OP_sI: 647 case OP_I: 648 decode_imm(u, size, operand); 649 break; 650 case OP_I1: 651 operand->type = UD_OP_CONST; 652 operand->lval.udword = 1; 653 break; 654 case OP_N: 655 if (MODRM_MOD(modrm(u)) != 3) { 656 UDERR(u, "expected modrm.mod == 3"); 657 } 658 /* intended fall through */ 659 case OP_Q: 660 decode_modrm_rm(u, operand, REGCLASS_MMX, size); 661 break; 662 case OP_P: 663 decode_modrm_reg(u, operand, REGCLASS_MMX, size); 664 break; 665 case OP_U: 666 if (MODRM_MOD(modrm(u)) != 3) { 667 UDERR(u, "expected modrm.mod == 3"); 668 } 669 /* intended fall through */ 670 case OP_W: 671 decode_modrm_rm(u, operand, REGCLASS_XMM, size); 672 break; 673 case OP_V: 674 decode_modrm_reg(u, operand, REGCLASS_XMM, size); 675 break; 676 case OP_MU: 677 decode_modrm_rm(u, operand, REGCLASS_XMM, 678 MODRM_MOD(modrm(u)) == 3 ? 679 Mx_reg_size(size) : Mx_mem_size(size)); 680 break; 681 case OP_S: 682 decode_modrm_reg(u, operand, REGCLASS_SEG, size); 683 break; 684 case OP_O: 685 decode_moffset(u, size, operand); 686 break; 687 case OP_R0: 688 case OP_R1: 689 case OP_R2: 690 case OP_R3: 691 case OP_R4: 692 case OP_R5: 693 case OP_R6: 694 case OP_R7: 695 decode_reg(u, operand, REGCLASS_GPR, 696 (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size); 697 break; 698 case OP_AL: 699 case OP_AX: 700 case OP_eAX: 701 case OP_rAX: 702 decode_reg(u, operand, REGCLASS_GPR, 0, size); 703 break; 704 case OP_CL: 705 case OP_CX: 706 case OP_eCX: 707 decode_reg(u, operand, REGCLASS_GPR, 1, size); 708 break; 709 case OP_DL: 710 case OP_DX: 711 case OP_eDX: 712 decode_reg(u, operand, REGCLASS_GPR, 2, size); 713 break; 714 case OP_ES: 715 case OP_CS: 716 case OP_DS: 717 case OP_SS: 718 case OP_FS: 719 case OP_GS: 720 /* in 64bits mode, only fs and gs are allowed */ 721 if (u->dis_mode == 64) { 722 if (type != OP_FS && type != OP_GS) { 723 UDERR(u, "invalid segment register in 64bits"); 724 } 725 } 726 operand->type = UD_OP_REG; 727 operand->base = (type - OP_ES) + UD_R_ES; 728 operand->size = 16; 729 break; 730 case OP_J : 731 decode_imm(u, size, operand); 732 operand->type = UD_OP_JIMM; 733 break ; 734 case OP_R : 735 if (MODRM_MOD(modrm(u)) != 3) { 736 UDERR(u, "expected modrm.mod == 3"); 737 } 738 decode_modrm_rm(u, operand, REGCLASS_GPR, size); 739 break; 740 case OP_C: 741 decode_modrm_reg(u, operand, REGCLASS_CR, size); 742 break; 743 case OP_D: 744 decode_modrm_reg(u, operand, REGCLASS_DB, size); 745 break; 746 case OP_I3 : 747 operand->type = UD_OP_CONST; 748 operand->lval.sbyte = 3; 749 break; 750 case OP_ST0: 751 case OP_ST1: 752 case OP_ST2: 753 case OP_ST3: 754 case OP_ST4: 755 case OP_ST5: 756 case OP_ST6: 757 case OP_ST7: 758 operand->type = UD_OP_REG; 759 operand->base = (type - OP_ST0) + UD_R_ST0; 760 operand->size = 80; 761 break; 762 default : 763 break; 764 } 765 return 0; 766 } 767 768 769 /* 770 * decode_operands 771 * 772 * Disassemble upto 3 operands of the current instruction being 773 * disassembled. By the end of the function, the operand fields 774 * of the ud structure will have been filled. 775 */ 776 static int 777 decode_operands(struct ud* u) 778 { 779 decode_operand(u, &u->operand[0], 780 u->itab_entry->operand1.type, 781 u->itab_entry->operand1.size); 782 decode_operand(u, &u->operand[1], 783 u->itab_entry->operand2.type, 784 u->itab_entry->operand2.size); 785 decode_operand(u, &u->operand[2], 786 u->itab_entry->operand3.type, 787 u->itab_entry->operand3.size); 788 return 0; 789 } 790 791 /* ----------------------------------------------------------------------------- 792 * clear_insn() - clear instruction structure 793 * ----------------------------------------------------------------------------- 794 */ 795 static void 796 clear_insn(register struct ud* u) 797 { 798 u->error = 0; 799 u->pfx_seg = 0; 800 u->pfx_opr = 0; 801 u->pfx_adr = 0; 802 u->pfx_lock = 0; 803 u->pfx_repne = 0; 804 u->pfx_rep = 0; 805 u->pfx_repe = 0; 806 u->pfx_rex = 0; 807 u->pfx_str = 0; 808 u->mnemonic = UD_Inone; 809 u->itab_entry = NULL; 810 u->have_modrm = 0; 811 u->br_far = 0; 812 813 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); 814 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); 815 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); 816 } 817 818 819 static inline int 820 resolve_pfx_str(struct ud* u) 821 { 822 if (u->pfx_str == 0xf3) { 823 if (P_STR(u->itab_entry->prefix)) { 824 u->pfx_rep = 0xf3; 825 } else { 826 u->pfx_repe = 0xf3; 827 } 828 } else if (u->pfx_str == 0xf2) { 829 u->pfx_repne = 0xf3; 830 } 831 return 0; 832 } 833 834 835 static int 836 resolve_mode( struct ud* u ) 837 { 838 /* if in error state, bail out */ 839 if ( u->error ) return -1; 840 841 /* propagate prefix effects */ 842 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ 843 844 /* Check validity of instruction m64 */ 845 if ( P_INV64( u->itab_entry->prefix ) ) { 846 UDERR(u, "instruction invalid in 64bits"); 847 return -1; 848 } 849 850 /* effective rex prefix is the effective mask for the 851 * instruction hard-coded in the opcode map. 852 */ 853 u->pfx_rex = ( u->pfx_rex & 0x40 ) | 854 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) ); 855 856 /* whether this instruction has a default operand size of 857 * 64bit, also hardcoded into the opcode map. 858 */ 859 u->default64 = P_DEF64( u->itab_entry->prefix ); 860 /* calculate effective operand size */ 861 if ( REX_W( u->pfx_rex ) ) { 862 u->opr_mode = 64; 863 } else if ( u->pfx_opr ) { 864 u->opr_mode = 16; 865 } else { 866 /* unless the default opr size of instruction is 64, 867 * the effective operand size in the absence of rex.w 868 * prefix is 32. 869 */ 870 u->opr_mode = ( u->default64 ) ? 64 : 32; 871 } 872 873 /* calculate effective address size */ 874 u->adr_mode = (u->pfx_adr) ? 32 : 64; 875 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ 876 u->opr_mode = ( u->pfx_opr ) ? 16 : 32; 877 u->adr_mode = ( u->pfx_adr ) ? 16 : 32; 878 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ 879 u->opr_mode = ( u->pfx_opr ) ? 32 : 16; 880 u->adr_mode = ( u->pfx_adr ) ? 32 : 16; 881 } 882 883 /* set flags for implicit addressing */ 884 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix ); 885 886 return 0; 887 } 888 889 890 static inline int 891 decode_insn(struct ud *u, uint16_t ptr) 892 { 893 UD_ASSERT((ptr & 0x8000) == 0); 894 u->itab_entry = &ud_itab[ ptr ]; 895 u->mnemonic = u->itab_entry->mnemonic; 896 return (resolve_pfx_str(u) == 0 && 897 resolve_mode(u) == 0 && 898 decode_operands(u) == 0 && 899 resolve_mnemonic(u) == 0) ? 0 : -1; 900 } 901 902 903 /* 904 * decode_3dnow() 905 * 906 * Decoding 3dnow is a little tricky because of its strange opcode 907 * structure. The final opcode disambiguation depends on the last 908 * byte that comes after the operands have been decoded. Fortunately, 909 * all 3dnow instructions have the same set of operand types. So we 910 * go ahead and decode the instruction by picking an arbitrarily chosen 911 * valid entry in the table, decode the operands, and read the final 912 * byte to resolve the menmonic. 913 */ 914 static inline int 915 decode_3dnow(struct ud* u) 916 { 917 uint16_t ptr; 918 UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW); 919 UD_ASSERT(u->le->table[0xc] != 0); 920 decode_insn(u, u->le->table[0xc]); 921 ud_inp_next(u); 922 if (u->error) { 923 return -1; 924 } 925 ptr = u->le->table[inp_curr(u)]; 926 UD_ASSERT((ptr & 0x8000) == 0); 927 u->mnemonic = ud_itab[ptr].mnemonic; 928 return 0; 929 } 930 931 932 static int 933 decode_ssepfx(struct ud *u) 934 { 935 uint8_t idx; 936 uint8_t pfx; 937 938 /* 939 * String prefixes (f2, f3) take precedence over operand 940 * size prefix (66). 941 */ 942 pfx = u->pfx_str; 943 if (pfx == 0) { 944 pfx = u->pfx_opr; 945 } 946 idx = ((pfx & 0xf) + 1) / 2; 947 if (u->le->table[idx] == 0) { 948 idx = 0; 949 } 950 if (idx && u->le->table[idx] != 0) { 951 /* 952 * "Consume" the prefix as a part of the opcode, so it is no 953 * longer exported as an instruction prefix. 954 */ 955 u->pfx_str = 0; 956 if (pfx == 0x66) { 957 /* 958 * consume "66" only if it was used for decoding, leaving 959 * it to be used as an operands size override for some 960 * simd instructions. 961 */ 962 u->pfx_opr = 0; 963 } 964 } 965 return decode_ext(u, u->le->table[idx]); 966 } 967 968 969 /* 970 * decode_ext() 971 * 972 * Decode opcode extensions (if any) 973 */ 974 static int 975 decode_ext(struct ud *u, uint16_t ptr) 976 { 977 uint8_t idx = 0; 978 if ((ptr & 0x8000) == 0) { 979 return decode_insn(u, ptr); 980 } 981 u->le = &ud_lookup_table_list[(~0x8000 & ptr)]; 982 if (u->le->type == UD_TAB__OPC_3DNOW) { 983 return decode_3dnow(u); 984 } 985 986 switch (u->le->type) { 987 case UD_TAB__OPC_MOD: 988 /* !11 = 0, 11 = 1 */ 989 idx = (MODRM_MOD(modrm(u)) + 1) / 4; 990 break; 991 /* disassembly mode/operand size/address size based tables. 992 * 16 = 0,, 32 = 1, 64 = 2 993 */ 994 case UD_TAB__OPC_MODE: 995 idx = u->dis_mode != 64 ? 0 : 1; 996 break; 997 case UD_TAB__OPC_OSIZE: 998 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; 999 break; 1000 case UD_TAB__OPC_ASIZE: 1001 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32; 1002 break; 1003 case UD_TAB__OPC_X87: 1004 idx = modrm(u) - 0xC0; 1005 break; 1006 case UD_TAB__OPC_VENDOR: 1007 if (u->vendor == UD_VENDOR_ANY) { 1008 /* choose a valid entry */ 1009 idx = (u->le->table[idx] != 0) ? 0 : 1; 1010 } else if (u->vendor == UD_VENDOR_AMD) { 1011 idx = 0; 1012 } else { 1013 idx = 1; 1014 } 1015 break; 1016 case UD_TAB__OPC_RM: 1017 idx = MODRM_RM(modrm(u)); 1018 break; 1019 case UD_TAB__OPC_REG: 1020 idx = MODRM_REG(modrm(u)); 1021 break; 1022 case UD_TAB__OPC_SSE: 1023 return decode_ssepfx(u); 1024 default: 1025 UD_ASSERT(!"not reached"); 1026 break; 1027 } 1028 1029 return decode_ext(u, u->le->table[idx]); 1030 } 1031 1032 1033 static int 1034 decode_opcode(struct ud *u) 1035 { 1036 uint16_t ptr; 1037 UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE); 1038 ud_inp_next(u); 1039 if (u->error) { 1040 return -1; 1041 } 1042 u->primary_opcode = inp_curr(u); 1043 ptr = u->le->table[inp_curr(u)]; 1044 if (ptr & 0x8000) { 1045 u->le = &ud_lookup_table_list[ptr & ~0x8000]; 1046 if (u->le->type == UD_TAB__OPC_TABLE) { 1047 return decode_opcode(u); 1048 } 1049 } 1050 return decode_ext(u, ptr); 1051 } 1052 1053 1054 /* ============================================================================= 1055 * ud_decode() - Instruction decoder. Returns the number of bytes decoded. 1056 * ============================================================================= 1057 */ 1058 unsigned int 1059 ud_decode(struct ud *u) 1060 { 1061 inp_start(u); 1062 clear_insn(u); 1063 u->le = &ud_lookup_table_list[0]; 1064 u->error = decode_prefixes(u) == -1 || 1065 decode_opcode(u) == -1 || 1066 u->error; 1067 /* Handle decode error. */ 1068 if (u->error) { 1069 /* clear out the decode data. */ 1070 clear_insn(u); 1071 /* mark the sequence of bytes as invalid. */ 1072 u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */ 1073 u->mnemonic = u->itab_entry->mnemonic; 1074 } 1075 1076 /* maybe this stray segment override byte 1077 * should be spewed out? 1078 */ 1079 if ( !P_SEG( u->itab_entry->prefix ) && 1080 u->operand[0].type != UD_OP_MEM && 1081 u->operand[1].type != UD_OP_MEM ) 1082 u->pfx_seg = 0; 1083 1084 u->insn_offset = u->pc; /* set offset of instruction */ 1085 u->asm_buf_fill = 0; /* set translation buffer index to 0 */ 1086 u->pc += u->inp_ctr; /* move program counter by bytes decoded */ 1087 1088 /* return number of bytes disassembled. */ 1089 return u->inp_ctr; 1090 } 1091 1092 /* 1093 vim: set ts=2 sw=2 expandtab 1094 */ 1095