Home | History | Annotate | Download | only in libudis86
      1 /* udis86 - libudis86/decode.c
      2  *
      3  * Copyright (c) 2002-2009 Vivek Thampi
      4  * All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification,
      7  * are permitted provided that the following conditions are met:
      8  *
      9  *     * Redistributions of source code must retain the above copyright notice,
     10  *       this list of conditions and the following disclaimer.
     11  *     * Redistributions in binary form must reproduce the above copyright notice,
     12  *       this list of conditions and the following disclaimer in the documentation
     13  *       and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     22  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 #include "udint.h"
     27 #include "types.h"
     28 #include "input.h"
     29 #include "decode.h"
     30 
     31 #ifndef __UD_STANDALONE__
     32 # include <string.h>
     33 #endif /* __UD_STANDALONE__ */
     34 
     35 /* The max number of prefixes to an instruction */
     36 #define MAX_PREFIXES    15
     37 
     38 /* rex prefix bits */
     39 #define REX_W(r)        ( ( 0xF & ( r ) )  >> 3 )
     40 #define REX_R(r)        ( ( 0x7 & ( r ) )  >> 2 )
     41 #define REX_X(r)        ( ( 0x3 & ( r ) )  >> 1 )
     42 #define REX_B(r)        ( ( 0x1 & ( r ) )  >> 0 )
     43 #define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
     44                           ( P_REXR(n) << 2 ) | \
     45                           ( P_REXX(n) << 1 ) | \
     46                           ( P_REXB(n) << 0 ) )
     47 
     48 /* scable-index-base bits */
     49 #define SIB_S(b)        ( ( b ) >> 6 )
     50 #define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
     51 #define SIB_B(b)        ( ( b ) & 7 )
     52 
     53 /* modrm bits */
     54 #define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
     55 #define MODRM_NNN(b)    ( ( ( b ) >> 3 ) & 7 )
     56 #define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
     57 #define MODRM_RM(b)     ( ( b ) & 7 )
     58 
     59 static int decode_ext(struct ud *u, uint16_t ptr);
     60 
     61 enum reg_class { /* register classes */
     62   REGCLASS_NONE,
     63   REGCLASS_GPR,
     64   REGCLASS_MMX,
     65   REGCLASS_CR,
     66   REGCLASS_DB,
     67   REGCLASS_SEG,
     68   REGCLASS_XMM
     69 };
     70 
     71 
     72 /*
     73  * inp_uint8
     74  * int_uint16
     75  * int_uint32
     76  * int_uint64
     77  *    Load little-endian values from input
     78  */
     79 static uint8_t
     80 inp_uint8(struct ud* u)
     81 {
     82   return ud_inp_next(u);
     83 }
     84 
     85 static uint16_t
     86 inp_uint16(struct ud* u)
     87 {
     88   uint16_t r, ret;
     89 
     90   ret = ud_inp_next(u);
     91   r = ud_inp_next(u);
     92   return ret | (r << 8);
     93 }
     94 
     95 static uint32_t
     96 inp_uint32(struct ud* u)
     97 {
     98   uint32_t r, ret;
     99 
    100   ret = ud_inp_next(u);
    101   r = ud_inp_next(u);
    102   ret = ret | (r << 8);
    103   r = ud_inp_next(u);
    104   ret = ret | (r << 16);
    105   r = ud_inp_next(u);
    106   return ret | (r << 24);
    107 }
    108 
    109 static uint64_t
    110 inp_uint64(struct ud* u)
    111 {
    112   uint64_t r, ret;
    113 
    114   ret = ud_inp_next(u);
    115   r = ud_inp_next(u);
    116   ret = ret | (r << 8);
    117   r = ud_inp_next(u);
    118   ret = ret | (r << 16);
    119   r = ud_inp_next(u);
    120   ret = ret | (r << 24);
    121   r = ud_inp_next(u);
    122   ret = ret | (r << 32);
    123   r = ud_inp_next(u);
    124   ret = ret | (r << 40);
    125   r = ud_inp_next(u);
    126   ret = ret | (r << 48);
    127   r = ud_inp_next(u);
    128   return ret | (r << 56);
    129 }
    130 
    131 
    132 static inline int
    133 eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
    134 {
    135   if (dis_mode == 64) {
    136     return rex_w ? 64 : (pfx_opr ? 16 : 32);
    137   } else if (dis_mode == 32) {
    138     return pfx_opr ? 16 : 32;
    139   } else {
    140     UD_ASSERT(dis_mode == 16);
    141     return pfx_opr ? 32 : 16;
    142   }
    143 }
    144 
    145 
    146 static inline int
    147 eff_adr_mode(int dis_mode, int pfx_adr)
    148 {
    149   if (dis_mode == 64) {
    150     return pfx_adr ? 32 : 64;
    151   } else if (dis_mode == 32) {
    152     return pfx_adr ? 16 : 32;
    153   } else {
    154     UD_ASSERT(dis_mode == 16);
    155     return pfx_adr ? 32 : 16;
    156   }
    157 }
    158 
    159 
    160 /* Looks up mnemonic code in the mnemonic string table
    161  * Returns NULL if the mnemonic code is invalid
    162  */
    163 const char*
    164 ud_lookup_mnemonic(enum ud_mnemonic_code c)
    165 {
    166   if (c < UD_MAX_MNEMONIC_CODE) {
    167     return ud_mnemonics_str[c];
    168   } else {
    169     return NULL;
    170   }
    171 }
    172 
    173 
    174 /*
    175  * decode_prefixes
    176  *
    177  *  Extracts instruction prefixes.
    178  */
    179 static int
    180 decode_prefixes(struct ud *u)
    181 {
    182   int done = 0;
    183   uint8_t curr;
    184   UD_RETURN_ON_ERROR(u);
    185 
    186   do {
    187     ud_inp_next(u);
    188     UD_RETURN_ON_ERROR(u);
    189     if (inp_len(u) == MAX_INSN_LENGTH) {
    190       UD_RETURN_WITH_ERROR(u, "max instruction length");
    191     }
    192     curr = inp_curr(u);
    193 
    194     switch (curr)
    195     {
    196     case 0x2E :
    197       u->pfx_seg = UD_R_CS;
    198       break;
    199     case 0x36 :
    200       u->pfx_seg = UD_R_SS;
    201       break;
    202     case 0x3E :
    203       u->pfx_seg = UD_R_DS;
    204       break;
    205     case 0x26 :
    206       u->pfx_seg = UD_R_ES;
    207       break;
    208     case 0x64 :
    209       u->pfx_seg = UD_R_FS;
    210       break;
    211     case 0x65 :
    212       u->pfx_seg = UD_R_GS;
    213       break;
    214     case 0x67 : /* adress-size override prefix */
    215       u->pfx_adr = 0x67;
    216       break;
    217     case 0xF0 :
    218       u->pfx_lock = 0xF0;
    219       break;
    220     case 0x66:
    221       u->pfx_opr = 0x66;
    222       break;
    223     case 0xF2:
    224       u->pfx_str = 0xf2;
    225       break;
    226     case 0xF3:
    227       u->pfx_str = 0xf3;
    228       break;
    229     default:
    230       done = 1;
    231       break;
    232     }
    233   } while (!done);
    234 
    235   if (u->dis_mode == 64 && (curr & 0xF0) == 0x40) {
    236     /* rex prefixes in 64bit mode, must be the last prefix
    237      */
    238     u->pfx_rex = curr;
    239   } else {
    240     /* rewind back one byte in stream, since the above loop
    241      * stops with a non-prefix byte.
    242      */
    243     inp_back(u);
    244   }
    245   return 0;
    246 }
    247 
    248 
    249 static inline unsigned int modrm( struct ud * u )
    250 {
    251     if ( !u->have_modrm ) {
    252         u->modrm = ud_inp_next( u );
    253         u->have_modrm = 1;
    254     }
    255     return u->modrm;
    256 }
    257 
    258 
    259 static unsigned int
    260 resolve_operand_size( const struct ud * u, unsigned int s )
    261 {
    262     switch ( s )
    263     {
    264     case SZ_V:
    265         return ( u->opr_mode );
    266     case SZ_Z:
    267         return ( u->opr_mode == 16 ) ? 16 : 32;
    268     case SZ_Y:
    269         return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
    270     case SZ_RDQ:
    271         return ( u->dis_mode == 64 ) ? 64 : 32;
    272     default:
    273         return s;
    274     }
    275 }
    276 
    277 
    278 static int resolve_mnemonic( struct ud* u )
    279 {
    280   /* resolve 3dnow weirdness. */
    281   if ( u->mnemonic == UD_I3dnow ) {
    282     u->mnemonic = ud_itab[ u->le->table[ inp_curr( u )  ] ].mnemonic;
    283   }
    284   /* SWAPGS is only valid in 64bits mode */
    285   if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
    286     UDERR(u, "swapgs invalid in 64bits mode");
    287     return -1;
    288   }
    289 
    290   if (u->mnemonic == UD_Ixchg) {
    291     if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
    292          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
    293         (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
    294          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
    295       u->operand[0].type = UD_NONE;
    296       u->operand[1].type = UD_NONE;
    297       u->mnemonic = UD_Inop;
    298     }
    299   }
    300 
    301   if (u->mnemonic == UD_Inop && u->pfx_repe) {
    302     u->pfx_repe = 0;
    303     u->mnemonic = UD_Ipause;
    304   }
    305   return 0;
    306 }
    307 
    308 
    309 /* -----------------------------------------------------------------------------
    310  * decode_a()- Decodes operands of the type seg:offset
    311  * -----------------------------------------------------------------------------
    312  */
    313 static void
    314 decode_a(struct ud* u, struct ud_operand *op)
    315 {
    316   if (u->opr_mode == 16) {
    317     /* seg16:off16 */
    318     op->type = UD_OP_PTR;
    319     op->size = 32;
    320     op->lval.ptr.off = inp_uint16(u);
    321     op->lval.ptr.seg = inp_uint16(u);
    322   } else {
    323     /* seg16:off32 */
    324     op->type = UD_OP_PTR;
    325     op->size = 48;
    326     op->lval.ptr.off = inp_uint32(u);
    327     op->lval.ptr.seg = inp_uint16(u);
    328   }
    329 }
    330 
    331 /* -----------------------------------------------------------------------------
    332  * decode_gpr() - Returns decoded General Purpose Register
    333  * -----------------------------------------------------------------------------
    334  */
    335 static enum ud_type
    336 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
    337 {
    338   switch (s) {
    339     case 64:
    340         return UD_R_RAX + rm;
    341     case 32:
    342         return UD_R_EAX + rm;
    343     case 16:
    344         return UD_R_AX  + rm;
    345     case  8:
    346         if (u->dis_mode == 64 && u->pfx_rex) {
    347             if (rm >= 4)
    348                 return UD_R_SPL + (rm-4);
    349             return UD_R_AL + rm;
    350         } else return UD_R_AL + rm;
    351     default:
    352         UD_ASSERT(!"invalid operand size");
    353         return 0;
    354   }
    355 }
    356 
    357 static void
    358 decode_reg(struct ud *u,
    359            struct ud_operand *opr,
    360            int type,
    361            int num,
    362            int size)
    363 {
    364   int reg;
    365   size = resolve_operand_size(u, size);
    366   switch (type) {
    367     case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
    368     case REGCLASS_MMX : reg = UD_R_MM0  + (num & 7); break;
    369     case REGCLASS_XMM : reg = UD_R_XMM0 + num; break;
    370     case REGCLASS_CR : reg = UD_R_CR0  + num; break;
    371     case REGCLASS_DB : reg = UD_R_DR0  + num; break;
    372     case REGCLASS_SEG : {
    373       /*
    374        * Only 6 segment registers, anything else is an error.
    375        */
    376       if ((num & 7) > 5) {
    377         UDERR(u, "invalid segment register value");
    378         return;
    379       } else {
    380         reg = UD_R_ES + (num & 7);
    381       }
    382       break;
    383     }
    384     default:
    385       UD_ASSERT(!"invalid register type");
    386       break;
    387   }
    388   opr->type = UD_OP_REG;
    389   opr->base = reg;
    390   opr->size = size;
    391 }
    392 
    393 
    394 /*
    395  * decode_imm
    396  *
    397  *    Decode Immediate values.
    398  */
    399 static void
    400 decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
    401 {
    402   op->size = resolve_operand_size(u, size);
    403   op->type = UD_OP_IMM;
    404 
    405   switch (op->size) {
    406   case  8: op->lval.sbyte = inp_uint8(u);   break;
    407   case 16: op->lval.uword = inp_uint16(u);  break;
    408   case 32: op->lval.udword = inp_uint32(u); break;
    409   case 64: op->lval.uqword = inp_uint64(u); break;
    410   default: return;
    411   }
    412 }
    413 
    414 
    415 /*
    416  * decode_mem_disp
    417  *
    418  *    Decode mem address displacement.
    419  */
    420 static void
    421 decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
    422 {
    423   switch (size) {
    424   case 8:
    425     op->offset = 8;
    426     op->lval.ubyte  = inp_uint8(u);
    427     break;
    428   case 16:
    429     op->offset = 16;
    430     op->lval.uword  = inp_uint16(u);
    431     break;
    432   case 32:
    433     op->offset = 32;
    434     op->lval.udword = inp_uint32(u);
    435     break;
    436   case 64:
    437     op->offset = 64;
    438     op->lval.uqword = inp_uint64(u);
    439     break;
    440   default:
    441       return;
    442   }
    443 }
    444 
    445 
    446 /*
    447  * decode_modrm_reg
    448  *
    449  *    Decodes reg field of mod/rm byte
    450  *
    451  */
    452 static inline void
    453 decode_modrm_reg(struct ud         *u,
    454                  struct ud_operand *operand,
    455                  unsigned int       type,
    456                  unsigned int       size)
    457 {
    458   uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
    459   decode_reg(u, operand, type, reg, size);
    460 }
    461 
    462 
    463 /*
    464  * decode_modrm_rm
    465  *
    466  *    Decodes rm field of mod/rm byte
    467  *
    468  */
    469 static void
    470 decode_modrm_rm(struct ud         *u,
    471                 struct ud_operand *op,
    472                 unsigned char      type,    /* register type */
    473                 unsigned int       size)    /* operand size */
    474 
    475 {
    476   size_t offset = 0;
    477   unsigned char mod, rm;
    478 
    479   /* get mod, r/m and reg fields */
    480   mod = MODRM_MOD(modrm(u));
    481   rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
    482 
    483   /*
    484    * If mod is 11b, then the modrm.rm specifies a register.
    485    *
    486    */
    487   if (mod == 3) {
    488     decode_reg(u, op, type, rm, size);
    489     return;
    490   }
    491 
    492   /*
    493    * !11b => Memory Address
    494    */
    495   op->type = UD_OP_MEM;
    496   op->size = resolve_operand_size(u, size);
    497 
    498   if (u->adr_mode == 64) {
    499     op->base = UD_R_RAX + rm;
    500     if (mod == 1) {
    501       offset = 8;
    502     } else if (mod == 2) {
    503       offset = 32;
    504     } else if (mod == 0 && (rm & 7) == 5) {
    505       op->base = UD_R_RIP;
    506       offset = 32;
    507     } else {
    508       offset = 0;
    509     }
    510     /*
    511      * Scale-Index-Base (SIB)
    512      */
    513     if ((rm & 7) == 4) {
    514       ud_inp_next(u);
    515 
    516       op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
    517       op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
    518       op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
    519 
    520       /* special conditions for base reference */
    521       if (op->index == UD_R_RSP) {
    522         op->index = UD_NONE;
    523         op->scale = UD_NONE;
    524       }
    525 
    526       if (op->base == UD_R_RBP || op->base == UD_R_R13) {
    527         if (mod == 0) {
    528           op->base = UD_NONE;
    529         }
    530         if (mod == 1) {
    531           offset = 8;
    532         } else {
    533           offset = 32;
    534         }
    535       }
    536     }
    537   } else if (u->adr_mode == 32) {
    538     op->base = UD_R_EAX + rm;
    539     if (mod == 1) {
    540       offset = 8;
    541     } else if (mod == 2) {
    542       offset = 32;
    543     } else if (mod == 0 && rm == 5) {
    544       op->base = UD_NONE;
    545       offset = 32;
    546     } else {
    547       offset = 0;
    548     }
    549 
    550     /* Scale-Index-Base (SIB) */
    551     if ((rm & 7) == 4) {
    552       ud_inp_next(u);
    553 
    554       op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
    555       op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
    556       op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
    557 
    558       if (op->index == UD_R_ESP) {
    559         op->index = UD_NONE;
    560         op->scale = UD_NONE;
    561       }
    562 
    563       /* special condition for base reference */
    564       if (op->base == UD_R_EBP) {
    565         if (mod == 0) {
    566           op->base = UD_NONE;
    567         }
    568         if (mod == 1) {
    569           offset = 8;
    570         } else {
    571           offset = 32;
    572         }
    573       }
    574     }
    575   } else {
    576     const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
    577                                      UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
    578     const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
    579                                      UD_NONE, UD_NONE, UD_NONE, UD_NONE };
    580     op->base  = bases[rm & 7];
    581     op->index = indices[rm & 7];
    582     if (mod == 0 && rm == 6) {
    583       offset = 16;
    584       op->base = UD_NONE;
    585     } else if (mod == 1) {
    586       offset = 8;
    587     } else if (mod == 2) {
    588       offset = 16;
    589     }
    590   }
    591 
    592   if (offset) {
    593     decode_mem_disp(u, offset, op);
    594   }
    595 }
    596 
    597 
    598 /*
    599  * decode_moffset
    600  *    Decode offset-only memory operand
    601  */
    602 static void
    603 decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
    604 {
    605   opr->type = UD_OP_MEM;
    606   opr->size = resolve_operand_size(u, size);
    607   decode_mem_disp(u, u->adr_mode, opr);
    608 }
    609 
    610 
    611 /* -----------------------------------------------------------------------------
    612  * decode_operands() - Disassembles Operands.
    613  * -----------------------------------------------------------------------------
    614  */
    615 static int
    616 decode_operand(struct ud           *u,
    617                struct ud_operand   *operand,
    618                enum ud_operand_code type,
    619                unsigned int         size)
    620 {
    621   operand->_oprcode = type;
    622 
    623   switch (type) {
    624     case OP_A :
    625       decode_a(u, operand);
    626       break;
    627     case OP_MR:
    628       decode_modrm_rm(u, operand, REGCLASS_GPR,
    629                       MODRM_MOD(modrm(u)) == 3 ?
    630                         Mx_reg_size(size) : Mx_mem_size(size));
    631       break;
    632     case OP_F:
    633       u->br_far  = 1;
    634       /* intended fall through */
    635     case OP_M:
    636       if (MODRM_MOD(modrm(u)) == 3) {
    637         UDERR(u, "expected modrm.mod != 3");
    638       }
    639       /* intended fall through */
    640     case OP_E:
    641       decode_modrm_rm(u, operand, REGCLASS_GPR, size);
    642       break;
    643     case OP_G:
    644       decode_modrm_reg(u, operand, REGCLASS_GPR, size);
    645       break;
    646     case OP_sI:
    647     case OP_I:
    648       decode_imm(u, size, operand);
    649       break;
    650     case OP_I1:
    651       operand->type = UD_OP_CONST;
    652       operand->lval.udword = 1;
    653       break;
    654     case OP_N:
    655       if (MODRM_MOD(modrm(u)) != 3) {
    656         UDERR(u, "expected modrm.mod == 3");
    657       }
    658       /* intended fall through */
    659     case OP_Q:
    660       decode_modrm_rm(u, operand, REGCLASS_MMX, size);
    661       break;
    662     case OP_P:
    663       decode_modrm_reg(u, operand, REGCLASS_MMX, size);
    664       break;
    665     case OP_U:
    666       if (MODRM_MOD(modrm(u)) != 3) {
    667         UDERR(u, "expected modrm.mod == 3");
    668       }
    669       /* intended fall through */
    670     case OP_W:
    671       decode_modrm_rm(u, operand, REGCLASS_XMM, size);
    672       break;
    673     case OP_V:
    674       decode_modrm_reg(u, operand, REGCLASS_XMM, size);
    675       break;
    676     case OP_MU:
    677       decode_modrm_rm(u, operand, REGCLASS_XMM,
    678                       MODRM_MOD(modrm(u)) == 3 ?
    679                         Mx_reg_size(size) : Mx_mem_size(size));
    680       break;
    681     case OP_S:
    682       decode_modrm_reg(u, operand, REGCLASS_SEG, size);
    683       break;
    684     case OP_O:
    685       decode_moffset(u, size, operand);
    686       break;
    687     case OP_R0:
    688     case OP_R1:
    689     case OP_R2:
    690     case OP_R3:
    691     case OP_R4:
    692     case OP_R5:
    693     case OP_R6:
    694     case OP_R7:
    695       decode_reg(u, operand, REGCLASS_GPR,
    696                  (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size);
    697       break;
    698     case OP_AL:
    699     case OP_AX:
    700     case OP_eAX:
    701     case OP_rAX:
    702       decode_reg(u, operand, REGCLASS_GPR, 0, size);
    703       break;
    704     case OP_CL:
    705     case OP_CX:
    706     case OP_eCX:
    707       decode_reg(u, operand, REGCLASS_GPR, 1, size);
    708       break;
    709     case OP_DL:
    710     case OP_DX:
    711     case OP_eDX:
    712       decode_reg(u, operand, REGCLASS_GPR, 2, size);
    713       break;
    714     case OP_ES:
    715     case OP_CS:
    716     case OP_DS:
    717     case OP_SS:
    718     case OP_FS:
    719     case OP_GS:
    720       /* in 64bits mode, only fs and gs are allowed */
    721       if (u->dis_mode == 64) {
    722         if (type != OP_FS && type != OP_GS) {
    723           UDERR(u, "invalid segment register in 64bits");
    724         }
    725       }
    726       operand->type = UD_OP_REG;
    727       operand->base = (type - OP_ES) + UD_R_ES;
    728       operand->size = 16;
    729       break;
    730     case OP_J :
    731       decode_imm(u, size, operand);
    732       operand->type = UD_OP_JIMM;
    733       break ;
    734     case OP_R :
    735       if (MODRM_MOD(modrm(u)) != 3) {
    736         UDERR(u, "expected modrm.mod == 3");
    737       }
    738       decode_modrm_rm(u, operand, REGCLASS_GPR, size);
    739       break;
    740     case OP_C:
    741       decode_modrm_reg(u, operand, REGCLASS_CR, size);
    742       break;
    743     case OP_D:
    744       decode_modrm_reg(u, operand, REGCLASS_DB, size);
    745       break;
    746     case OP_I3 :
    747       operand->type = UD_OP_CONST;
    748       operand->lval.sbyte = 3;
    749       break;
    750     case OP_ST0:
    751     case OP_ST1:
    752     case OP_ST2:
    753     case OP_ST3:
    754     case OP_ST4:
    755     case OP_ST5:
    756     case OP_ST6:
    757     case OP_ST7:
    758       operand->type = UD_OP_REG;
    759       operand->base = (type - OP_ST0) + UD_R_ST0;
    760       operand->size = 80;
    761       break;
    762     default :
    763       break;
    764   }
    765   return 0;
    766 }
    767 
    768 
    769 /*
    770  * decode_operands
    771  *
    772  *    Disassemble upto 3 operands of the current instruction being
    773  *    disassembled. By the end of the function, the operand fields
    774  *    of the ud structure will have been filled.
    775  */
    776 static int
    777 decode_operands(struct ud* u)
    778 {
    779   decode_operand(u, &u->operand[0],
    780                     u->itab_entry->operand1.type,
    781                     u->itab_entry->operand1.size);
    782   decode_operand(u, &u->operand[1],
    783                     u->itab_entry->operand2.type,
    784                     u->itab_entry->operand2.size);
    785   decode_operand(u, &u->operand[2],
    786                     u->itab_entry->operand3.type,
    787                     u->itab_entry->operand3.size);
    788   return 0;
    789 }
    790 
    791 /* -----------------------------------------------------------------------------
    792  * clear_insn() - clear instruction structure
    793  * -----------------------------------------------------------------------------
    794  */
    795 static void
    796 clear_insn(register struct ud* u)
    797 {
    798   u->error     = 0;
    799   u->pfx_seg   = 0;
    800   u->pfx_opr   = 0;
    801   u->pfx_adr   = 0;
    802   u->pfx_lock  = 0;
    803   u->pfx_repne = 0;
    804   u->pfx_rep   = 0;
    805   u->pfx_repe  = 0;
    806   u->pfx_rex   = 0;
    807   u->pfx_str   = 0;
    808   u->mnemonic  = UD_Inone;
    809   u->itab_entry = NULL;
    810   u->have_modrm = 0;
    811   u->br_far    = 0;
    812 
    813   memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
    814   memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
    815   memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
    816 }
    817 
    818 
    819 static inline int
    820 resolve_pfx_str(struct ud* u)
    821 {
    822   if (u->pfx_str == 0xf3) {
    823     if (P_STR(u->itab_entry->prefix)) {
    824         u->pfx_rep  = 0xf3;
    825     } else {
    826         u->pfx_repe = 0xf3;
    827     }
    828   } else if (u->pfx_str == 0xf2) {
    829     u->pfx_repne = 0xf3;
    830   }
    831   return 0;
    832 }
    833 
    834 
    835 static int
    836 resolve_mode( struct ud* u )
    837 {
    838   /* if in error state, bail out */
    839   if ( u->error ) return -1;
    840 
    841   /* propagate prefix effects */
    842   if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
    843 
    844     /* Check validity of  instruction m64 */
    845     if ( P_INV64( u->itab_entry->prefix ) ) {
    846       UDERR(u, "instruction invalid in 64bits");
    847       return -1;
    848     }
    849 
    850     /* effective rex prefix is the  effective mask for the
    851      * instruction hard-coded in the opcode map.
    852      */
    853     u->pfx_rex = ( u->pfx_rex & 0x40 ) |
    854                  ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
    855 
    856     /* whether this instruction has a default operand size of
    857      * 64bit, also hardcoded into the opcode map.
    858      */
    859     u->default64 = P_DEF64( u->itab_entry->prefix );
    860     /* calculate effective operand size */
    861     if ( REX_W( u->pfx_rex ) ) {
    862         u->opr_mode = 64;
    863     } else if ( u->pfx_opr ) {
    864         u->opr_mode = 16;
    865     } else {
    866         /* unless the default opr size of instruction is 64,
    867          * the effective operand size in the absence of rex.w
    868          * prefix is 32.
    869          */
    870         u->opr_mode = ( u->default64 ) ? 64 : 32;
    871     }
    872 
    873     /* calculate effective address size */
    874     u->adr_mode = (u->pfx_adr) ? 32 : 64;
    875   } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
    876     u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
    877     u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
    878   } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
    879     u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
    880     u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
    881   }
    882 
    883   /* set flags for implicit addressing */
    884   u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
    885 
    886   return 0;
    887 }
    888 
    889 
    890 static inline int
    891 decode_insn(struct ud *u, uint16_t ptr)
    892 {
    893   UD_ASSERT((ptr & 0x8000) == 0);
    894   u->itab_entry = &ud_itab[ ptr ];
    895   u->mnemonic = u->itab_entry->mnemonic;
    896   return (resolve_pfx_str(u)  == 0 &&
    897           resolve_mode(u)     == 0 &&
    898           decode_operands(u)  == 0 &&
    899           resolve_mnemonic(u) == 0) ? 0 : -1;
    900 }
    901 
    902 
    903 /*
    904  * decode_3dnow()
    905  *
    906  *    Decoding 3dnow is a little tricky because of its strange opcode
    907  *    structure. The final opcode disambiguation depends on the last
    908  *    byte that comes after the operands have been decoded. Fortunately,
    909  *    all 3dnow instructions have the same set of operand types. So we
    910  *    go ahead and decode the instruction by picking an arbitrarily chosen
    911  *    valid entry in the table, decode the operands, and read the final
    912  *    byte to resolve the menmonic.
    913  */
    914 static inline int
    915 decode_3dnow(struct ud* u)
    916 {
    917   uint16_t ptr;
    918   UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
    919   UD_ASSERT(u->le->table[0xc] != 0);
    920   decode_insn(u, u->le->table[0xc]);
    921   ud_inp_next(u);
    922   if (u->error) {
    923     return -1;
    924   }
    925   ptr = u->le->table[inp_curr(u)];
    926   UD_ASSERT((ptr & 0x8000) == 0);
    927   u->mnemonic = ud_itab[ptr].mnemonic;
    928   return 0;
    929 }
    930 
    931 
    932 static int
    933 decode_ssepfx(struct ud *u)
    934 {
    935   uint8_t idx;
    936   uint8_t pfx;
    937 
    938   /*
    939    * String prefixes (f2, f3) take precedence over operand
    940    * size prefix (66).
    941    */
    942   pfx = u->pfx_str;
    943   if (pfx == 0) {
    944     pfx = u->pfx_opr;
    945   }
    946   idx = ((pfx & 0xf) + 1) / 2;
    947   if (u->le->table[idx] == 0) {
    948     idx = 0;
    949   }
    950   if (idx && u->le->table[idx] != 0) {
    951     /*
    952      * "Consume" the prefix as a part of the opcode, so it is no
    953      * longer exported as an instruction prefix.
    954      */
    955     u->pfx_str = 0;
    956     if (pfx == 0x66) {
    957         /*
    958          * consume "66" only if it was used for decoding, leaving
    959          * it to be used as an operands size override for some
    960          * simd instructions.
    961          */
    962         u->pfx_opr = 0;
    963     }
    964   }
    965   return decode_ext(u, u->le->table[idx]);
    966 }
    967 
    968 
    969 /*
    970  * decode_ext()
    971  *
    972  *    Decode opcode extensions (if any)
    973  */
    974 static int
    975 decode_ext(struct ud *u, uint16_t ptr)
    976 {
    977   uint8_t idx = 0;
    978   if ((ptr & 0x8000) == 0) {
    979     return decode_insn(u, ptr);
    980   }
    981   u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
    982   if (u->le->type == UD_TAB__OPC_3DNOW) {
    983     return decode_3dnow(u);
    984   }
    985 
    986   switch (u->le->type) {
    987     case UD_TAB__OPC_MOD:
    988       /* !11 = 0, 11 = 1 */
    989       idx = (MODRM_MOD(modrm(u)) + 1) / 4;
    990       break;
    991       /* disassembly mode/operand size/address size based tables.
    992        * 16 = 0,, 32 = 1, 64 = 2
    993        */
    994     case UD_TAB__OPC_MODE:
    995       idx = u->dis_mode != 64 ? 0 : 1;
    996       break;
    997     case UD_TAB__OPC_OSIZE:
    998       idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
    999       break;
   1000     case UD_TAB__OPC_ASIZE:
   1001       idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
   1002       break;
   1003     case UD_TAB__OPC_X87:
   1004       idx = modrm(u) - 0xC0;
   1005       break;
   1006     case UD_TAB__OPC_VENDOR:
   1007       if (u->vendor == UD_VENDOR_ANY) {
   1008         /* choose a valid entry */
   1009         idx = (u->le->table[idx] != 0) ? 0 : 1;
   1010       } else if (u->vendor == UD_VENDOR_AMD) {
   1011         idx = 0;
   1012       } else {
   1013         idx = 1;
   1014       }
   1015       break;
   1016     case UD_TAB__OPC_RM:
   1017       idx = MODRM_RM(modrm(u));
   1018       break;
   1019     case UD_TAB__OPC_REG:
   1020       idx = MODRM_REG(modrm(u));
   1021       break;
   1022     case UD_TAB__OPC_SSE:
   1023       return decode_ssepfx(u);
   1024     default:
   1025       UD_ASSERT(!"not reached");
   1026       break;
   1027   }
   1028 
   1029   return decode_ext(u, u->le->table[idx]);
   1030 }
   1031 
   1032 
   1033 static int
   1034 decode_opcode(struct ud *u)
   1035 {
   1036   uint16_t ptr;
   1037   UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
   1038   ud_inp_next(u);
   1039   if (u->error) {
   1040     return -1;
   1041   }
   1042   u->primary_opcode = inp_curr(u);
   1043   ptr = u->le->table[inp_curr(u)];
   1044   if (ptr & 0x8000) {
   1045     u->le = &ud_lookup_table_list[ptr & ~0x8000];
   1046     if (u->le->type == UD_TAB__OPC_TABLE) {
   1047       return decode_opcode(u);
   1048     }
   1049   }
   1050   return decode_ext(u, ptr);
   1051 }
   1052 
   1053 
   1054 /* =============================================================================
   1055  * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
   1056  * =============================================================================
   1057  */
   1058 unsigned int
   1059 ud_decode(struct ud *u)
   1060 {
   1061   inp_start(u);
   1062   clear_insn(u);
   1063   u->le = &ud_lookup_table_list[0];
   1064   u->error = decode_prefixes(u) == -1 ||
   1065              decode_opcode(u)   == -1 ||
   1066              u->error;
   1067   /* Handle decode error. */
   1068   if (u->error) {
   1069     /* clear out the decode data. */
   1070     clear_insn(u);
   1071     /* mark the sequence of bytes as invalid. */
   1072     u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
   1073     u->mnemonic = u->itab_entry->mnemonic;
   1074   }
   1075 
   1076     /* maybe this stray segment override byte
   1077      * should be spewed out?
   1078      */
   1079     if ( !P_SEG( u->itab_entry->prefix ) &&
   1080             u->operand[0].type != UD_OP_MEM &&
   1081             u->operand[1].type != UD_OP_MEM )
   1082         u->pfx_seg = 0;
   1083 
   1084   u->insn_offset = u->pc; /* set offset of instruction */
   1085   u->asm_buf_fill = 0;   /* set translation buffer index to 0 */
   1086   u->pc += u->inp_ctr;    /* move program counter by bytes decoded */
   1087 
   1088   /* return number of bytes disassembled. */
   1089   return u->inp_ctr;
   1090 }
   1091 
   1092 /*
   1093 vim: set ts=2 sw=2 expandtab
   1094 */
   1095