Home | History | Annotate | Download | only in libenc
      1 /*
      2  *  Licensed to the Apache Software Foundation (ASF) under one or more
      3  *  contributor license agreements.  See the NOTICE file distributed with
      4  *  this work for additional information regarding copyright ownership.
      5  *  The ASF licenses this file to You under the Apache License, Version 2.0
      6  *  (the "License"); you may not use this file except in compliance with
      7  *  the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  *  Unless required by applicable law or agreed to in writing, software
     12  *  distributed under the License is distributed on an "AS IS" BASIS,
     13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  *  See the License for the specific language governing permissions and
     15  *  limitations under the License.
     16  */
     17 /**
     18  * @author Alexander V. Astapchuk
     19  */
     20 
     21 /**
     22  * @file
     23  * @brief Main decoding (disassembling) routines implementation.
     24  */
     25 
     26 #include "dec_base.h"
     27 #include "enc_prvt.h"
     28 #include <stdio.h>
     29 //#include "open/common.h"
     30 
     31 bool DecoderBase::is_prefix(const unsigned char * bytes)
     32 {
     33     unsigned char b0 = *bytes;
     34     unsigned char b1 = *(bytes+1);
     35     if (b0 == 0xF0) { // LOCK
     36         return true;
     37     }
     38     if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes
     39         if (b1 == 0x0F) {   // .... but may be a part of SIMD opcode
     40             return false;
     41         }
     42         return true;
     43     }
     44     if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x3E) {
     45         // branch hints, segment prefixes
     46         return true;
     47     }
     48     if (b0==0x66) { // operand-size prefix
     49         if (b1 == 0x0F) {   // .... but may be a part of SIMD opcode
     50             return false;
     51         }
     52         return false; //XXX - currently considered as part of opcode//true;
     53     }
     54     if (b0==0x67) { // address size prefix
     55         return true;
     56     }
     57     return false;
     58 }
     59 
     60 // Returns prefix count from 0 to 4, or ((unsigned int)-1) on error
     61 unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst)
     62 {
     63     const unsigned char * my_bytes = bytes;
     64 
     65     while( 1 )
     66     {
     67         unsigned char by1 = *my_bytes;
     68         unsigned char by2 = *(my_bytes + 1);
     69         Inst::PrefGroups where;
     70 
     71         switch( by1 )
     72         {
     73         case InstPrefix_REPNE:
     74         case InstPrefix_REP:
     75         {
     76             if( 0x0F == by2)
     77             {
     78                 return pinst->prefc;
     79             }
     80         }
     81         case InstPrefix_LOCK:
     82         {
     83             where = Inst::Group1;
     84             break;
     85         }
     86         case InstPrefix_CS:
     87         case InstPrefix_SS:
     88         case InstPrefix_DS:
     89         case InstPrefix_ES:
     90         case InstPrefix_FS:
     91         case InstPrefix_GS:
     92 //      case InstPrefix_HintTaken: the same as CS override
     93 //      case InstPrefix_HintNotTaken: the same as DS override
     94         {
     95             where = Inst::Group2;
     96             break;
     97         }
     98         case InstPrefix_OpndSize:
     99         {
    100 //NOTE:   prefix does not work for JMP Sz16, the opcode is 0x66 0xe9
    101 //        here 0x66 will be treated as prefix, try_mn will try to match the code starting at 0xe9
    102 //        it will match JMP Sz32 ...
    103 //HACK:   assume it is the last prefix, return any way
    104             if( 0x0F == by2)
    105             {
    106                 return pinst->prefc;
    107             }
    108             return pinst->prefc;
    109             where = Inst::Group3;
    110             break;
    111         }
    112         case InstPrefix_AddrSize:
    113         {
    114             where = Inst::Group4;
    115             break;
    116         }
    117         default:
    118         {
    119             return pinst->prefc;
    120         }
    121         }
    122         // Assertions are not allowed here.
    123         // Error situations should result in returning error status
    124         if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group
    125             return (unsigned int)-1;
    126 
    127         pinst->pref[where] = (InstPrefix)by1;
    128 
    129         if (pinst->prefc >= 4) //no more than 4 prefixes
    130             return (unsigned int)-1;
    131 
    132         pinst->prefc++;
    133         ++my_bytes;
    134     }
    135 }
    136 
    137 
    138 
    139 unsigned DecoderBase::decode(const void * addr, Inst * pinst)
    140 {
    141     Inst tmp;
    142 
    143     //assert( *(unsigned char*)addr != 0x66);
    144 
    145     const unsigned char * bytes = (unsigned char*)addr;
    146 
    147     // Load up to 4 prefixes
    148     // for each Mnemonic
    149     unsigned int pref_count = fill_prefs(bytes, &tmp);
    150 
    151     if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes
    152         return 0; // Error
    153 
    154     bytes += pref_count;
    155 
    156     //  for each opcodedesc
    157     //      if (raw_len == 0) memcmp(, raw_len)
    158     //  else check the mixed state which is one of the following:
    159     //      /digit /i /rw /rd /rb
    160 
    161     bool found = false;
    162     const unsigned char * saveBytes = bytes;
    163     for (unsigned mn=1; mn<Mnemonic_Count; mn++) {
    164         bytes = saveBytes;
    165         found=try_mn((Mnemonic)mn, &bytes, &tmp);
    166         if (found) {
    167             tmp.mn = (Mnemonic)mn;
    168             break;
    169         }
    170     }
    171     if (!found) {
    172         // Unknown opcode
    173         return 0;
    174     }
    175     tmp.size = (unsigned)(bytes-(const unsigned char*)addr);
    176     if (pinst) {
    177         *pinst = tmp;
    178     }
    179     return tmp.size;
    180 }
    181 
    182 #ifdef _EM64T_
    183 #define EXTEND_REG(reg, flag)                        \
    184     ((NULL == rex || 0 == rex->flag) ? reg : (reg + 8))
    185 #else
    186 #define EXTEND_REG(reg, flag) (reg)
    187 #endif
    188 
    189 //don't know the use of rex, seems not used when _EM64T_ is not enabled
    190 bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
    191     const unsigned char ** pbuf, Inst * pinst
    192 #ifdef _EM64T_
    193     , const Rex UNREF *rex
    194 #endif
    195     )
    196 {
    197     OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
    198     unsigned byte = (aux & OpcodeByteKind_OpcodeMask);
    199     unsigned data_byte = **pbuf;
    200     EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
    201     const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
    202 
    203     switch (kind) {
    204     case OpcodeByteKind_SlashR:
    205         {
    206             RegName reg;
    207             OpndKind okind;
    208             const ModRM& modrm = *(ModRM*)*pbuf;
    209             if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory
    210 #ifdef _EM64T_
    211                 decodeModRM(odesc, pbuf, pinst, rex);
    212 #else
    213                 decodeModRM(odesc, pbuf, pinst);
    214 #endif
    215                 ++pinst->argc;
    216                 const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc];
    217                 okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
    218                 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
    219                 reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r));
    220                 regOpnd = EncoderBase::Operand(reg);
    221             } else {                            // 2nd operand is memory
    222                 okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
    223                 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
    224                 reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r));
    225                 regOpnd = EncoderBase::Operand(reg);
    226                 ++pinst->argc;
    227 #ifdef _EM64T_
    228                 decodeModRM(odesc, pbuf, pinst, rex);
    229 #else
    230                 decodeModRM(odesc, pbuf, pinst);
    231 #endif
    232             }
    233             ++pinst->argc;
    234         }
    235         return true;
    236     case OpcodeByteKind_rb:
    237     case OpcodeByteKind_rw:
    238     case OpcodeByteKind_rd:
    239         {
    240             // Gregory -
    241             // Here we don't parse register because for current needs
    242             // disassembler doesn't require to parse all operands
    243             unsigned regid = data_byte - byte;
    244             if (regid>7) {
    245                 return false;
    246             }
    247             OpndSize opnd_size;
    248             switch(kind)
    249             {
    250             case OpcodeByteKind_rb:
    251             {
    252                 opnd_size = OpndSize_8;
    253                 break;
    254             }
    255             case OpcodeByteKind_rw:
    256             {
    257                 opnd_size = OpndSize_16;
    258                 break;
    259             }
    260             case OpcodeByteKind_rd:
    261             {
    262                 opnd_size = OpndSize_32;
    263                 break;
    264             }
    265             default:
    266                 opnd_size = OpndSize_32;  // so there is no compiler warning
    267                 assert( false );
    268             }
    269             opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) );
    270 
    271             ++pinst->argc;
    272             ++*pbuf;
    273             return true;
    274         }
    275     case OpcodeByteKind_cb:
    276         {
    277         char offset = *(char*)*pbuf;
    278         *pbuf += 1;
    279         opnd = EncoderBase::Operand(offset);
    280         ++pinst->argc;
    281         //pinst->direct_addr = (void*)(pinst->offset + *pbuf);
    282         }
    283         return true;
    284     case OpcodeByteKind_cw:
    285         // not an error, but not expected in current env
    286         // Android x86
    287         {
    288         short offset = *(short*)*pbuf;
    289         *pbuf += 2;
    290         opnd = EncoderBase::Operand(offset);
    291         ++pinst->argc;
    292         }
    293         return true;
    294         //return false;
    295     case OpcodeByteKind_cd:
    296         {
    297         int offset = *(int*)*pbuf;
    298         *pbuf += 4;
    299         opnd = EncoderBase::Operand(offset);
    300         ++pinst->argc;
    301         }
    302         return true;
    303     case OpcodeByteKind_SlashNum:
    304         {
    305         const ModRM& modrm = *(ModRM*)*pbuf;
    306         if (modrm.reg != byte) {
    307             return false;
    308         }
    309         decodeModRM(odesc, pbuf, pinst
    310 #ifdef _EM64T_
    311                         , rex
    312 #endif
    313                         );
    314         ++pinst->argc;
    315         }
    316         return true;
    317     case OpcodeByteKind_ib:
    318         {
    319         char ival = *(char*)*pbuf;
    320         opnd = EncoderBase::Operand(ival);
    321         ++pinst->argc;
    322         *pbuf += 1;
    323         }
    324         return true;
    325     case OpcodeByteKind_iw:
    326         {
    327         short ival = *(short*)*pbuf;
    328         opnd = EncoderBase::Operand(ival);
    329         ++pinst->argc;
    330         *pbuf += 2;
    331         }
    332         return true;
    333     case OpcodeByteKind_id:
    334         {
    335         int ival = *(int*)*pbuf;
    336         opnd = EncoderBase::Operand(ival);
    337         ++pinst->argc;
    338         *pbuf += 4;
    339         }
    340         return true;
    341 #ifdef _EM64T_
    342     case OpcodeByteKind_io:
    343         {
    344         long long int ival = *(long long int*)*pbuf;
    345         opnd = EncoderBase::Operand(OpndSize_64, ival);
    346         ++pinst->argc;
    347         *pbuf += 8;
    348         }
    349         return true;
    350 #endif
    351     case OpcodeByteKind_plus_i:
    352         {
    353             unsigned regid = data_byte - byte;
    354             if (regid>7) {
    355                 return false;
    356             }
    357             ++*pbuf;
    358             return true;
    359         }
    360     case OpcodeByteKind_ZeroOpcodeByte: // cant be here
    361         return false;
    362     default:
    363         // unknown kind ? how comes ?
    364         break;
    365     }
    366     return false;
    367 }
    368 
    369 bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) {
    370     const unsigned char * save_pbuf = *pbuf;
    371     EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn];
    372 
    373     for (unsigned i=0; !opcodes[i].last; i++) {
    374         const EncoderBase::OpcodeDesc& odesc = opcodes[i];
    375         char *opcode_ptr = const_cast<char *>(odesc.opcode);
    376         int opcode_len = odesc.opcode_len;
    377 #ifdef _EM64T_
    378         Rex *prex = NULL;
    379         Rex rex;
    380 #endif
    381 
    382         *pbuf = save_pbuf;
    383 #ifdef _EM64T_
    384         // Match REX prefixes
    385         unsigned char rex_byte = (*pbuf)[0];
    386         if ((rex_byte & 0xf0) == 0x40)
    387         {
    388             if ((rex_byte & 0x08) != 0)
    389             {
    390                 // Have REX.W
    391                 if (opcode_len > 0 && opcode_ptr[0] == 0x48)
    392                 {
    393                     // Have REX.W in opcode. All mnemonics that allow
    394                     // REX.W have to have specified it in opcode,
    395                     // otherwise it is not allowed
    396                     rex = *(Rex *)*pbuf;
    397                     prex = &rex;
    398                     (*pbuf)++;
    399                     opcode_ptr++;
    400                     opcode_len--;
    401                 }
    402             }
    403             else
    404             {
    405                 // No REX.W, so it doesn't have to be in opcode. We
    406                 // have REX.B, REX.X, REX.R or their combination, but
    407                 // not in opcode, they may extend any part of the
    408                 // instruction
    409                 rex = *(Rex *)*pbuf;
    410                 prex = &rex;
    411                 (*pbuf)++;
    412             }
    413         }
    414 #endif
    415         if (opcode_len != 0) {
    416             if (memcmp(*pbuf, opcode_ptr, opcode_len)) {
    417                 continue;
    418             }
    419             *pbuf += opcode_len;
    420         }
    421         if (odesc.aux0 != 0) {
    422 
    423             if (!decode_aux(odesc, odesc.aux0, pbuf, pinst
    424 #ifdef _EM64T_
    425                             , prex
    426 #endif
    427                             )) {
    428                 continue;
    429             }
    430             if (odesc.aux1 != 0) {
    431                 if (!decode_aux(odesc, odesc.aux1, pbuf, pinst
    432 #ifdef _EM64T_
    433                             , prex
    434 #endif
    435                             )) {
    436                     continue;
    437                 }
    438             }
    439             pinst->odesc = &opcodes[i];
    440             return true;
    441         }
    442         else {
    443             // Can't have empty opcode
    444             assert(opcode_len != 0);
    445             pinst->odesc = &opcodes[i];
    446             return true;
    447         }
    448     }
    449     return false;
    450 }
    451 
    452 bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc,
    453     const unsigned char ** pbuf, Inst * pinst
    454 #ifdef _EM64T_
    455     , const Rex *rex
    456 #endif
    457     )
    458 {
    459     EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
    460     const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
    461 
    462     //XXX debug ///assert(0x66 != *(*pbuf-2));
    463     const ModRM& modrm = *(ModRM*)*pbuf;
    464     *pbuf += 1;
    465 
    466     RegName base = RegName_Null;
    467     RegName index = RegName_Null;
    468     int disp = 0;
    469     unsigned scale = 0;
    470 
    471     // On x86_64 all mnemonics that allow REX.W have REX.W in opcode.
    472     // Therefore REX.W is simply ignored, and opndDesc.size is used
    473 
    474     if (modrm.mod == 3) {
    475         // we have only modrm. no sib, no disp.
    476         // Android x86: Use XMMReg for 64b operand.
    477         OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
    478         RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b));
    479         opnd = EncoderBase::Operand(reg);
    480         return true;
    481     }
    482     //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory
    483     //base and index should be 32 bits!!!
    484     const SIB& sib = *(SIB*)*pbuf;
    485     // check whether we have a sib
    486     if (modrm.rm == 4) {
    487         // yes, we have SIB
    488         *pbuf += 1;
    489         // scale = sib.scale == 0 ? 0 : (1<<sib.scale);
    490         scale = (1<<sib.scale);
    491         if (sib.index != 4) {
    492             index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size
    493         } else {
    494             // (sib.index == 4) => no index
    495             //%esp can't be sib.index
    496         }
    497 
    498         if (sib.base != 5 || modrm.mod != 0) {
    499             base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size
    500         } else {
    501             // (sib.base == 5 && modrm.mod == 0) => no base
    502         }
    503     }
    504     else {
    505         if (modrm.mod != 0 || modrm.rm != 5) {
    506             base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size
    507         }
    508         else {
    509             // mod=0 && rm == 5 => only disp32
    510         }
    511     }
    512 
    513     //update disp and pbuf
    514     if (modrm.mod == 2) {
    515         // have disp32
    516         disp = *(int*)*pbuf;
    517         *pbuf += 4;
    518     }
    519     else if (modrm.mod == 1) {
    520         // have disp8
    521         disp = *(char*)*pbuf;
    522         *pbuf += 1;
    523     }
    524     else {
    525         assert(modrm.mod == 0);
    526         if (modrm.rm == 5) {
    527             // have disp32 w/o sib
    528             disp = *(int*)*pbuf;
    529             *pbuf += 4;
    530         }
    531         else if (modrm.rm == 4 && sib.base == 5) {
    532             // have disp32 with SI in sib
    533             disp = *(int*)*pbuf;
    534             *pbuf += 4;
    535         }
    536     }
    537     opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp);
    538     return true;
    539 }
    540 
    541