Home | History | Annotate | Download | only in libenc
      1 /*
      2  *  Licensed to the Apache Software Foundation (ASF) under one or more
      3  *  contributor license agreements.  See the NOTICE file distributed with
      4  *  this work for additional information regarding copyright ownership.
      5  *  The ASF licenses this file to You under the Apache License, Version 2.0
      6  *  (the "License"); you may not use this file except in compliance with
      7  *  the License.  You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  *  Unless required by applicable law or agreed to in writing, software
     12  *  distributed under the License is distributed on an "AS IS" BASIS,
     13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  *  See the License for the specific language governing permissions and
     15  *  limitations under the License.
     16  */
     17 /**
     18  * @author Alexander V. Astapchuk
     19  */
     20 #include "enc_base.h"
     21 //#include <climits>
     22 #include <string.h>
     23 #define USE_ENCODER_DEFINES
     24 #include "enc_prvt.h"
     25 #include <stdio.h>
     26 
     27 //#define JET_PROTO
     28 
     29 #ifdef JET_PROTO
     30 #include "dec_base.h"
     31 #include "jvmti_dasm.h"
     32 #endif
     33 
     34 ENCODER_NAMESPACE_START
     35 
     36 /**
     37  * @file
     38  * @brief Main encoding routines and structures.
     39  */
     40 
     41 #ifndef _WIN32
     42     #define strcmpi strcasecmp
     43 #endif
     44 
     45 int EncoderBase::dummy = EncoderBase::buildTable();
     46 
     47 const unsigned char EncoderBase::size_hash[OpndSize_64+1] = {
     48     //
     49     0xFF,   // OpndSize_Null        = 0,
     50     3,              // OpndSize_8           = 0x1,
     51     2,              // OpndSize_16          = 0x2,
     52     0xFF,   // 0x3
     53     1,              // OpndSize_32          = 0x4,
     54     0xFF,   // 0x5
     55     0xFF,   // 0x6
     56     0xFF,   // 0x7
     57     0,              // OpndSize_64          = 0x8,
     58     //
     59 };
     60 
     61 const unsigned char EncoderBase::kind_hash[OpndKind_Mem+1] = {
     62     //
     63     //gp reg                -> 000 = 0
     64     //memory                -> 001 = 1
     65     //immediate             -> 010 = 2
     66     //xmm reg               -> 011 = 3
     67     //segment regs  -> 100 = 4
     68     //fp reg                -> 101 = 5
     69     //mmx reg               -> 110 = 6
     70     //
     71     0xFF,                          // 0    OpndKind_Null=0,
     72     0<<2,                          // 1    OpndKind_GPReg =
     73                                    //           OpndKind_MinRegKind=0x1,
     74     4<<2,                          // 2    OpndKind_SReg=0x2,
     75 
     76 #ifdef _HAVE_MMX_
     77     6<<2,                          // 3
     78 #else
     79     0xFF,                          // 3
     80 #endif
     81 
     82     5<<2,                          // 4    OpndKind_FPReg=0x4,
     83     0xFF, 0xFF, 0xFF,              // 5, 6, 7
     84     3<<2,                                   //      OpndKind_XMMReg=0x8,
     85     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 9, 0xA, 0xB, 0xC, 0xD,
     86                                               // 0xE, 0xF
     87     0xFF,                          // OpndKind_MaxRegKind =
     88                                    // OpndKind_StatusReg =
     89                                    // OpndKind_OtherReg=0x10,
     90     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x11-0x18
     91     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,               // 0x19-0x1F
     92     2<<2,                                   // OpndKind_Immediate=0x20,
     93     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x21-0x28
     94     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x29-0x30
     95     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x31-0x38
     96     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,               // 0x39-0x3F
     97     1<<2,                                   // OpndKind_Memory=0x40
     98 };
     99 
    100 char * EncoderBase::curRelOpnd[3];
    101 
    102 char* EncoderBase::encode_aux(char* stream, unsigned aux,
    103                               const Operands& opnds, const OpcodeDesc * odesc,
    104                               unsigned * pargsCount, Rex * prex)
    105 {
    106     const unsigned byte = aux;
    107     OpcodeByteKind kind = (OpcodeByteKind)(byte & OpcodeByteKind_KindMask);
    108     // The '>>' here is to force the switch to be table-based) instead of
    109     // set of CMP+Jcc.
    110     if (*pargsCount >= COUNTOF(opnds)) {
    111         assert(false);
    112         return stream;
    113     }
    114     switch(kind>>8) {
    115     case OpcodeByteKind_SlashR>>8:
    116         // /r - Indicates that the ModR/M byte of the instruction contains
    117         // both a register operand and an r/m operand.
    118         {
    119         assert(opnds.count() > 1);
    120     // not true anymore for MOVQ xmm<->r
    121         //assert((odesc->opnds[0].kind & OpndKind_Mem) ||
    122         //       (odesc->opnds[1].kind & OpndKind_Mem));
    123         unsigned memidx = odesc->opnds[0].kind & OpndKind_Mem ? 0 : 1;
    124         unsigned regidx = memidx == 0 ? 1 : 0;
    125         memidx += *pargsCount;
    126         regidx += *pargsCount;
    127         ModRM& modrm = *(ModRM*)stream;
    128         if (memidx >= COUNTOF(opnds) || regidx >= COUNTOF(opnds)) {
    129             assert(false);
    130             break;
    131         }
    132         if (opnds[memidx].is_mem()) {
    133             stream = encodeModRM(stream, opnds, memidx, odesc, prex);
    134         }
    135         else {
    136             modrm.mod = 3; // 11
    137             modrm.rm = getHWRegIndex(opnds[memidx].reg());
    138 #ifdef _EM64T_
    139             if (opnds[memidx].need_rex() && needs_rex_r(opnds[memidx].reg())) {
    140                 prex->b = 1;
    141             }
    142 #endif
    143             ++stream;
    144         }
    145         modrm.reg = getHWRegIndex(opnds[regidx].reg());
    146 #ifdef _EM64T_
    147         if (opnds[regidx].need_rex() && needs_rex_r(opnds[regidx].reg())) {
    148             prex->r = 1;
    149         }
    150 #endif
    151         *pargsCount += 2;
    152         }
    153         break;
    154     case OpcodeByteKind_SlashNum>>8:
    155         //  /digit - A digit between 0 and 7 indicates that the
    156         //  ModR/M byte of the instruction uses only the r/m
    157         //  (register or memory) operand. The reg field contains
    158         //  the digit that provides an extension to the instruction's
    159         //  opcode.
    160         {
    161         const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
    162         assert(lowByte <= 7);
    163         ModRM& modrm = *(ModRM*)stream;
    164         unsigned idx = *pargsCount;
    165         assert(opnds[idx].is_mem() || opnds[idx].is_reg());
    166         if (opnds[idx].is_mem()) {
    167             stream = encodeModRM(stream, opnds, idx, odesc, prex);
    168         }
    169         else {
    170             modrm.mod = 3; // 11
    171             modrm.rm = getHWRegIndex(opnds[idx].reg());
    172 #ifdef _EM64T_
    173             if (opnds[idx].need_rex() && needs_rex_r(opnds[idx].reg())) {
    174                 prex->b = 1;
    175             }
    176 #endif
    177             ++stream;
    178         }
    179         modrm.reg = (char)lowByte;
    180         *pargsCount += 1;
    181         }
    182         break;
    183     case OpcodeByteKind_plus_i>>8:
    184         //  +i - A number used in floating-point instructions when one
    185         //  of the operands is ST(i) from the FPU register stack. The
    186         //  number i (which can range from 0 to 7) is added to the
    187         //  hexadecimal byte given at the left of the plus sign to form
    188         //  a single opcode byte.
    189         {
    190             unsigned idx = *pargsCount;
    191             const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
    192             *stream = (char)lowByte + getHWRegIndex(opnds[idx].reg());
    193             ++stream;
    194             *pargsCount += 1;
    195         }
    196         break;
    197     case OpcodeByteKind_ib>>8:
    198     case OpcodeByteKind_iw>>8:
    199     case OpcodeByteKind_id>>8:
    200 #ifdef _EM64T_
    201     case OpcodeByteKind_io>>8:
    202 #endif //_EM64T_
    203         //  ib, iw, id - A 1-byte (ib), 2-byte (iw), or 4-byte (id)
    204         //  immediate operand to the instruction that follows the
    205         //  opcode, ModR/M bytes or scale-indexing bytes. The opcode
    206         //  determines if the operand is a signed value. All words
    207         //  and double words are given with the low-order byte first.
    208         {
    209             unsigned idx = *pargsCount;
    210             *pargsCount += 1;
    211             assert(opnds[idx].is_imm());
    212             if (kind == OpcodeByteKind_ib) {
    213                 *(unsigned char*)stream = (unsigned char)opnds[idx].imm();
    214                 curRelOpnd[idx] = stream;
    215                 stream += 1;
    216             }
    217             else if (kind == OpcodeByteKind_iw) {
    218                 *(unsigned short*)stream = (unsigned short)opnds[idx].imm();
    219                 curRelOpnd[idx] = stream;
    220                 stream += 2;
    221             }
    222             else if (kind == OpcodeByteKind_id) {
    223                 *(unsigned*)stream = (unsigned)opnds[idx].imm();
    224                 curRelOpnd[idx] = stream;
    225                 stream += 4;
    226             }
    227 #ifdef _EM64T_
    228             else {
    229                 assert(kind == OpcodeByteKind_io);
    230                 *(long long*)stream = (long long)opnds[idx].imm();
    231                 curRelOpnd[idx] = stream;
    232                 stream += 8;
    233             }
    234 #else
    235             else {
    236                 assert(false);
    237             }
    238 #endif
    239         }
    240         break;
    241     case OpcodeByteKind_cb>>8:
    242         assert(opnds[*pargsCount].is_imm());
    243         *(unsigned char*)stream = (unsigned char)opnds[*pargsCount].imm();
    244         curRelOpnd[*pargsCount]= stream;
    245         stream += 1;
    246         *pargsCount += 1;
    247         break;
    248     case OpcodeByteKind_cw>>8:
    249         assert(opnds[*pargsCount].is_imm());
    250         *(unsigned short*)stream = (unsigned short)opnds[*pargsCount].imm();
    251         curRelOpnd[*pargsCount]= stream;
    252         stream += 2;
    253         *pargsCount += 1;
    254         break;
    255     case OpcodeByteKind_cd>>8:
    256         assert(opnds[*pargsCount].is_imm());
    257         *(unsigned*)stream = (unsigned)opnds[*pargsCount].imm();
    258         curRelOpnd[*pargsCount]= stream;
    259         stream += 4;
    260         *pargsCount += 1;
    261         break;
    262     //OpcodeByteKind_cp                             = 0x0B00,
    263     //OpcodeByteKind_co                             = 0x0C00,
    264     //OpcodeByteKind_ct                             = 0x0D00,
    265     case OpcodeByteKind_rb>>8:
    266     case OpcodeByteKind_rw>>8:
    267     case OpcodeByteKind_rd>>8:
    268         //  +rb, +rw, +rd - A register code, from 0 through 7,
    269         //  added to the hexadecimal byte given at the left of
    270         //  the plus sign to form a single opcode byte.
    271         assert(opnds.count() > 0);
    272         assert(opnds[*pargsCount].is_reg());
    273         {
    274         const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask);
    275         *(unsigned char*)stream = (unsigned char)lowByte +
    276                                    getHWRegIndex(opnds[*pargsCount].reg());
    277 #ifdef _EM64T_
    278         if (opnds[*pargsCount].need_rex() && needs_rex_r(opnds[*pargsCount].reg())) {
    279         prex->b = 1;
    280         }
    281 #endif
    282         ++stream;
    283         *pargsCount += 1;
    284         }
    285         break;
    286     default:
    287         assert(false);
    288         break;
    289     }
    290     return stream;
    291 }
    292 
    293 char * EncoderBase::encode(char * stream, Mnemonic mn, const Operands& opnds)
    294 {
    295 #ifdef _DEBUG
    296     if (opnds.count() > 0) {
    297         if (opnds[0].is_mem()) {
    298             assert(getRegKind(opnds[0].base()) != OpndKind_SReg);
    299         }
    300         else if (opnds.count() >1 && opnds[1].is_mem()) {
    301             assert(getRegKind(opnds[1].base()) != OpndKind_SReg);
    302         }
    303     }
    304 #endif
    305 
    306 #ifdef JET_PROTO
    307     char* saveStream = stream;
    308 #endif
    309 
    310     const OpcodeDesc * odesc = lookup(mn, opnds);
    311 #if !defined(_EM64T_)
    312     bool copy_opcode = true;
    313     Rex *prex = NULL;
    314 #else
    315     // We need rex if
    316     //  either of registers used as operand or address form is new extended register
    317     //  it's explicitly specified by opcode
    318     // So, if we don't have REX in opcode but need_rex, then set rex here
    319     // otherwise, wait until opcode is set, and then update REX
    320 
    321     bool copy_opcode = true;
    322     unsigned char _1st = odesc->opcode[0];
    323 
    324     Rex *prex = (Rex*)stream;
    325     if (opnds.need_rex() &&
    326         ((_1st == 0x66) || (_1st == 0xF2 || _1st == 0xF3) && odesc->opcode[1] == 0x0F)) {
    327         // Special processing
    328         //
    329         copy_opcode = false;
    330         //
    331         *(unsigned char*)stream = _1st;
    332         ++stream;
    333         //
    334         prex = (Rex*)stream;
    335         prex->dummy = 4;
    336         prex->w = 0;
    337         prex->b = 0;
    338         prex->x = 0;
    339         prex->r = 0;
    340         ++stream;
    341         //
    342         memcpy(stream, &odesc->opcode[1], odesc->opcode_len-1);
    343         stream += odesc->opcode_len-1;
    344     }
    345     else if (_1st != 0x48 && opnds.need_rex()) {
    346         prex = (Rex*)stream;
    347         prex->dummy = 4;
    348         prex->w = 0;
    349         prex->b = 0;
    350         prex->x = 0;
    351         prex->r = 0;
    352         ++stream;
    353     }
    354 #endif  // ifndef EM64T
    355 
    356     if (copy_opcode) {
    357         if (odesc->opcode_len==1) {
    358         *(unsigned char*)stream = *(unsigned char*)&odesc->opcode;
    359         }
    360         else if (odesc->opcode_len==2) {
    361         *(unsigned short*)stream = *(unsigned short*)&odesc->opcode;
    362         }
    363         else if (odesc->opcode_len==3) {
    364         *(unsigned short*)stream = *(unsigned short*)&odesc->opcode;
    365         *(unsigned char*)(stream+2) = odesc->opcode[2];
    366         }
    367         else if (odesc->opcode_len==4) {
    368         *(unsigned*)stream = *(unsigned*)&odesc->opcode;
    369         }
    370         stream += odesc->opcode_len;
    371     }
    372 
    373     unsigned argsCount = odesc->first_opnd;
    374 
    375     if (odesc->aux0) {
    376         stream = encode_aux(stream, odesc->aux0, opnds, odesc, &argsCount, prex);
    377         if (odesc->aux1) {
    378             stream = encode_aux(stream, odesc->aux1, opnds, odesc, &argsCount, prex);
    379         }
    380     }
    381 #ifdef JET_PROTO
    382     //saveStream
    383     Inst inst;
    384     unsigned len = DecoderBase::decode(saveStream, &inst);
    385     assert(inst.mn == mn);
    386     assert(len == (unsigned)(stream-saveStream));
    387     if (mn == Mnemonic_CALL || mn == Mnemonic_JMP ||
    388         Mnemonic_RET == mn ||
    389         (Mnemonic_JO<=mn && mn<=Mnemonic_JG)) {
    390         assert(inst.argc == opnds.count());
    391 
    392         InstructionDisassembler idi(saveStream);
    393 
    394         for (unsigned i=0; i<inst.argc; i++) {
    395             const EncoderBase::Operand& original = opnds[i];
    396             const EncoderBase::Operand& decoded = inst.operands[i];
    397             assert(original.kind() == decoded.kind());
    398             assert(original.size() == decoded.size());
    399             if (original.is_imm()) {
    400                 assert(original.imm() == decoded.imm());
    401                 assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Imm);
    402                 if (mn == Mnemonic_CALL) {
    403                     assert(idi.get_type() == InstructionDisassembler::RELATIVE_CALL);
    404                 }
    405                 else if (mn == Mnemonic_JMP) {
    406                     assert(idi.get_type() == InstructionDisassembler::RELATIVE_JUMP);
    407                 }
    408                 else if (mn == Mnemonic_RET) {
    409                     assert(idi.get_type() == InstructionDisassembler::RET);
    410                 }
    411                 else {
    412                     assert(idi.get_type() == InstructionDisassembler::RELATIVE_COND_JUMP);
    413                 }
    414             }
    415             else if (original.is_mem()) {
    416                 assert(original.base() == decoded.base());
    417                 assert(original.index() == decoded.index());
    418                 assert(original.scale() == decoded.scale());
    419                 assert(original.disp() == decoded.disp());
    420                 assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Mem);
    421                 if (mn == Mnemonic_CALL) {
    422                     assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL);
    423                 }
    424                 else if (mn == Mnemonic_JMP) {
    425                     assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP);
    426                 }
    427                 else {
    428                     assert(false);
    429                 }
    430             }
    431             else {
    432                 assert(original.is_reg());
    433                 assert(original.reg() == decoded.reg());
    434                 assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Reg);
    435                 if (mn == Mnemonic_CALL) {
    436                     assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL);
    437                 }
    438                 else if (mn == Mnemonic_JMP) {
    439                     assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP);
    440                 }
    441                 else {
    442                     assert(false);
    443                 }
    444             }
    445         }
    446 
    447         Inst inst2;
    448         len = DecoderBase::decode(saveStream, &inst2);
    449     }
    450 
    451  //   if(idi.get_length_with_prefix() != (int)len) {
    452 	//__asm { int 3 };
    453  //   }
    454 #endif
    455 
    456     return stream;
    457 }
    458 
    459 char* EncoderBase::encodeModRM(char* stream, const Operands& opnds,
    460                                unsigned idx, const OpcodeDesc * odesc,
    461                                Rex * prex)
    462 {
    463     const Operand& op = opnds[idx];
    464     assert(op.is_mem());
    465     assert(idx < COUNTOF(curRelOpnd));
    466     ModRM& modrm = *(ModRM*)stream;
    467     ++stream;
    468     SIB& sib = *(SIB*)stream;
    469 
    470     // we need SIB if
    471     //      we have index & scale (nb: having index w/o base and w/o scale
    472     //      treated as error)
    473     //      the base is EBP w/o disp, BUT let's use a fake disp8
    474     //      the base is ESP (nb: cant have ESP as index)
    475 
    476     RegName base = op.base();
    477     // only disp ?..
    478     if (base == RegName_Null && op.index() == RegName_Null) {
    479         assert(op.scale() == 0); // 'scale!=0' has no meaning without index
    480         // ... yes - only have disp
    481         // On EM64T, the simply [disp] addressing means 'RIP-based' one -
    482         // must have to use SIB to encode 'DS: based'
    483 #ifdef _EM64T_
    484         modrm.mod = 0;  // 00 - ..
    485         modrm.rm = 4;   // 100 - have SIB
    486 
    487         sib.base = 5;   // 101 - none
    488         sib.index = 4;  // 100 - none
    489         sib.scale = 0;  //
    490         ++stream; // bypass SIB
    491 #else
    492         // ignore disp_fits8, always use disp32.
    493         modrm.mod = 0;
    494         modrm.rm = 5;
    495 #endif
    496         *(unsigned*)stream = (unsigned)op.disp();
    497         curRelOpnd[idx]= stream;
    498         stream += 4;
    499         return stream;
    500     }
    501 
    502     //climits: error when targeting compal
    503 #define CHAR_MIN -127
    504 #define CHAR_MAX 127
    505     const bool disp_fits8 = CHAR_MIN <= op.disp() && op.disp() <= CHAR_MAX;
    506     /*&& op.base() != RegName_Null - just checked above*/
    507     if (op.index() == RegName_Null && getHWRegIndex(op.base()) != getHWRegIndex(REG_STACK)) {
    508         assert(op.scale() == 0); // 'scale!=0' has no meaning without index
    509         // ... luckily no SIB, only base and may be a disp
    510 
    511         // EBP base is a special case. Need to use [EBP] + disp8 form
    512         if (op.disp() == 0  && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) {
    513             modrm.mod = 0; // mod=00, no disp et all
    514         }
    515         else if (disp_fits8) {
    516             modrm.mod = 1; // mod=01, use disp8
    517             *(unsigned char*)stream = (unsigned char)op.disp();
    518             curRelOpnd[idx]= stream;
    519             ++stream;
    520         }
    521         else {
    522             modrm.mod = 2; // mod=10, use disp32
    523             *(unsigned*)stream = (unsigned)op.disp();
    524             curRelOpnd[idx]= stream;
    525             stream += 4;
    526         }
    527         modrm.rm = getHWRegIndex(op.base());
    528     if (is_em64t_extra_reg(op.base())) {
    529         prex->b = 1;
    530     }
    531         return stream;
    532     }
    533 
    534     // cool, we do have SIB.
    535     ++stream; // bypass SIB in stream
    536 
    537     // {E|R}SP cannot be scaled index, however, R12 which has the same index in modrm - can
    538     assert(op.index() == RegName_Null || !equals(op.index(), REG_STACK));
    539 
    540     // Only GPRegs can be encoded in the SIB
    541     assert(op.base() == RegName_Null ||
    542             getRegKind(op.base()) == OpndKind_GPReg);
    543     assert(op.index() == RegName_Null ||
    544             getRegKind(op.index()) == OpndKind_GPReg);
    545 
    546     modrm.rm = 4;   // r/m = 100, means 'we have SIB here'
    547     if (op.base() == RegName_Null) {
    548         // no base.
    549         // already checked above if
    550         // the first if() //assert(op.index() != RegName_Null);
    551 
    552         modrm.mod = 0;  // mod=00 - here it means 'no base, but disp32'
    553         sib.base = 5;   // 101 with mod=00  ^^^
    554 
    555         // encode at least fake disp32 to avoid having [base=ebp]
    556         *(unsigned*)stream = op.disp();
    557         curRelOpnd[idx]= stream;
    558         stream += 4;
    559 
    560         unsigned sc = op.scale();
    561         if (sc == 1 || sc==0)   { sib.scale = 0; }    // SS=00
    562         else if (sc == 2)       { sib.scale = 1; }    // SS=01
    563         else if (sc == 4)       { sib.scale = 2; }    // SS=10
    564         else if (sc == 8)       { sib.scale = 3; }    // SS=11
    565         sib.index = getHWRegIndex(op.index());
    566     if (is_em64t_extra_reg(op.index())) {
    567         prex->x = 1;
    568     }
    569 
    570         return stream;
    571     }
    572 
    573     if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) {
    574         modrm.mod = 0;  // mod=00, no disp
    575     }
    576     else if (disp_fits8) {
    577         modrm.mod = 1;  // mod=01, use disp8
    578         *(unsigned char*)stream = (unsigned char)op.disp();
    579         curRelOpnd[idx]= stream;
    580         stream += 1;
    581     }
    582     else {
    583         modrm.mod = 2;  // mod=10, use disp32
    584         *(unsigned*)stream = (unsigned)op.disp();
    585         curRelOpnd[idx]= stream;
    586         stream += 4;
    587     }
    588 
    589     if (op.index() == RegName_Null) {
    590         assert(op.scale() == 0); // 'scale!=0' has no meaning without index
    591         // the only reason we're here without index, is that we have {E|R}SP
    592         // or R12 as a base. Another possible reason - EBP without a disp -
    593         // is handled above by adding a fake disp8
    594 #ifdef _EM64T_
    595         assert(op.base() != RegName_Null && (equals(op.base(), REG_STACK) ||
    596                                              equals(op.base(), RegName_R12)));
    597 #else  // _EM64T_
    598         assert(op.base() != RegName_Null && equals(op.base(), REG_STACK));
    599 #endif //_EM64T_
    600         sib.scale = 0;  // SS = 00
    601         sib.index = 4;  // SS + index=100 means 'no index'
    602     }
    603     else {
    604         unsigned sc = op.scale();
    605         if (sc == 1 || sc==0)   { sib.scale = 0; }    // SS=00
    606         else if (sc == 2)       { sib.scale = 1; }    // SS=01
    607         else if (sc == 4)       { sib.scale = 2; }    // SS=10
    608         else if (sc == 8)       { sib.scale = 3; }    // SS=11
    609         sib.index = getHWRegIndex(op.index());
    610     if (is_em64t_extra_reg(op.index())) {
    611         prex->x = 1;
    612     }
    613         // not an error by itself, but the usage of [index*1] instead
    614         // of [base] is discouraged
    615         assert(op.base() != RegName_Null || op.scale() != 1);
    616     }
    617     sib.base = getHWRegIndex(op.base());
    618     if (is_em64t_extra_reg(op.base())) {
    619     prex->b = 1;
    620     }
    621     return stream;
    622 }
    623 
    624 char * EncoderBase::nops(char * stream, unsigned howMany)
    625 {
    626     // Recommended multi-byte NOPs from the Intel architecture manual
    627     static const unsigned char nops[10][9] = {
    628         { 0, },                                                     // 0, this line is dummy and not used in the loop below
    629         { 0x90, },                                                  // 1-byte NOP
    630         { 0x66, 0x90, },                                            // 2
    631         { 0x0F, 0x1F, 0x00, },                                      // 3
    632         { 0x0F, 0x1F, 0x40, 0x00, },                                // 4
    633         { 0x0F, 0x1F, 0x44, 0x00, 0x00, },                          // 5
    634         { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00, },                    // 6
    635         { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, },              // 7
    636         { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, },        // 8
    637         { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },   // 9-byte NOP
    638     };
    639 
    640     // Start from delivering the longest possible NOPs, then proceed with shorter ones
    641     for (unsigned nopSize=9; nopSize!=0; nopSize--) {
    642         while(howMany>=nopSize) {
    643             const unsigned char* nopBytes = nops[nopSize];
    644             for (unsigned i=0; i<nopSize; i++) {
    645                 stream[i] = nopBytes[i];
    646             }
    647             stream += nopSize;
    648             howMany -= nopSize;
    649         }
    650     }
    651     char* end = stream + howMany;
    652     return end;
    653 }
    654 
    655 char * EncoderBase::prefix(char* stream, InstPrefix pref)
    656 {
    657     if (pref== InstPrefix_Null) {
    658         // nothing to do
    659         return stream;
    660     }
    661     *stream = (char)pref;
    662     return stream + 1;
    663 }
    664 
    665 
    666 /**
    667  *
    668  */
    669 bool EncoderBase::extAllowed(OpndExt opndExt, OpndExt instExt) {
    670     if (instExt == opndExt || instExt == OpndExt_Any || opndExt == OpndExt_Any) {
    671             return true;
    672     }
    673 //asm("int3");
    674 assert(0);
    675     return false;
    676 }
    677 
    678 /**
    679  *
    680  */
    681 static bool match(const EncoderBase::OpcodeDesc& odesc,
    682                       const EncoderBase::Operands& opnds) {
    683 
    684     assert(odesc.roles.count == opnds.count());
    685 
    686     for(unsigned j = 0; j < odesc.roles.count; j++) {
    687         const EncoderBase::OpndDesc& desc = odesc.opnds[j];
    688         const EncoderBase::Operand& op = opnds[j];
    689         // location must match exactly
    690         if ((desc.kind & op.kind()) != op.kind()) {
    691 //assert(0);
    692             return false;
    693         }
    694         // size must match exactly
    695         if (desc.size != op.size()) {
    696 //assert(0);
    697             return false;
    698         }
    699         // extentions should be consistent
    700         if (!EncoderBase::extAllowed(op.ext(), desc.ext)) {
    701             return false;
    702         }
    703     }
    704     return true;
    705 }
    706 
    707 
    708 static bool try_match(const EncoderBase::OpcodeDesc& odesc,
    709                       const EncoderBase::Operands& opnds, bool strict) {
    710 
    711     assert(odesc.roles.count == opnds.count());
    712 
    713     for(unsigned j=0; j<odesc.roles.count; j++) {
    714         // - the location must match exactly
    715         if ((odesc.opnds[j].kind & opnds[j].kind()) != opnds[j].kind()) {
    716             return false;
    717         }
    718         if (strict) {
    719             // the size must match exactly
    720             if (odesc.opnds[j].size != opnds[j].size()) {
    721                 return false;
    722             }
    723         }
    724         else {
    725             // must match only for def operands, and dont care about use ones
    726             // situations like 'mov r8, imm32/mov r32, imm8' so the
    727             // destination operand defines the overall size
    728             if (EncoderBase::getOpndRoles(odesc.roles, j) & OpndRole_Def) {
    729                 if (odesc.opnds[j].size != opnds[j].size()) {
    730                     return false;
    731                 }
    732             }
    733         }
    734     }
    735     return true;
    736 }
    737 
    738 //
    739 //Subhash implementaion - may be useful in case of many misses during fast
    740 //opcode lookup.
    741 //
    742 
    743 #ifdef ENCODER_USE_SUBHASH
    744 static unsigned subHash[32];
    745 
    746 static unsigned find(Mnemonic mn, unsigned hash)
    747 {
    748     unsigned key = hash % COUNTOF(subHash);
    749     unsigned pack = subHash[key];
    750     unsigned _hash = pack & 0xFFFF;
    751     if (_hash != hash) {
    752         stat.miss(mn);
    753         return EncoderBase::NOHASH;
    754     }
    755     unsigned _mn = (pack >> 24)&0xFF;
    756     if (_mn != _mn) {
    757         stat.miss(mn);
    758         return EncoderBase::NOHASH;
    759     }
    760     unsigned idx = (pack >> 16) & 0xFF;
    761     stat.hit(mn);
    762     return idx;
    763 }
    764 
    765 static void put(Mnemonic mn, unsigned hash, unsigned idx)
    766 {
    767     unsigned pack = hash | (idx<<16) | (mn << 24);
    768     unsigned key = hash % COUNTOF(subHash);
    769     subHash[key] = pack;
    770 }
    771 #endif
    772 
    773 const EncoderBase::OpcodeDesc *
    774 EncoderBase::lookup(Mnemonic mn, const Operands& opnds)
    775 {
    776     const unsigned hash = opnds.hash();
    777     unsigned opcodeIndex = opcodesHashMap[mn][hash];
    778 #ifdef ENCODER_USE_SUBHASH
    779     if (opcodeIndex == NOHASH) {
    780         opcodeIndex = find(mn, hash);
    781     }
    782 #endif
    783 
    784     if (opcodeIndex == NOHASH) {
    785         // fast-path did no work. try to lookup sequentially
    786         const OpcodeDesc * odesc = opcodes[mn];
    787         int idx = -1;
    788         bool found = false;
    789         for (idx=0; !odesc[idx].last; idx++) {
    790             const OpcodeDesc& opcode = odesc[idx];
    791             if (opcode.platf == OpcodeInfo::decoder) {
    792                 continue;
    793             }
    794             if (opcode.roles.count != opnds.count()) {
    795                 continue;
    796             }
    797             if (try_match(opcode, opnds, true)) {
    798                 found = true;
    799                 break;
    800             }
    801         }
    802         if (!found) {
    803             for (idx=0; !odesc[idx].last; idx++) {
    804                 const OpcodeDesc& opcode = odesc[idx];
    805                 if (opcode.platf == OpcodeInfo::decoder) {
    806                     continue;
    807                 }
    808                 if (opcode.roles.count != opnds.count()) {
    809                     continue;
    810                 }
    811                 if (try_match(opcode, opnds, false)) {
    812                     found = true;
    813                     break;
    814                 }
    815             }
    816         }
    817         assert(found);
    818         opcodeIndex = idx;
    819 #ifdef ENCODER_USE_SUBHASH
    820         put(mn, hash, opcodeIndex);
    821 #endif
    822     }
    823     assert(opcodeIndex != NOHASH);
    824     const OpcodeDesc * odesc = &opcodes[mn][opcodeIndex];
    825     assert(!odesc->last);
    826     assert(odesc->roles.count == opnds.count());
    827     assert(odesc->platf != OpcodeInfo::decoder);
    828 #if !defined(_EM64T_)
    829     // tuning was done for IA32 only, so no size restriction on EM64T
    830     //assert(sizeof(OpcodeDesc)==128);
    831 #endif
    832     return odesc;
    833 }
    834 
    835 char* EncoderBase::getOpndLocation(int index) {
    836      assert(index < 3);
    837      return curRelOpnd[index];
    838 }
    839 
    840 
    841 Mnemonic EncoderBase::str2mnemonic(const char * mn_name)
    842 {
    843     for (unsigned m = 1; m<Mnemonic_Count; m++) {
    844         if (!strcmpi(mnemonics[m].name, mn_name)) {
    845             return (Mnemonic)m;
    846         }
    847     }
    848     return Mnemonic_Null;
    849 }
    850 
    851 static const char * conditionStrings[ConditionMnemonic_Count] = {
    852     "O",
    853     "NO",
    854     "B",
    855     "AE",
    856     "Z",
    857     "NZ",
    858     "BE",
    859     "A",
    860 
    861     "S",
    862     "NS",
    863     "P",
    864     "NP",
    865     "L",
    866     "GE",
    867     "LE",
    868     "G",
    869 };
    870 
    871 const char * getConditionString(ConditionMnemonic cm) {
    872     return conditionStrings[cm];
    873 }
    874 
    875 static const struct {
    876         char            sizeString[12];
    877         OpndSize        size;
    878 }
    879 sizes[] = {
    880     { "Sz8", OpndSize_8 },
    881     { "Sz16", OpndSize_16 },
    882     { "Sz32", OpndSize_32 },
    883     { "Sz64", OpndSize_64 },
    884 #if !defined(TESTING_ENCODER)
    885     { "Sz80", OpndSize_80 },
    886     { "Sz128", OpndSize_128 },
    887 #endif
    888     { "SzAny", OpndSize_Any },
    889 };
    890 
    891 
    892 OpndSize getOpndSize(const char * sizeString)
    893 {
    894     assert(sizeString);
    895     for (unsigned i = 0; i<COUNTOF(sizes); i++) {
    896         if (!strcmpi(sizeString, sizes[i].sizeString)) {
    897             return sizes[i].size;
    898         }
    899     }
    900     return OpndSize_Null;
    901 }
    902 
    903 const char * getOpndSizeString(OpndSize size) {
    904     for( unsigned i = 0; i<COUNTOF(sizes); i++ ) {
    905         if( sizes[i].size==size ) {
    906             return sizes[i].sizeString;
    907         }
    908     }
    909     return NULL;
    910 }
    911 
    912 static const struct {
    913     char            kindString[16];
    914     OpndKind        kind;
    915 }
    916 kinds[] = {
    917     { "Null", OpndKind_Null },
    918     { "GPReg", OpndKind_GPReg },
    919     { "SReg", OpndKind_SReg },
    920     { "FPReg", OpndKind_FPReg },
    921     { "XMMReg", OpndKind_XMMReg },
    922 #ifdef _HAVE_MMX_
    923     { "MMXReg", OpndKind_MMXReg },
    924 #endif
    925     { "StatusReg", OpndKind_StatusReg },
    926     { "Reg", OpndKind_Reg },
    927     { "Imm", OpndKind_Imm },
    928     { "Mem", OpndKind_Mem },
    929     { "Any", OpndKind_Any },
    930 };
    931 
    932 const char * getOpndKindString(OpndKind kind)
    933 {
    934     for (unsigned i = 0; i<COUNTOF(kinds); i++) {
    935         if (kinds[i].kind==kind) {
    936             return kinds[i].kindString;
    937         }
    938     }
    939     return NULL;
    940 }
    941 
    942 OpndKind getOpndKind(const char * kindString)
    943 {
    944     assert(kindString);
    945     for (unsigned i = 0; i<COUNTOF(kinds); i++) {
    946         if (!strcmpi(kindString, kinds[i].kindString)) {
    947             return kinds[i].kind;
    948         }
    949     }
    950     return OpndKind_Null;
    951 }
    952 
    953 /**
    954  * A mapping between register string representation and its RegName constant.
    955  */
    956 static const struct {
    957         char    regstring[7];
    958         RegName regname;
    959 }
    960 
    961 registers[] = {
    962 #ifdef _EM64T_
    963     {"RAX",         RegName_RAX},
    964     {"RBX",         RegName_RBX},
    965     {"RCX",         RegName_RCX},
    966     {"RDX",         RegName_RDX},
    967     {"RBP",         RegName_RBP},
    968     {"RSI",         RegName_RSI},
    969     {"RDI",         RegName_RDI},
    970     {"RSP",         RegName_RSP},
    971     {"R8",          RegName_R8},
    972     {"R9",          RegName_R9},
    973     {"R10",         RegName_R10},
    974     {"R11",         RegName_R11},
    975     {"R12",         RegName_R12},
    976     {"R13",         RegName_R13},
    977     {"R14",         RegName_R14},
    978     {"R15",         RegName_R15},
    979 #endif
    980 
    981     {"EAX",         RegName_EAX},
    982     {"ECX",         RegName_ECX},
    983     {"EDX",         RegName_EDX},
    984     {"EBX",         RegName_EBX},
    985     {"ESP",         RegName_ESP},
    986     {"EBP",         RegName_EBP},
    987     {"ESI",         RegName_ESI},
    988     {"EDI",         RegName_EDI},
    989 #ifdef _EM64T_
    990     {"R8D",         RegName_R8D},
    991     {"R9D",         RegName_R9D},
    992     {"R10D",        RegName_R10D},
    993     {"R11D",        RegName_R11D},
    994     {"R12D",        RegName_R12D},
    995     {"R13D",        RegName_R13D},
    996     {"R14D",        RegName_R14D},
    997     {"R15D",        RegName_R15D},
    998 #endif
    999 
   1000     {"AX",          RegName_AX},
   1001     {"CX",          RegName_CX},
   1002     {"DX",          RegName_DX},
   1003     {"BX",          RegName_BX},
   1004     {"SP",          RegName_SP},
   1005     {"BP",          RegName_BP},
   1006     {"SI",          RegName_SI},
   1007     {"DI",          RegName_DI},
   1008 
   1009     {"AL",          RegName_AL},
   1010     {"CL",          RegName_CL},
   1011     {"DL",          RegName_DL},
   1012     {"BL",          RegName_BL},
   1013 #if !defined(_EM64T_)
   1014     {"AH",          RegName_AH},
   1015     {"CH",          RegName_CH},
   1016     {"DH",          RegName_DH},
   1017     {"BH",          RegName_BH},
   1018 #else
   1019     {"SPL",         RegName_SPL},
   1020     {"BPL",         RegName_BPL},
   1021     {"SIL",         RegName_SIL},
   1022     {"DIL",         RegName_DIL},
   1023     {"R8L",         RegName_R8L},
   1024     {"R9L",         RegName_R9L},
   1025     {"R10L",        RegName_R10L},
   1026     {"R11L",        RegName_R11L},
   1027     {"R12L",        RegName_R12L},
   1028     {"R13L",        RegName_R13L},
   1029     {"R14L",        RegName_R14L},
   1030     {"R15L",        RegName_R15L},
   1031 #endif
   1032     {"ES",          RegName_ES},
   1033     {"CS",          RegName_CS},
   1034     {"SS",          RegName_SS},
   1035     {"DS",          RegName_DS},
   1036     {"FS",          RegName_FS},
   1037     {"GS",          RegName_GS},
   1038 
   1039     {"FP0",         RegName_FP0},
   1040 /*
   1041     {"FP1",         RegName_FP1},
   1042     {"FP2",         RegName_FP2},
   1043     {"FP3",         RegName_FP3},
   1044     {"FP4",         RegName_FP4},
   1045     {"FP5",         RegName_FP5},
   1046     {"FP6",         RegName_FP6},
   1047     {"FP7",         RegName_FP7},
   1048 */
   1049     {"FP0S",        RegName_FP0S},
   1050     {"FP1S",        RegName_FP1S},
   1051     {"FP2S",        RegName_FP2S},
   1052     {"FP3S",        RegName_FP3S},
   1053     {"FP4S",        RegName_FP4S},
   1054     {"FP5S",        RegName_FP5S},
   1055     {"FP6S",        RegName_FP6S},
   1056     {"FP7S",        RegName_FP7S},
   1057 
   1058     {"FP0D",        RegName_FP0D},
   1059     {"FP1D",        RegName_FP1D},
   1060     {"FP2D",        RegName_FP2D},
   1061     {"FP3D",        RegName_FP3D},
   1062     {"FP4D",        RegName_FP4D},
   1063     {"FP5D",        RegName_FP5D},
   1064     {"FP6D",        RegName_FP6D},
   1065     {"FP7D",        RegName_FP7D},
   1066 
   1067     {"XMM0",        RegName_XMM0},
   1068     {"XMM1",        RegName_XMM1},
   1069     {"XMM2",        RegName_XMM2},
   1070     {"XMM3",        RegName_XMM3},
   1071     {"XMM4",        RegName_XMM4},
   1072     {"XMM5",        RegName_XMM5},
   1073     {"XMM6",        RegName_XMM6},
   1074     {"XMM7",        RegName_XMM7},
   1075 #ifdef _EM64T_
   1076     {"XMM8",       RegName_XMM8},
   1077     {"XMM9",       RegName_XMM9},
   1078     {"XMM10",      RegName_XMM10},
   1079     {"XMM11",      RegName_XMM11},
   1080     {"XMM12",      RegName_XMM12},
   1081     {"XMM13",      RegName_XMM13},
   1082     {"XMM14",      RegName_XMM14},
   1083     {"XMM15",      RegName_XMM15},
   1084 #endif
   1085 
   1086 
   1087     {"XMM0S",       RegName_XMM0S},
   1088     {"XMM1S",       RegName_XMM1S},
   1089     {"XMM2S",       RegName_XMM2S},
   1090     {"XMM3S",       RegName_XMM3S},
   1091     {"XMM4S",       RegName_XMM4S},
   1092     {"XMM5S",       RegName_XMM5S},
   1093     {"XMM6S",       RegName_XMM6S},
   1094     {"XMM7S",       RegName_XMM7S},
   1095 #ifdef _EM64T_
   1096     {"XMM8S",       RegName_XMM8S},
   1097     {"XMM9S",       RegName_XMM9S},
   1098     {"XMM10S",      RegName_XMM10S},
   1099     {"XMM11S",      RegName_XMM11S},
   1100     {"XMM12S",      RegName_XMM12S},
   1101     {"XMM13S",      RegName_XMM13S},
   1102     {"XMM14S",      RegName_XMM14S},
   1103     {"XMM15S",      RegName_XMM15S},
   1104 #endif
   1105 
   1106     {"XMM0D",       RegName_XMM0D},
   1107     {"XMM1D",       RegName_XMM1D},
   1108     {"XMM2D",       RegName_XMM2D},
   1109     {"XMM3D",       RegName_XMM3D},
   1110     {"XMM4D",       RegName_XMM4D},
   1111     {"XMM5D",       RegName_XMM5D},
   1112     {"XMM6D",       RegName_XMM6D},
   1113     {"XMM7D",       RegName_XMM7D},
   1114 #ifdef _EM64T_
   1115     {"XMM8D",       RegName_XMM8D},
   1116     {"XMM9D",       RegName_XMM9D},
   1117     {"XMM10D",      RegName_XMM10D},
   1118     {"XMM11D",      RegName_XMM11D},
   1119     {"XMM12D",      RegName_XMM12D},
   1120     {"XMM13D",      RegName_XMM13D},
   1121     {"XMM14D",      RegName_XMM14D},
   1122     {"XMM15D",      RegName_XMM15D},
   1123 #endif
   1124 
   1125     {"EFLGS",       RegName_EFLAGS},
   1126 };
   1127 
   1128 
   1129 const char * getRegNameString(RegName reg)
   1130 {
   1131     for (unsigned i = 0; i<COUNTOF(registers); i++) {
   1132         if (registers[i].regname == reg) {
   1133             return registers[i].regstring;
   1134         }
   1135     }
   1136     return NULL;
   1137 }
   1138 
   1139 RegName getRegName(const char * regname)
   1140 {
   1141     if (NULL == regname) {
   1142         return RegName_Null;
   1143     }
   1144 
   1145     for (unsigned i = 0; i<COUNTOF(registers); i++) {
   1146         if (!strcmpi(regname,registers[i].regstring)) {
   1147             return registers[i].regname;
   1148         }
   1149     }
   1150     return RegName_Null;
   1151 }
   1152 
   1153 ENCODER_NAMESPACE_END
   1154