/* QEMU TCG code generator, i386/x86-64 target backend. */
      1 /*
      2  * Tiny Code Generator for QEMU
      3  *
      4  * Copyright (c) 2008 Fabrice Bellard
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22  * THE SOFTWARE.
     23  */
     24 
#ifndef NDEBUG
/* Register names for debug output, indexed by TCG register number.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif
     35 
/* Order in which the register allocator tries registers.  On x86-64
   the call-saved registers (%rbp, %rbx, %r12-%r15 per the SysV ABI)
   come first, so allocated values are more likely to survive calls.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX,
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
#endif
};
     63 
/* Registers used to pass integer call arguments, in argument order:
   the SysV AMD64 sequence on 64-bit; the GCC regparm registers
   (%eax, %edx, %ecx) on i386.  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};
     78 
/* Registers holding call return values; on 32-bit hosts a 64-bit
   result is returned as the %edx:%eax pair.  (The EAX/EDX register
   numbers also denote RAX/RDX on a 64-bit build.)  */
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_EAX,
    TCG_REG_EDX
};
     83 
/* NOTE(review): assigned elsewhere in this file; appears to be the
   code address translation blocks return to — confirm against the
   prologue/epilogue emitter.  */
static uint8_t *tb_ret_addr;
     85 
/* Patch a previously emitted relocation at CODE_PTR.  VALUE is the
   target address and ADDEND the bias recorded with the relocation.
   Both supported types are pc-relative to the patch site itself;
   aborts if the displacement does not fit the field.  */
static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        /* Range-check: the displacement must fit in 32 bits.  */
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        /* Range-check: the displacement must fit in 8 bits.  */
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
    109 
    110 /* maximum number of register used for input function arguments */
    111 static inline int tcg_target_get_call_iarg_regs_count(int flags)
    112 {
    113     if (TCG_TARGET_REG_BITS == 64) {
    114         return 6;
    115     }
    116 
    117     flags &= TCG_CALL_TYPE_MASK;
    118     switch(flags) {
    119     case TCG_CALL_TYPE_STD:
    120         return 0;
    121     case TCG_CALL_TYPE_REGPARM_1:
    122     case TCG_CALL_TYPE_REGPARM_2:
    123     case TCG_CALL_TYPE_REGPARM:
    124         return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
    125     default:
    126         tcg_abort();
    127     }
    128 }
    129 
/* Parse one target-specific constraint letter from *PCT_STR into CT.
   Advances *pct_str past the letter.  Returns 0 on success, -1 for an
   unknown letter.  */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    /* Single fixed-register constraints.  */
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        /* A register usable as a byte operand: any of the 16 registers
           on x86-64 (a REX prefix makes all low bytes addressable),
           but only %eax..%ebx on i386.  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'r':
        /* Any general register.  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        /* Excludes %rsi/%rdi (64-bit) or %eax/%edx (32-bit) —
           presumably reserved by the qemu_ld/st emission sequence;
           confirm against the ld/st code (not in this chunk).  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':
        /* Constant that fits a sign-extended 32-bit immediate.  */
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        /* Constant that fits a zero-extended 32-bit immediate.  */
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}
    206 
    207 /* test if a constant matches the constraint */
    208 static inline int tcg_target_const_match(tcg_target_long val,
    209                                          const TCGArgConstraint *arg_ct)
    210 {
    211     int ct = arg_ct->ct;
    212     if (ct & TCG_CT_CONST) {
    213         return 1;
    214     }
    215     if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
    216         return 1;
    217     }
    218     if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
    219         return 1;
    220     }
    221     return 0;
    222 }
    223 
#if TCG_TARGET_REG_BITS == 64
/* Low 3 bits of a register number; the high bit goes into REX.  */
# define LOWREGMASK(x)	((x) & 7)
#else
# define LOWREGMASK(x)	(x)
#endif

/* Prefix flags ORed into opcode values; tcg_out_opc strips them and
   emits the corresponding prefix bytes.  */
#define P_EXT		0x100		/* 0x0f opcode prefix */
#define P_DATA16	0x200		/* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32	0x400		/* 0x67 opcode prefix */
# define P_REXW		0x800		/* Set REX.W = 1 */
# define P_REXB_R	0x1000		/* REG field as byte register */
# define P_REXB_RM	0x2000		/* R/M field as byte register */
#else
# define P_ADDR32	0
# define P_REXW		0
# define P_REXB_R	0
# define P_REXB_RM	0
#endif

/* Primary opcode bytes (suffixes follow the Intel manual's operand
   encoding notation: Ev, Gv, Iz, Ib, ...), plus prefix flags.  */
#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_POP_r32	(0x58)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_TESTL	(0x85)
#define OPC_XCHG_ax_r32	(0x90)

#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf
    336 
/* Map each TCG comparison condition to the x86 condition code used
   with Jcc/SETcc.  Signed conditions use L/GE/LE/G; unsigned use
   B/AE/BE/A.  */
static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
    349 
#if TCG_TARGET_REG_BITS == 64
/* Emit the prefixes encoded in OPC (0x66, 0x67, REX, 0x0f) followed by
   the opcode byte itself.  R, RM and X are the full register numbers
   for the ModRM reg, r/m and SIB index fields; their high bits select
   REX.R, REX.B and REX.X respectively.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;		/* REX.W */
    rex |= (r & 8) >> 1;		/* REX.R */
    rex |= (x & 8) >> 2;		/* REX.X */
    rex |= (rm & 8) >> 3;		/* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
/* 32-bit variant: only the 0x66 and 0x0f prefixes exist; the REX
   flags are all defined to zero on this host.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif
    403 
/* Emit OPC with a register-direct ModRM byte (mod = 11): reg field R,
   r/m field RM.  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
    409 
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            /* The displacement is relative to the end of the instruction:
               5 bytes of ModRM+disp32, plus ~rm immediate bytes.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    /* Finally the displacement, if any.  */
    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
    497 
/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
    504 
/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.
   SUBOP may carry prefix flags (e.g. P_REXW) in its high bits.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
    514 
    515 static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
    516 {
    517     if (arg != ret) {
    518         int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    519         tcg_out_modrm(s, opc, ret, arg);
    520     }
    521 }
    522 
/* Load the constant ARG into register RET using the shortest
   available encoding.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        /* xor r,r is shorter than mov $0,r.  NOTE(review): unlike MOV
           this clobbers the flags; callers must not rely on them.  */
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* 32-bit immediate; on x86-64 the write zero-extends.  */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        /* Sign-extended 32-bit immediate with REX.W.  */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        /* Full 64-bit immediate.  The double shift keeps the code
           well-defined when tcg_target_long is only 32 bits wide
           (a single >> 32 would be undefined behaviour there).  */
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
    541 
/* Push the immediate VAL, preferring the 1-byte sign-extended form;
   aborts if VAL does not fit a 32-bit immediate.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}
    554 
/* Push register REG (single-byte opcode plus register number).  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}
    559 
/* Pop into register REG.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
    564 
/* Load RET from memory at ARG1 + ARG2, 32 or 64 bits per TYPE.  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}
    571 
/* Store ARG to memory at ARG1 + ARG2, 32 or 64 bits per TYPE.  */
static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}
    578 
/* Shift/rotate REG by the constant COUNT.  SUBOPC is a SHIFT_* code,
   optionally ORed with prefix flags.  A count of 1 uses the shorter
   shift-by-one encoding.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}
    592 
/* Byte-swap the 32-bit value in REG (BSWAP).  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}
    597 
/* Rotate the low 16 bits of REG by 8, i.e. a 16-bit byte swap.  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
    602 
/* Zero-extend the low byte of SRC into DEST (movzbl).  On i386 only
   %eax..%ebx have byte encodings, hence the assert.  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}
    609 
/* Sign-extend the low byte of SRC into DEST (movsbl/movsbq per REXW).
   Same i386 byte-register restriction as tcg_out_ext8u.  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}
    616 
/* Zero-extend the low 16 bits of SRC into DEST.  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}
    622 
/* Sign-extend the low 16 bits of SRC into DEST (movswl/movswq).  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}
    628 
/* Zero-extend the low 32 bits of SRC into DEST.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}
    634 
/* Sign-extend the low 32 bits of SRC into DEST (movslq, 64-bit only).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}
    639 
/* Byte-swap the full 64-bit value in REG (BSWAP with REX.W).  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
    644 
/* Emit "r0 = r0 <c> val" where C is an ARITH_* code, optionally ORed
   with P_REXW.  CF nonzero means the flags result (carry) must be
   correct, which disables the INC/DEC forms since those do not update
   CF.  Picks the shortest encoding; aborts if VAL needs more than a
   32-bit immediate.  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        /* Split the prefix bits out of the ARITH_* code.  */
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        /* AND with common masks is better done as a zero-extension.  */
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    /* Sign-extended 8-bit immediate form.  */
    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    /* 32-bit immediate form (sign-extended under REX.W).  */
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
    705 
/* Add the constant VAL to REG as a full-width (pointer) addition;
   emits nothing when VAL is zero.  */
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}
    712 
#undef small  /* for mingw build */

/* Use SMALL != 0 to force a short forward branch.  */
/* Emit a jump (OPC == -1) or conditional jump (OPC == JCC_*) to
   LABEL_INDEX.  Resolved labels get the shortest displacement that
   fits (aborting if SMALL is set but the target is out of byte
   range); unresolved labels emit a relocation to be fixed up by
   patch_reloc.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        /* val1: displacement as seen from the end of a 2-byte branch.  */
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            /* Long forms are 5 (jmp) or 6 (0x0f jcc) bytes; adjust the
               displacement for the instruction length.  */
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        /* The -4/-1 addends compensate for pc-relative displacements
           being measured from the end of the displacement field.  */
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
    761 
/* Set the flags for a comparison of ARG1 against ARG2 (a constant
   when CONST_ARG2 is nonzero).  Comparison with zero uses the shorter
   TEST r,r form.  */
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}
    776 
/* 32-bit compare-and-branch: compare, then jcc to LABEL_INDEX.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
    784 
#if TCG_TARGET_REG_BITS == 64
/* 64-bit compare-and-branch (REX.W comparison).  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
/* 32-bit host: branch on a 64-bit comparison of the register pair
   args[1]:args[0] (high:low) against args[3]:args[2].  The condition
   is args[4] and the branch target label is args[5].  Ordered
   comparisons test the high words first, falling through to an
   unsigned low-word comparison when the high words are equal.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
#endif
    876 
/* Emit setcond for 32-bit operands: compare ARG1 against ARG2 (an
   immediate when CONST_ARG2 is set), then materialize the truth value
   of COND into DEST.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    /* SETcc writes only the low byte of DEST; zero-extend afterwards
       so the full register holds 0 or 1.  */
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
    884 
    885 #if TCG_TARGET_REG_BITS == 64
/* Emit setcond for 64-bit operands (64-bit host only): same scheme as
   tcg_out_setcond32, but the compare carries the REX.W prefix.  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    /* SETcc writes only the low byte of DEST; zero-extend afterwards
       so the full register holds 0 or 1.  */
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
    893 #else
/* Emit setcond for a double-word comparison on a 32-bit host.
   args: dest, arg1-low, arg1-high, arg2-low, arg2-high, cond;
   const_args flags immediate operands.  Implemented on top of
   tcg_out_brcond2 by branching and storing 0 or 1 into dest.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    /* Reuse the brcond2 argument layout: operands and condition from
       args[1..5]; slot 5 will be overwritten with a branch target.  */
    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  Branch to a "true"
           label, with a store of 0 and 1 on either side of it.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        /* new_args[4] holds the condition after the memcpy above;
           invert it so we branch past the increment when the original
           condition is false.  */
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    }
}
    935 #endif
    936 
    937 static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
    938 {
    939     tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
    940 
    941     if (disp == (int32_t)disp) {
    942         tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
    943         tcg_out32(s, disp);
    944     } else {
    945         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
    946         tcg_out_modrm(s, OPC_GRP5,
    947                       call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    948     }
    949 }
    950 
/* Emit a call to the absolute address DEST.  */
static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}
    955 
/* Emit an unconditional jump to the absolute address DEST.  */
static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}
    960 
    961 #if defined(CONFIG_SOFTMMU)
    962 
    963 #include "../../softmmu_defs.h"
    964 
    965 static void *qemu_ld_helpers[4] = {
    966     __ldb_mmu,
    967     __ldw_mmu,
    968     __ldl_mmu,
    969     __ldq_mmu,
    970 };
    971 
    972 static void *qemu_st_helpers[4] = {
    973     __stb_mmu,
    974     __stw_mmu,
    975     __stl_mmu,
    976     __stq_mmu,
    977 };
    978 
    979 /* Perform the TLB load and compare.
    980 
    981    Inputs:
    982    ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.
    984 
    985    MEM_INDEX and S_BITS are the memory context and log2 size of the load.
    986 
    987    WHICH is the offset into the CPUTLBEntry structure of the slot to read.
    988    This should be offsetof addr_read or addr_write.
    989 
    990    Outputs:
    991    LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
    992    positions of the displacements of forward jumps to the TLB miss case.
    993 
    994    First argument register is loaded with the low part of the address.
    995    In the TLB hit case, it has been adjusted as indicated by the TLB
    996    and so is a host address.  In the TLB miss case, it continues to
    997    hold a guest address.
    998 
    999    Second argument register is clobbered.  */
   1000 
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    /* Copy the guest address into both scratch registers: r1 becomes
       the TLB entry pointer, r0 the value compared against it.  */
    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    /* r1 = page index, pre-scaled toward a TLB table offset.  */
    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* r0 = address with the in-page offset masked off, but keeping the
       low bits that must be zero for an aligned 1<<s_bits access so a
       misaligned access mismatches the TLB compare.  */
    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    /* r1 = byte offset of the TLB entry within the table.  */
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    /* r1 = &env->tlb_table[mem_index][page index] + WHICH  */
    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    /* Reload the unmasked address into r0; wanted by both the hit path
       (adjusted below) and the miss path (helper argument).  */
    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 -- 8-bit displacement patched in by the caller.  */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi -- compare the high half of the address.  */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 -- turn the guest address into a host address.
       Note r1 currently points at WHICH, hence the -which rebase.  */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
   1058 #endif
   1059 
/* Emit a load from host memory at BASE+OFS into DATALO (and DATAHI for
   64-bit data on a 32-bit host).  SIZEOP low two bits are log2 of the
   access size; bit 2 requests sign extension.  Values are byte-swapped
   when the target is big-endian.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        /* 8-bit zero-extending load; byte order is irrelevant.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        /* 8-bit sign-extending load.  */
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            /* Swap the two bytes with a 16-bit rotate by 8.  */
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            /* Load zero-extended, swap the bytes, then sign-extend
               from 16 bits.  */
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            /* Swap in 32 bits, then sign-extend to 64.  */
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                /* Exchange lo/hi destinations so the two 32-bit loads
                   below fill the pair in reversed order; each half is
                   byte-swapped afterwards.  */
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                /* The first load would clobber the base register, so
                   load the high half first.  */
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
   1136 
/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX. It will be useful once fixed registers globals are less
   common. */
/* Emit a guest load.  args: data register (a pair for 64-bit data on a
   32-bit host), guest address (a pair when TARGET_LONG_BITS exceeds the
   host register width), then under softmmu the memory index.  OPC low
   bits are log2(size); bit 2 requests sign extension.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits, arg_idx;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  The first call argument register now holds the host
       address (adjusted by tcg_out_tlb_load).  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 -- skip the miss path.  */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the forward jne displacements emitted by
       tcg_out_tlb_load to land here.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                    args[addrlo_idx + 1]);
    }
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

    /* Move the helper return value (EAX/RAX, plus EDX for 64-bit data
       on a 32-bit host) into the data register(s), applying the
       required sign or zero extension.  */
    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax -- avoid clobbering the low half before
               it is copied out.  */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: patch the jmp past the miss path to land here.  */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            /* GUEST_BASE did not survive truncation to int32: add it
               to the base register explicitly.  */
            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
   1254 
/* Emit a store of DATALO (and DATAHI for 64-bit data on a 32-bit host)
   to host memory at BASE+OFS.  SIZEOP low two bits are log2 of the
   access size.  Big-endian targets byte-swap through a scratch
   register so the source registers are preserved.  */
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        /* 16-bit store via the 0x66 operand-size prefix.  */
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            /* Store the halves in swapped positions, each
               byte-reversed through the scratch register.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}
   1313 
/* Emit a guest store.  args: data register (a pair for 64-bit data on
   a 32-bit host), guest address (a pair when TARGET_LONG_BITS exceeds
   the host register width), then under softmmu the memory index.
   OPC is log2 of the access size.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;  /* bytes pushed for the helper call, if any */
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  The first call argument register now holds the host
       address (adjusted by tcg_out_tlb_load).  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 -- skip the miss path.  */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the forward jne displacements emitted by
       tcg_out_tlb_load to land here.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* Marshal the data value and mem_index for the store helper; the
       address is already in the first argument register(s).  On 32-bit
       hosts some arguments go on the stack, tracked in stack_adjust.  */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, data_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
        stack_adjust = 0;
    } else if (TARGET_LONG_BITS == 32) {
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
            stack_adjust = 0;
        }
    } else {
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, data_reg2);
            tcg_out_push(s, data_reg);
            stack_adjust = 12;
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            /* Narrow the value to the access size before the call.  */
            switch(opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
                break;
            }
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
    }

    /* label2: patch the jmp past the miss path to land here.  */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            /* GUEST_BASE did not survive truncation to int32: add it
               to the base register explicitly.  */
            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
   1431 
   1432 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
   1433                               const TCGArg *args, const int *const_args)
   1434 {
   1435     int c, rexw = 0;
   1436 
   1437 #if TCG_TARGET_REG_BITS == 64
   1438 # define OP_32_64(x) \
   1439         case glue(glue(INDEX_op_, x), _i64): \
   1440             rexw = P_REXW; /* FALLTHRU */    \
   1441         case glue(glue(INDEX_op_, x), _i32)
   1442 #else
   1443 # define OP_32_64(x) \
   1444         case glue(glue(INDEX_op_, x), _i32)
   1445 #endif
   1446 
   1447     switch(opc) {
   1448     case INDEX_op_exit_tb:
   1449         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
   1450         tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
   1451         break;
   1452     case INDEX_op_goto_tb:
   1453         if (s->tb_jmp_offset) {
   1454             /* direct jump method */
   1455             tcg_out8(s, OPC_JMP_long); /* jmp im */
   1456             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
   1457             tcg_out32(s, 0);
   1458         } else {
   1459             /* indirect jump method */
   1460             tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
   1461                                  (tcg_target_long)(s->tb_next + args[0]));
   1462         }
   1463         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
   1464         break;
   1465     case INDEX_op_call:
   1466         if (const_args[0]) {
   1467             tcg_out_calli(s, args[0]);
   1468         } else {
   1469             /* call *reg */
   1470             tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
   1471         }
   1472         break;
   1473     case INDEX_op_jmp:
   1474         if (const_args[0]) {
   1475             tcg_out_jmp(s, args[0]);
   1476         } else {
   1477             /* jmp *reg */
   1478             tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
   1479         }
   1480         break;
   1481     case INDEX_op_br:
   1482         tcg_out_jxx(s, JCC_JMP, args[0], 0);
   1483         break;
   1484     case INDEX_op_movi_i32:
   1485         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
   1486         break;
   1487     OP_32_64(ld8u):
   1488         /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
   1489         tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
   1490         break;
   1491     OP_32_64(ld8s):
   1492         tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
   1493         break;
   1494     OP_32_64(ld16u):
   1495         /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
   1496         tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
   1497         break;
   1498     OP_32_64(ld16s):
   1499         tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
   1500         break;
   1501 #if TCG_TARGET_REG_BITS == 64
   1502     case INDEX_op_ld32u_i64:
   1503 #endif
   1504     case INDEX_op_ld_i32:
   1505         tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
   1506         break;
   1507 
   1508     OP_32_64(st8):
   1509         tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
   1510                              args[0], args[1], args[2]);
   1511         break;
   1512     OP_32_64(st16):
   1513         tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
   1514                              args[0], args[1], args[2]);
   1515         break;
   1516 #if TCG_TARGET_REG_BITS == 64
   1517     case INDEX_op_st32_i64:
   1518 #endif
   1519     case INDEX_op_st_i32:
   1520         tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
   1521         break;
   1522 
   1523     OP_32_64(add):
   1524         /* For 3-operand addition, use LEA.  */
   1525         if (args[0] != args[1]) {
   1526             TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
   1527 
   1528             if (const_args[2]) {
   1529                 c3 = a2, a2 = -1;
   1530             } else if (a0 == a2) {
   1531                 /* Watch out for dest = src + dest, since we've removed
   1532                    the matching constraint on the add.  */
   1533                 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
   1534                 break;
   1535             }
   1536 
   1537             tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
   1538             break;
   1539         }
   1540         c = ARITH_ADD;
   1541         goto gen_arith;
   1542     OP_32_64(sub):
   1543         c = ARITH_SUB;
   1544         goto gen_arith;
   1545     OP_32_64(and):
   1546         c = ARITH_AND;
   1547         goto gen_arith;
   1548     OP_32_64(or):
   1549         c = ARITH_OR;
   1550         goto gen_arith;
   1551     OP_32_64(xor):
   1552         c = ARITH_XOR;
   1553         goto gen_arith;
   1554     gen_arith:
   1555         if (const_args[2]) {
   1556             tgen_arithi(s, c + rexw, args[0], args[2], 0);
   1557         } else {
   1558             tgen_arithr(s, c + rexw, args[0], args[2]);
   1559         }
   1560         break;
   1561 
   1562     OP_32_64(mul):
   1563         if (const_args[2]) {
   1564             int32_t val;
   1565             val = args[2];
   1566             if (val == (int8_t)val) {
   1567                 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
   1568                 tcg_out8(s, val);
   1569             } else {
   1570                 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
   1571                 tcg_out32(s, val);
   1572             }
   1573         } else {
   1574             tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
   1575         }
   1576         break;
   1577 
   1578     OP_32_64(div2):
   1579         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
   1580         break;
   1581     OP_32_64(divu2):
   1582         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
   1583         break;
   1584 
   1585     OP_32_64(shl):
   1586         c = SHIFT_SHL;
   1587         goto gen_shift;
   1588     OP_32_64(shr):
   1589         c = SHIFT_SHR;
   1590         goto gen_shift;
   1591     OP_32_64(sar):
   1592         c = SHIFT_SAR;
   1593         goto gen_shift;
   1594     OP_32_64(rotl):
   1595         c = SHIFT_ROL;
   1596         goto gen_shift;
   1597     OP_32_64(rotr):
   1598         c = SHIFT_ROR;
   1599         goto gen_shift;
   1600     gen_shift:
   1601         if (const_args[2]) {
   1602             tcg_out_shifti(s, c + rexw, args[0], args[2]);
   1603         } else {
   1604             tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
   1605         }
   1606         break;
   1607 
   1608     case INDEX_op_brcond_i32:
   1609         tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
   1610                          args[3], 0);
   1611         break;
   1612     case INDEX_op_setcond_i32:
   1613         tcg_out_setcond32(s, args[3], args[0], args[1],
   1614                           args[2], const_args[2]);
   1615         break;
   1616 
   1617     OP_32_64(bswap16):
   1618         tcg_out_rolw_8(s, args[0]);
   1619         break;
   1620     OP_32_64(bswap32):
   1621         tcg_out_bswap32(s, args[0]);
   1622         break;
   1623 
   1624     OP_32_64(neg):
   1625         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
   1626         break;
   1627     OP_32_64(not):
   1628         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
   1629         break;
   1630 
   1631     OP_32_64(ext8s):
   1632         tcg_out_ext8s(s, args[0], args[1], rexw);
   1633         break;
   1634     OP_32_64(ext16s):
   1635         tcg_out_ext16s(s, args[0], args[1], rexw);
   1636         break;
   1637     OP_32_64(ext8u):
   1638         tcg_out_ext8u(s, args[0], args[1]);
   1639         break;
   1640     OP_32_64(ext16u):
   1641         tcg_out_ext16u(s, args[0], args[1]);
   1642         break;
   1643 
   1644     case INDEX_op_qemu_ld8u:
   1645         tcg_out_qemu_ld(s, args, 0);
   1646         break;
   1647     case INDEX_op_qemu_ld8s:
   1648         tcg_out_qemu_ld(s, args, 0 | 4);
   1649         break;
   1650     case INDEX_op_qemu_ld16u:
   1651         tcg_out_qemu_ld(s, args, 1);
   1652         break;
   1653     case INDEX_op_qemu_ld16s:
   1654         tcg_out_qemu_ld(s, args, 1 | 4);
   1655         break;
   1656 #if TCG_TARGET_REG_BITS == 64
   1657     case INDEX_op_qemu_ld32u:
   1658 #endif
   1659     case INDEX_op_qemu_ld32:
   1660         tcg_out_qemu_ld(s, args, 2);
   1661         break;
   1662     case INDEX_op_qemu_ld64:
   1663         tcg_out_qemu_ld(s, args, 3);
   1664         break;
   1665 
   1666     case INDEX_op_qemu_st8:
   1667         tcg_out_qemu_st(s, args, 0);
   1668         break;
   1669     case INDEX_op_qemu_st16:
   1670         tcg_out_qemu_st(s, args, 1);
   1671         break;
   1672     case INDEX_op_qemu_st32:
   1673         tcg_out_qemu_st(s, args, 2);
   1674         break;
   1675     case INDEX_op_qemu_st64:
   1676         tcg_out_qemu_st(s, args, 3);
   1677         break;
   1678 
   1679 #if TCG_TARGET_REG_BITS == 32
   1680     case INDEX_op_brcond2_i32:
   1681         tcg_out_brcond2(s, args, const_args, 0);
   1682         break;
   1683     case INDEX_op_setcond2_i32:
   1684         tcg_out_setcond2(s, args, const_args);
   1685         break;
   1686     case INDEX_op_mulu2_i32:
   1687         tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
   1688         break;
   1689     case INDEX_op_add2_i32:
   1690         if (const_args[4]) {
   1691             tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
   1692         } else {
   1693             tgen_arithr(s, ARITH_ADD, args[0], args[4]);
   1694         }
   1695         if (const_args[5]) {
   1696             tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
   1697         } else {
   1698             tgen_arithr(s, ARITH_ADC, args[1], args[5]);
   1699         }
   1700         break;
   1701     case INDEX_op_sub2_i32:
   1702         if (const_args[4]) {
   1703             tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
   1704         } else {
   1705             tgen_arithr(s, ARITH_SUB, args[0], args[4]);
   1706         }
   1707         if (const_args[5]) {
   1708             tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
   1709         } else {
   1710             tgen_arithr(s, ARITH_SBB, args[1], args[5]);
   1711         }
   1712         break;
   1713 #else /* TCG_TARGET_REG_BITS == 64 */
   1714     case INDEX_op_movi_i64:
   1715         tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
   1716         break;
   1717     case INDEX_op_ld32s_i64:
   1718         tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
   1719         break;
   1720     case INDEX_op_ld_i64:
   1721         tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
   1722         break;
   1723     case INDEX_op_st_i64:
   1724         tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
   1725         break;
   1726     case INDEX_op_qemu_ld32s:
   1727         tcg_out_qemu_ld(s, args, 2 | 4);
   1728         break;
   1729 
   1730     case INDEX_op_brcond_i64:
   1731         tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
   1732                          args[3], 0);
   1733         break;
   1734     case INDEX_op_setcond_i64:
   1735         tcg_out_setcond64(s, args[3], args[0], args[1],
   1736                           args[2], const_args[2]);
   1737         break;
   1738 
   1739     case INDEX_op_bswap64_i64:
   1740         tcg_out_bswap64(s, args[0]);
   1741         break;
   1742     case INDEX_op_ext32u_i64:
   1743         tcg_out_ext32u(s, args[0], args[1]);
   1744         break;
   1745     case INDEX_op_ext32s_i64:
   1746         tcg_out_ext32s(s, args[0], args[1]);
   1747         break;
   1748 #endif
   1749 
   1750     default:
   1751         tcg_abort();
   1752     }
   1753 
   1754 #undef OP_32_64
   1755 }
   1756 
/* Operand-constraint table consumed by the register allocator: one entry
   per opcode, with one constraint string per operand (outputs first).
   Letters seen here: "r" = any register, "i" = immediate, "0"/"1" = must
   be the same register as output operand 0/1.  The letters "q", "a", "d",
   "c", "e", "Z", "L", "cb" are target-specific; presumably "a"/"d"/"c"
   pin EAX/EDX/ECX ('c' matches the CL shift count used by OPC_SHIFT_cl
   in tcg_out_op above), "q" restricts to byte-addressable registers, and
   "L" reserves scratch registers for the qemu_ld/st slow path --
   NOTE(review): the authoritative meanings live in the backend's
   constraint-parsing code, which is not visible here; confirm there.  */
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    /* Byte store: "q" presumably because only certain registers have
       byte encodings on 32-bit hosts -- TODO confirm.  */
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    /* Two-operand x86 arithmetic: first source must alias the output
       ("0") except for add, which can use LEA.  */
    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    /* Shift/rotate count is an immediate or lives in CL (OPC_SHIFT_cl).  */
    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    /* bswap/neg/not operate in place, hence the "0" source.  */
    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    /* 32-bit host only: 64-bit values are register pairs, and MUL's
       implicit EAX:EDX result shows up as "a"/"d" outputs.  */
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    /* 64-bit host: native i64 opcodes.  "e" presumably means a 32-bit
       sign-extended immediate (x86-64 encoding limit) -- TODO confirm.  */
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
#endif

    /* Guest memory access: operand counts depend on whether guest
       addresses and 64-bit values fit in one host register.  */
#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    /* 32-bit host, 32-bit guest address.  */
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    /* 32-bit host, 64-bit guest address: addresses take two operands.  */
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    /* End-of-table sentinel.  */
    { -1 },
};
   1899 
   1900 static int tcg_target_callee_save_regs[] = {
   1901 #if TCG_TARGET_REG_BITS == 64
   1902     TCG_REG_RBP,
   1903     TCG_REG_RBX,
   1904     TCG_REG_R12,
   1905     TCG_REG_R13,
   1906     /* TCG_REG_R14, */ /* Currently used for the global env. */
   1907     TCG_REG_R15,
   1908 #else
   1909     /* TCG_REG_EBP, */ /* Currently used for the global env. */
   1910     TCG_REG_EBX,
   1911     TCG_REG_ESI,
   1912     TCG_REG_EDI,
   1913 #endif
   1914 };
   1915 
   1916 /* Generate global QEMU prologue and epilogue code */
   1917 static void tcg_target_qemu_prologue(TCGContext *s)
   1918 {
   1919     int i, frame_size, push_size, stack_addend;
   1920 
   1921     /* TB prologue */
   1922 
   1923     /* Save all callee saved registers.  */
   1924     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
   1925         tcg_out_push(s, tcg_target_callee_save_regs[i]);
   1926     }
   1927 
   1928     /* Reserve some stack space.  */
   1929     push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
   1930     push_size *= TCG_TARGET_REG_BITS / 8;
   1931 
   1932     frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
   1933     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
   1934         ~(TCG_TARGET_STACK_ALIGN - 1);
   1935     stack_addend = frame_size - push_size;
   1936     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
   1937 
   1938     /* jmp *tb.  */
   1939     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
   1940 
   1941     /* TB epilogue */
   1942     tb_ret_addr = s->code_ptr;
   1943 
   1944     tcg_out_addi(s, TCG_REG_ESP, stack_addend);
   1945 
   1946     for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
   1947         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
   1948     }
   1949     tcg_out_opc(s, OPC_RET, 0, 0, 0);
   1950 }
   1951 
   1952 static void tcg_target_init(TCGContext *s)
   1953 {
   1954 #if !defined(CONFIG_USER_ONLY)
   1955     /* fail safe */
   1956     if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
   1957         tcg_abort();
   1958 #endif
   1959 
   1960     if (TCG_TARGET_REG_BITS == 64) {
   1961         tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
   1962         tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
   1963     } else {
   1964         tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
   1965     }
   1966 
   1967     tcg_regset_clear(tcg_target_call_clobber_regs);
   1968     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
   1969     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
   1970     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
   1971     if (TCG_TARGET_REG_BITS == 64) {
   1972         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
   1973         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
   1974         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
   1975         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
   1976         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
   1977         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
   1978     }
   1979 
   1980     tcg_regset_clear(s->reserved_regs);
   1981     tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
   1982 
   1983     tcg_add_target_add_op_defs(x86_op_defs);
   1984 }
   1985