/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifdef _WIN32
/* For some reason, the Mingw32 headers define the 'small' macro, which
   prevents this source from compiling.  */
#undef small
#endif

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode uses a stack-based calling convention (the GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Registers used with the L constraint: the first two argument
   registers on x86_64, and two arbitrary call-clobbered registers
   on i386.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  However, the host compiler must supply <cpuid.h>, as we're
   not going to go so far as our own inline assembly.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
#include <cpuid.h>
static bool have_cmov;
#else
# define have_cmov 0
#endif
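
/* Illustrative sketch (our annotation, an assumption rather than part of
   this excerpt): with CONFIG_CPUID_H set, the backend's init code can fill
   in have_cmov using GCC's <cpuid.h> helpers, roughly:

       unsigned a, b, c, d;
       if (__get_cpuid(1, &a, &b, &c, &d)) {
           have_cmov = (d & bit_CMOV) != 0;
       }

   __get_cpuid and bit_CMOV are real <cpuid.h> interfaces; the actual init
   site lies outside this section of the file.  */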

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
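
/* Worked example (our illustration, not original commentary): tcg_out_jxx
   below emits a long jump, records a R_386_PC32 reloc at the displacement
   field with addend -4, and skips 4 bytes.  With code_ptr pointing at that
   field, patch_reloc stores target + (-4) - code_ptr, i.e. the target minus
   the end of the instruction, which is exactly the i386 rel32 encoding.  */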

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)	((x) & 7)
#else
# define LOWREGMASK(x)	(x)
#endif

#define P_EXT		0x100		/* 0x0f opcode prefix */
#define P_DATA16	0x200		/* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32	0x400		/* 0x67 opcode prefix */
# define P_REXW		0x800		/* Set REX.W = 1 */
# define P_REXB_R	0x1000		/* REG field as byte register */
# define P_REXB_RM	0x2000		/* R/M field as byte register */
# define P_GS           0x4000          /* gs segment override */
#else
# define P_ADDR32	0
# define P_REXW		0
# define P_REXB_R	0
# define P_REXB_RM	0
# define P_GS           0
#endif

#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_POP_r32	(0x58)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_TESTL	(0x85)
#define OPC_XCHG_ax_r32	(0x90)

#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16- and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;		/* REX.W */
    rex |= (r & 8) >> 1;		/* REX.R */
    rex |= (x & 8) >> 2;		/* REX.X */
    rex |= (rm & 8) >> 3;		/* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
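
/* Example (ours, for illustration): with opc = OPC_MOVL_EvGv | P_REXW,
   r = TCG_REG_R8 and rm = TCG_REG_RAX, the REX byte collects W (from
   P_REXW) and R (bit 3 of r), so tcg_out_modrm below emits
   0x4c 0x89 0xc0, i.e. "movq %r8, %rax".  */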
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This works with all versions of gcc,
   whereas relying on optimization to eliminate the dead arguments may not.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   Either RM or INDEX may be omitted by passing a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
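
/* Example (ours): tcg_out_modrm_sib_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX,
   TCG_REG_EBX, TCG_REG_ESI, 2, 0x10) takes the two-byte MODRM+SIB form
   with mod = 0x40 (disp8): bytes 0x8b 0x44 0xb3 0x10, i.e.
   "movl 0x10(%ebx,%esi,4), %eax".  */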

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
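
/* Size ladder, for reference (our annotation): xor r,r is 2-3 bytes;
   movl $imm32 is 5 bytes (6 with REX.B for %r8-%r15); the sign-extended
   movq $imm32 (0xc7 /0) is 7 bytes; the rip-relative lea is 7 bytes; and
   the full movabs immediate is 10 bytes.  */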

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
{
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);
    tcg_out32(s, val);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, it also induces
       partial-flags-update stalls on Pentium 4 and is not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
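
/* Example (ours): tgen_arithi(s, ARITH_SUB + P_REXW, TCG_REG_RSP, 0x88, 0)
   falls through to the immediate tests; 0x88 does not fit in int8_t, so it
   emits the 0x81 form: 0x48 0x81 0xec 0x88 0x00 0x00 0x00, i.e.
   "subq $0x88, %rsp".  With val 0x7f it would pick the shorter
   0x83 /5 ib encoding instead.  */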

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (intptr_t)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
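
/* Note (our annotation): the -2, -5 and -6 adjustments above simply make
   the displacement relative to the end of the instruction: a short jmp/jcc
   is 2 bytes, a long jmp (0xe9) is 5, and a long jcc (0x0f 0x8x) is 6,
   with val measured from the instruction start.  */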

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        int over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
{
    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp s_bits,
                                    uint8_t **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType htype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            htype = TCG_TYPE_I64;
            hrexw = P_REXW;
        }
    }

    tcg_out_mov(s, htype, r0, addrlo);
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + hrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast-path add of the TLB addend and the
       slow-path function argument setup.  There are two cases worth
       noting: for a 32-bit guest on an x86_64 host, MOVL zero-extends
       the guest address before the fastpath ADDQ below; for a 64-bit
       guest on an x32 host, MOVQ copies the entire guest address for
       the slow path, while truncation for the 32-bit host happens with
       the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
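
/* Fast-path shape, as emitted above for a 64-bit guest on a 64-bit host
   (our sketch; r0/r1 are TCG_REG_L0/L1):
       movq  %addrlo, %r0
       movq  %addrlo, %r1
       shrq  $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), %r0
       andq  $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), %r1
       andq  $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), %r0
       leaq  tlb_table_ofs(%areg0, %r0), %r0
       cmpq  (%r0), %r1
       movq  %addrlo, %r1
       jne   slow_path
       addq  addend_ofs(%r0), %r1    */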

/*
 * Record the context of a call to the out-of-line helper code for the slow
 * path of a load or store, so that we can later generate the correct helper
 * code.
 */
static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, uint8_t *raddr,
                                uint8_t **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGReg data_reg;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
                     l->mem_index);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to the next IR op after the qemu_ld.  */
    tcg_out_jmp(s, (uintptr_t)l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGMemOp s_bits = opc & MO_SIZE;
    uint8_t **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
                     l->mem_index);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
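
/* Example (our illustration): with the GS segment base set to GUEST_BASE
   via arch_prctl above, a user-mode load folds the guest-to-host address
   translation into a prefix, e.g. "movl %gs:(%rdi), %eax" rather than
   adding GUEST_BASE explicitly; P_GS makes tcg_out_opc emit the 0x65
   segment-override byte for exactly this.  */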

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
                                 datalo, base, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
                                 datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
            } else {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common. */
   1425 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
   1426 {
   1427     TCGReg datalo, datahi, addrlo;
   1428     TCGReg addrhi __attribute__((unused));
   1429     TCGMemOp opc;
   1430 #if defined(CONFIG_SOFTMMU)
   1431     int mem_index;
   1432     TCGMemOp s_bits;
   1433     uint8_t *label_ptr[2];
   1434 #endif
   1435 
   1436     datalo = *args++;
   1437     datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
   1438     addrlo = *args++;
   1439     addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
   1440     opc = *args++;
   1441 
   1442 #if defined(CONFIG_SOFTMMU)
   1443     mem_index = *args++;
   1444     s_bits = opc & MO_SIZE;
   1445 
   1446     tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
   1447                      label_ptr, offsetof(CPUTLBEntry, addr_read));
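             /* tcg_out_tlb_load leaves the host address for the access in
                TCG_REG_L1 on a TLB hit; label_ptr records the forward
                branches that the slow path, generated later, will patch.  */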
   1448 
   1449     /* TLB Hit.  */
   1450     tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
   1451 
    1452     /* Record the current context of the load into an ldst label.  */
   1453     add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
   1454                         mem_index, s->code_ptr, label_ptr);
   1455 #else
   1456     {
   1457         int32_t offset = GUEST_BASE;
   1458         TCGReg base = addrlo;
   1459         int seg = 0;
   1460 
   1461         /* ??? We assume all operations have left us with register contents
   1462            that are zero extended.  So far this appears to be true.  If we
   1463            want to enforce this, we can either do an explicit zero-extension
   1464            here, or (if GUEST_BASE == 0, or a segment register is in use)
   1465            use the ADDR32 prefix.  For now, do nothing.  */
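                 /* (A minimal sketch of that explicit zero-extension, for a
                    64-bit host, would be: tcg_out_ext32u(s, base, base);
                    before the access.)  */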
   1466         if (GUEST_BASE && guest_base_flags) {
   1467             seg = guest_base_flags;
   1468             offset = 0;
   1469         } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
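                     /* GUEST_BASE did not survive truncation to int32_t, so
                        it cannot be used as a displacement; materialize it
                        and add it to the address by hand.  */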
   1470             tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
   1471             tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
   1472             base = TCG_REG_L1;
   1473             offset = 0;
   1474         }
   1475 
   1476         tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
   1477     }
   1478 #endif
   1479 }
   1480 
   1481 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
   1482                                    TCGReg base, intptr_t ofs, int seg,
   1483                                    TCGMemOp memop)
   1484 {
   1485     const TCGMemOp bswap = memop & MO_BSWAP;
   1486 
   1487     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
   1488        we could perform the bswap twice to restore the original value
   1489        instead of moving to the scratch.  But as it is, the L constraint
   1490        means that TCG_REG_L0 is definitely free here.  */
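             /* E.g. a byte-swapped 32-bit store comes out roughly as:
              *     mov   %datalo, %scratch
              *     bswap %scratch
              *     mov   %scratch, ofs(%base)
              */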
   1491     const TCGReg scratch = TCG_REG_L0;
   1492 
   1493     switch (memop & MO_SIZE) {
   1494     case MO_8:
    1495         /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
   1496            Use the scratch register if necessary.  */
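                 /* Without a REX prefix, ModRM register numbers 4..7 name
                    the high-byte registers %ah/%ch/%dh/%bh rather than the
                    low byte of %esp/%ebp/%esi/%edi, hence the datalo >= 4
                    check.  */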
   1497         if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
   1498             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
   1499             datalo = scratch;
   1500         }
   1501         tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
   1502                              datalo, base, ofs);
   1503         break;
   1504     case MO_16:
   1505         if (bswap) {
   1506             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
   1507             tcg_out_rolw_8(s, scratch);
   1508             datalo = scratch;
   1509         }
   1510         tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
   1511                              datalo, base, ofs);
   1512         break;
   1513     case MO_32:
   1514         if (bswap) {
   1515             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
   1516             tcg_out_bswap32(s, scratch);
   1517             datalo = scratch;
   1518         }
   1519         tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
   1520         break;
   1521     case MO_64:
   1522         if (TCG_TARGET_REG_BITS == 64) {
   1523             if (bswap) {
   1524                 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
   1525                 tcg_out_bswap64(s, scratch);
   1526                 datalo = scratch;
   1527             }
   1528             tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
   1529                                  datalo, base, ofs);
   1530         } else if (bswap) {
   1531             tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
   1532             tcg_out_bswap32(s, scratch);
   1533             tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
   1534             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
   1535             tcg_out_bswap32(s, scratch);
   1536             tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
   1537         } else {
   1538             tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
   1539             tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
   1540         }
   1541         break;
   1542     default:
   1543         tcg_abort();
   1544     }
   1545 }
   1546 
   1547 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
   1548 {
   1549     TCGReg datalo, datahi, addrlo;
   1550     TCGReg addrhi __attribute__((unused));
   1551     TCGMemOp opc;
   1552 #if defined(CONFIG_SOFTMMU)
   1553     int mem_index;
   1554     TCGMemOp s_bits;
   1555     uint8_t *label_ptr[2];
   1556 #endif
   1557 
   1558     datalo = *args++;
   1559     datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
   1560     addrlo = *args++;
   1561     addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
   1562     opc = *args++;
   1563 
   1564 #if defined(CONFIG_SOFTMMU)
   1565     mem_index = *args++;
   1566     s_bits = opc & MO_SIZE;
   1567 
   1568     tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
   1569                      label_ptr, offsetof(CPUTLBEntry, addr_write));
   1570 
   1571     /* TLB Hit.  */
   1572     tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
   1573 
    1574     /* Record the current context of the store into an ldst label.  */
   1575     add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
   1576                         mem_index, s->code_ptr, label_ptr);
   1577 #else
   1578     {
   1579         int32_t offset = GUEST_BASE;
   1580         TCGReg base = addrlo;
   1581         int seg = 0;
   1582 
   1583         /* ??? We assume all operations have left us with register contents
   1584            that are zero extended.  So far this appears to be true.  If we
   1585            want to enforce this, we can either do an explicit zero-extension
   1586            here, or (if GUEST_BASE == 0, or a segment register is in use)
   1587            use the ADDR32 prefix.  For now, do nothing.  */
   1588         if (GUEST_BASE && guest_base_flags) {
   1589             seg = guest_base_flags;
   1590             offset = 0;
   1591         } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
   1592             tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
   1593             tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
   1594             base = TCG_REG_L1;
   1595             offset = 0;
   1596         }
   1597 
   1598         tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
   1599     }
   1600 #endif
   1601 }
   1602 
   1603 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
   1604                               const TCGArg *args, const int *const_args)
   1605 {
   1606     int c, rexw = 0;
   1607 
   1608 #if TCG_TARGET_REG_BITS == 64
   1609 # define OP_32_64(x) \
   1610         case glue(glue(INDEX_op_, x), _i64): \
   1611             rexw = P_REXW; /* FALLTHRU */    \
   1612         case glue(glue(INDEX_op_, x), _i32)
   1613 #else
   1614 # define OP_32_64(x) \
   1615         case glue(glue(INDEX_op_, x), _i32)
   1616 #endif
   1617 
   1618     switch(opc) {
   1619     case INDEX_op_exit_tb:
   1620         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
   1621         tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
   1622         break;
   1623     case INDEX_op_goto_tb:
   1624         if (s->tb_jmp_offset) {
   1625             /* direct jump method */
   1626             tcg_out8(s, OPC_JMP_long); /* jmp im */
   1627             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
   1628             tcg_out32(s, 0);
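                     /* The zero displacement emitted above is patched with
                        the real target once the destination TB is known.  */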
   1629         } else {
   1630             /* indirect jump method */
   1631             tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
   1632                                  (intptr_t)(s->tb_next + args[0]));
   1633         }
   1634         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
   1635         break;
   1636     case INDEX_op_call:
   1637         if (const_args[0]) {
   1638             tcg_out_calli(s, args[0]);
   1639         } else {
   1640             /* call *reg */
   1641             tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
   1642         }
   1643         break;
   1644     case INDEX_op_br:
   1645         tcg_out_jxx(s, JCC_JMP, args[0], 0);
   1646         break;
   1647     case INDEX_op_movi_i32:
   1648         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
   1649         break;
   1650     OP_32_64(ld8u):
   1651         /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
   1652         tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
   1653         break;
   1654     OP_32_64(ld8s):
   1655         tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
   1656         break;
   1657     OP_32_64(ld16u):
   1658         /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
   1659         tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
   1660         break;
   1661     OP_32_64(ld16s):
   1662         tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
   1663         break;
   1664 #if TCG_TARGET_REG_BITS == 64
   1665     case INDEX_op_ld32u_i64:
   1666 #endif
   1667     case INDEX_op_ld_i32:
   1668         tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
   1669         break;
   1670 
   1671     OP_32_64(st8):
   1672         if (const_args[0]) {
   1673             tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
   1674                                  0, args[1], args[2]);
   1675             tcg_out8(s, args[0]);
   1676         } else {
   1677             tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
   1678                                  args[0], args[1], args[2]);
   1679         }
   1680         break;
   1681     OP_32_64(st16):
   1682         if (const_args[0]) {
   1683             tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
   1684                                  0, args[1], args[2]);
   1685             tcg_out16(s, args[0]);
   1686         } else {
   1687             tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
   1688                                  args[0], args[1], args[2]);
   1689         }
   1690         break;
   1691 #if TCG_TARGET_REG_BITS == 64
   1692     case INDEX_op_st32_i64:
   1693 #endif
   1694     case INDEX_op_st_i32:
   1695         if (const_args[0]) {
   1696             tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
   1697             tcg_out32(s, args[0]);
   1698         } else {
   1699             tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
   1700         }
   1701         break;
   1702 
   1703     OP_32_64(add):
   1704         /* For 3-operand addition, use LEA.  */
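                 /* E.g. "lea 0x10(%rbx,%rcx), %rax" computes
                    rax = rbx + rcx + 16 in a single instruction, without
                    touching the flags.  */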
   1705         if (args[0] != args[1]) {
   1706             TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
   1707 
   1708             if (const_args[2]) {
   1709                 c3 = a2, a2 = -1;
   1710             } else if (a0 == a2) {
   1711                 /* Watch out for dest = src + dest, since we've removed
   1712                    the matching constraint on the add.  */
   1713                 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
   1714                 break;
   1715             }
   1716 
   1717             tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
   1718             break;
   1719         }
   1720         c = ARITH_ADD;
   1721         goto gen_arith;
   1722     OP_32_64(sub):
   1723         c = ARITH_SUB;
   1724         goto gen_arith;
   1725     OP_32_64(and):
   1726         c = ARITH_AND;
   1727         goto gen_arith;
   1728     OP_32_64(or):
   1729         c = ARITH_OR;
   1730         goto gen_arith;
   1731     OP_32_64(xor):
   1732         c = ARITH_XOR;
   1733         goto gen_arith;
   1734     gen_arith:
   1735         if (const_args[2]) {
   1736             tgen_arithi(s, c + rexw, args[0], args[2], 0);
   1737         } else {
   1738             tgen_arithr(s, c + rexw, args[0], args[2]);
   1739         }
   1740         break;
   1741 
   1742     OP_32_64(mul):
   1743         if (const_args[2]) {
   1744             int32_t val;
   1745             val = args[2];
   1746             if (val == (int8_t)val) {
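                         /* The constant fits in a signed byte: use the
                            shorter imul Gv,Ev,Ib encoding.  */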
   1747                 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
   1748                 tcg_out8(s, val);
   1749             } else {
   1750                 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
   1751                 tcg_out32(s, val);
   1752             }
   1753         } else {
   1754             tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
   1755         }
   1756         break;
   1757 
   1758     OP_32_64(div2):
   1759         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
   1760         break;
   1761     OP_32_64(divu2):
   1762         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
   1763         break;
   1764 
   1765     OP_32_64(shl):
   1766         c = SHIFT_SHL;
   1767         goto gen_shift;
   1768     OP_32_64(shr):
   1769         c = SHIFT_SHR;
   1770         goto gen_shift;
   1771     OP_32_64(sar):
   1772         c = SHIFT_SAR;
   1773         goto gen_shift;
   1774     OP_32_64(rotl):
   1775         c = SHIFT_ROL;
   1776         goto gen_shift;
   1777     OP_32_64(rotr):
   1778         c = SHIFT_ROR;
   1779         goto gen_shift;
   1780     gen_shift:
   1781         if (const_args[2]) {
   1782             tcg_out_shifti(s, c + rexw, args[0], args[2]);
   1783         } else {
   1784             tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
   1785         }
   1786         break;
   1787 
   1788     case INDEX_op_brcond_i32:
   1789         tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
   1790                          args[3], 0);
   1791         break;
   1792     case INDEX_op_setcond_i32:
   1793         tcg_out_setcond32(s, args[3], args[0], args[1],
   1794                           args[2], const_args[2]);
   1795         break;
   1796     case INDEX_op_movcond_i32:
   1797         tcg_out_movcond32(s, args[5], args[0], args[1],
   1798                           args[2], const_args[2], args[3]);
   1799         break;
   1800 
   1801     OP_32_64(bswap16):
   1802         tcg_out_rolw_8(s, args[0]);
   1803         break;
   1804     OP_32_64(bswap32):
   1805         tcg_out_bswap32(s, args[0]);
   1806         break;
   1807 
   1808     OP_32_64(neg):
   1809         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
   1810         break;
   1811     OP_32_64(not):
   1812         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
   1813         break;
   1814 
   1815     OP_32_64(ext8s):
   1816         tcg_out_ext8s(s, args[0], args[1], rexw);
   1817         break;
   1818     OP_32_64(ext16s):
   1819         tcg_out_ext16s(s, args[0], args[1], rexw);
   1820         break;
   1821     OP_32_64(ext8u):
   1822         tcg_out_ext8u(s, args[0], args[1]);
   1823         break;
   1824     OP_32_64(ext16u):
   1825         tcg_out_ext16u(s, args[0], args[1]);
   1826         break;
   1827 
   1828     case INDEX_op_qemu_ld_i32:
   1829         tcg_out_qemu_ld(s, args, 0);
   1830         break;
   1831     case INDEX_op_qemu_ld_i64:
   1832         tcg_out_qemu_ld(s, args, 1);
   1833         break;
   1834     case INDEX_op_qemu_st_i32:
   1835         tcg_out_qemu_st(s, args, 0);
   1836         break;
   1837     case INDEX_op_qemu_st_i64:
   1838         tcg_out_qemu_st(s, args, 1);
   1839         break;
   1840 
   1841     OP_32_64(mulu2):
   1842         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
   1843         break;
   1844     OP_32_64(muls2):
   1845         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
   1846         break;
   1847     OP_32_64(add2):
   1848         if (const_args[4]) {
   1849             tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
   1850         } else {
   1851             tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
   1852         }
   1853         if (const_args[5]) {
   1854             tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
   1855         } else {
   1856             tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
   1857         }
   1858         break;
   1859     OP_32_64(sub2):
   1860         if (const_args[4]) {
   1861             tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
   1862         } else {
   1863             tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
   1864         }
   1865         if (const_args[5]) {
   1866             tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
   1867         } else {
   1868             tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
   1869         }
   1870         break;
   1871 
   1872 #if TCG_TARGET_REG_BITS == 32
   1873     case INDEX_op_brcond2_i32:
   1874         tcg_out_brcond2(s, args, const_args, 0);
   1875         break;
   1876     case INDEX_op_setcond2_i32:
   1877         tcg_out_setcond2(s, args, const_args);
   1878         break;
   1879 #else /* TCG_TARGET_REG_BITS == 64 */
   1880     case INDEX_op_movi_i64:
   1881         tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
   1882         break;
   1883     case INDEX_op_ld32s_i64:
   1884         tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
   1885         break;
   1886     case INDEX_op_ld_i64:
   1887         tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
   1888         break;
   1889     case INDEX_op_st_i64:
   1890         if (const_args[0]) {
   1891             tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
   1892                                  0, args[1], args[2]);
   1893             tcg_out32(s, args[0]);
   1894         } else {
   1895             tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
   1896         }
   1897         break;
   1898 
   1899     case INDEX_op_brcond_i64:
   1900         tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
   1901                          args[3], 0);
   1902         break;
   1903     case INDEX_op_setcond_i64:
   1904         tcg_out_setcond64(s, args[3], args[0], args[1],
   1905                           args[2], const_args[2]);
   1906         break;
   1907     case INDEX_op_movcond_i64:
   1908         tcg_out_movcond64(s, args[5], args[0], args[1],
   1909                           args[2], const_args[2], args[3]);
   1910         break;
   1911 
   1912     case INDEX_op_bswap64_i64:
   1913         tcg_out_bswap64(s, args[0]);
   1914         break;
   1915     case INDEX_op_ext32u_i64:
   1916         tcg_out_ext32u(s, args[0], args[1]);
   1917         break;
   1918     case INDEX_op_ext32s_i64:
   1919         tcg_out_ext32s(s, args[0], args[1]);
   1920         break;
   1921 #endif
   1922 
   1923     OP_32_64(deposit):
   1924         if (args[3] == 0 && args[4] == 8) {
   1925             /* load bits 0..7 */
   1926             tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
   1927                           args[2], args[0]);
   1928         } else if (args[3] == 8 && args[4] == 8) {
   1929             /* load bits 8..15 */
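                     /* args[0] + 4 selects the matching high-byte register
                        (%ah, %ch, %dh or %bh) in the ModRM encoding.  */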
   1930             tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
   1931         } else if (args[3] == 0 && args[4] == 16) {
   1932             /* load bits 0..15 */
   1933             tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
   1934         } else {
   1935             tcg_abort();
   1936         }
   1937         break;
   1938 
   1939     default:
   1940         tcg_abort();
   1941     }
   1942 
   1943 #undef OP_32_64
   1944 }
   1945 
   1946 static const TCGTargetOpDef x86_op_defs[] = {
   1947     { INDEX_op_exit_tb, { } },
   1948     { INDEX_op_goto_tb, { } },
   1949     { INDEX_op_call, { "ri" } },
   1950     { INDEX_op_br, { } },
   1951     { INDEX_op_mov_i32, { "r", "r" } },
   1952     { INDEX_op_movi_i32, { "r" } },
   1953     { INDEX_op_ld8u_i32, { "r", "r" } },
   1954     { INDEX_op_ld8s_i32, { "r", "r" } },
   1955     { INDEX_op_ld16u_i32, { "r", "r" } },
   1956     { INDEX_op_ld16s_i32, { "r", "r" } },
   1957     { INDEX_op_ld_i32, { "r", "r" } },
   1958     { INDEX_op_st8_i32, { "qi", "r" } },
   1959     { INDEX_op_st16_i32, { "ri", "r" } },
   1960     { INDEX_op_st_i32, { "ri", "r" } },
   1961 
   1962     { INDEX_op_add_i32, { "r", "r", "ri" } },
   1963     { INDEX_op_sub_i32, { "r", "0", "ri" } },
   1964     { INDEX_op_mul_i32, { "r", "0", "ri" } },
   1965     { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
   1966     { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
   1967     { INDEX_op_and_i32, { "r", "0", "ri" } },
   1968     { INDEX_op_or_i32, { "r", "0", "ri" } },
   1969     { INDEX_op_xor_i32, { "r", "0", "ri" } },
   1970 
   1971     { INDEX_op_shl_i32, { "r", "0", "ci" } },
   1972     { INDEX_op_shr_i32, { "r", "0", "ci" } },
   1973     { INDEX_op_sar_i32, { "r", "0", "ci" } },
   1974     { INDEX_op_rotl_i32, { "r", "0", "ci" } },
   1975     { INDEX_op_rotr_i32, { "r", "0", "ci" } },
   1976 
   1977     { INDEX_op_brcond_i32, { "r", "ri" } },
   1978 
   1979     { INDEX_op_bswap16_i32, { "r", "0" } },
   1980     { INDEX_op_bswap32_i32, { "r", "0" } },
   1981 
   1982     { INDEX_op_neg_i32, { "r", "0" } },
   1983 
   1984     { INDEX_op_not_i32, { "r", "0" } },
   1985 
   1986     { INDEX_op_ext8s_i32, { "r", "q" } },
   1987     { INDEX_op_ext16s_i32, { "r", "r" } },
   1988     { INDEX_op_ext8u_i32, { "r", "q" } },
   1989     { INDEX_op_ext16u_i32, { "r", "r" } },
   1990 
   1991     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
   1992 
   1993     { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
   1994 #if TCG_TARGET_HAS_movcond_i32
   1995     { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
   1996 #endif
   1997 
   1998     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
   1999     { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
   2000     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
   2001     { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
   2002 
   2003 #if TCG_TARGET_REG_BITS == 32
   2004     { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
   2005     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
   2006 #else
   2007     { INDEX_op_mov_i64, { "r", "r" } },
   2008     { INDEX_op_movi_i64, { "r" } },
   2009     { INDEX_op_ld8u_i64, { "r", "r" } },
   2010     { INDEX_op_ld8s_i64, { "r", "r" } },
   2011     { INDEX_op_ld16u_i64, { "r", "r" } },
   2012     { INDEX_op_ld16s_i64, { "r", "r" } },
   2013     { INDEX_op_ld32u_i64, { "r", "r" } },
   2014     { INDEX_op_ld32s_i64, { "r", "r" } },
   2015     { INDEX_op_ld_i64, { "r", "r" } },
   2016     { INDEX_op_st8_i64, { "ri", "r" } },
   2017     { INDEX_op_st16_i64, { "ri", "r" } },
   2018     { INDEX_op_st32_i64, { "ri", "r" } },
   2019     { INDEX_op_st_i64, { "re", "r" } },
   2020 
   2021     { INDEX_op_add_i64, { "r", "r", "re" } },
   2022     { INDEX_op_mul_i64, { "r", "0", "re" } },
   2023     { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
   2024     { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
   2025     { INDEX_op_sub_i64, { "r", "0", "re" } },
   2026     { INDEX_op_and_i64, { "r", "0", "reZ" } },
   2027     { INDEX_op_or_i64, { "r", "0", "re" } },
   2028     { INDEX_op_xor_i64, { "r", "0", "re" } },
   2029 
   2030     { INDEX_op_shl_i64, { "r", "0", "ci" } },
   2031     { INDEX_op_shr_i64, { "r", "0", "ci" } },
   2032     { INDEX_op_sar_i64, { "r", "0", "ci" } },
   2033     { INDEX_op_rotl_i64, { "r", "0", "ci" } },
   2034     { INDEX_op_rotr_i64, { "r", "0", "ci" } },
   2035 
   2036     { INDEX_op_brcond_i64, { "r", "re" } },
   2037     { INDEX_op_setcond_i64, { "r", "r", "re" } },
   2038 
   2039     { INDEX_op_bswap16_i64, { "r", "0" } },
   2040     { INDEX_op_bswap32_i64, { "r", "0" } },
   2041     { INDEX_op_bswap64_i64, { "r", "0" } },
   2042     { INDEX_op_neg_i64, { "r", "0" } },
   2043     { INDEX_op_not_i64, { "r", "0" } },
   2044 
   2045     { INDEX_op_ext8s_i64, { "r", "r" } },
   2046     { INDEX_op_ext16s_i64, { "r", "r" } },
   2047     { INDEX_op_ext32s_i64, { "r", "r" } },
   2048     { INDEX_op_ext8u_i64, { "r", "r" } },
   2049     { INDEX_op_ext16u_i64, { "r", "r" } },
   2050     { INDEX_op_ext32u_i64, { "r", "r" } },
   2051 
   2052     { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
   2053     { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
   2054 
   2055     { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
   2056     { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
   2057     { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
   2058     { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
   2059 #endif
   2060 
   2061 #if TCG_TARGET_REG_BITS == 64
   2062     { INDEX_op_qemu_ld_i32, { "r", "L" } },
   2063     { INDEX_op_qemu_st_i32, { "L", "L" } },
   2064     { INDEX_op_qemu_ld_i64, { "r", "L" } },
   2065     { INDEX_op_qemu_st_i64, { "L", "L" } },
   2066 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
   2067     { INDEX_op_qemu_ld_i32, { "r", "L" } },
   2068     { INDEX_op_qemu_st_i32, { "L", "L" } },
   2069     { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
   2070     { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
   2071 #else
   2072     { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
   2073     { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
   2074     { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
   2075     { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
   2076 #endif
   2077     { -1 },
   2078 };
   2079 
    2080 static const int tcg_target_callee_save_regs[] = {
   2081 #if TCG_TARGET_REG_BITS == 64
   2082     TCG_REG_RBP,
   2083     TCG_REG_RBX,
   2084 #if defined(_WIN64)
   2085     TCG_REG_RDI,
   2086     TCG_REG_RSI,
   2087 #endif
   2088     TCG_REG_R12,
   2089     TCG_REG_R13,
   2090     TCG_REG_R14, /* Currently used for the global env. */
   2091     TCG_REG_R15,
   2092 #else
   2093     TCG_REG_EBP, /* Currently used for the global env. */
   2094     TCG_REG_EBX,
   2095     TCG_REG_ESI,
   2096     TCG_REG_EDI,
   2097 #endif
   2098 };
   2099 
   2100 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
   2101    and tcg_register_jit.  */
   2102 
   2103 #define PUSH_SIZE \
   2104     ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
   2105      * (TCG_TARGET_REG_BITS / 8))
   2106 
   2107 #define FRAME_SIZE \
   2108     ((PUSH_SIZE \
   2109       + TCG_STATIC_CALL_ARGS_SIZE \
   2110       + CPU_TEMP_BUF_NLONGS * sizeof(long) \
   2111       + TCG_TARGET_STACK_ALIGN - 1) \
   2112      & ~(TCG_TARGET_STACK_ALIGN - 1))
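
         /* As a worked example (a sketch only, assuming the usual values of
            TCG_STATIC_CALL_ARGS_SIZE = 128, CPU_TEMP_BUF_NLONGS = 128 and
            TCG_TARGET_STACK_ALIGN = 16 on an LP64 host): a 64-bit
            non-Windows build pushes six callee-saved registers plus the
            return address, so PUSH_SIZE = 7 * 8 = 56 and
            FRAME_SIZE = (56 + 128 + 128 * 8 + 15) & ~15 = 1216.  */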
   2113 
   2114 /* Generate global QEMU prologue and epilogue code */
   2115 static void tcg_target_qemu_prologue(TCGContext *s)
   2116 {
   2117     int i, stack_addend;
   2118 
   2119     /* TB prologue */
   2120 
   2121     /* Reserve some stack space, also for TCG temps.  */
   2122     stack_addend = FRAME_SIZE - PUSH_SIZE;
   2123     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
   2124                   CPU_TEMP_BUF_NLONGS * sizeof(long));
   2125 
   2126     /* Save all callee saved registers.  */
   2127     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
   2128         tcg_out_push(s, tcg_target_callee_save_regs[i]);
   2129     }
   2130 
   2131 #if TCG_TARGET_REG_BITS == 32
   2132     tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
   2133                (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
   2134     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
   2135     /* jmp *tb.  */
   2136     tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
    2137                          (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
    2138                          + stack_addend);
   2139 #else
   2140     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
   2141     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
   2142     /* jmp *tb.  */
   2143     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
   2144 #endif
   2145 
   2146     /* TB epilogue */
   2147     tb_ret_addr = s->code_ptr;
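             /* exit_tb jumps here with its return value already loaded into
                EAX/RAX, which thus becomes the return value of the generated
                code as a whole.  */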
   2148 
   2149     tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
   2150 
   2151     for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
   2152         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
   2153     }
   2154     tcg_out_opc(s, OPC_RET, 0, 0, 0);
   2155 
   2156 #if !defined(CONFIG_SOFTMMU)
   2157     /* Try to set up a segment register to point to GUEST_BASE.  */
   2158     if (GUEST_BASE) {
   2159         setup_guest_base_seg();
   2160     }
   2161 #endif
   2162 }
   2163 
   2164 static void tcg_target_init(TCGContext *s)
   2165 {
    2166     /* For 32-bit, it is almost certain that we're running on hardware
    2167        that supports cmov, but we still need to check.  In case cmov is
    2168        not available, we'll use a small forward branch.  */
   2169 #ifndef have_cmov
   2170     {
   2171         unsigned a, b, c, d;
   2172         have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
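                 /* CPUID leaf 1 reports CMOV support in bit 15 of EDX; the
                    bit_CMOV mask tests exactly that bit.  */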
   2173     }
   2174 #endif
   2175 
   2176     if (TCG_TARGET_REG_BITS == 64) {
   2177         tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
   2178         tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
   2179     } else {
   2180         tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
   2181     }
   2182 
   2183     tcg_regset_clear(tcg_target_call_clobber_regs);
   2184     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
   2185     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
   2186     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
   2187     if (TCG_TARGET_REG_BITS == 64) {
   2188 #if !defined(_WIN64)
   2189         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
   2190         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
   2191 #endif
   2192         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
   2193         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
   2194         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
   2195         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
   2196     }
   2197 
   2198     tcg_regset_clear(s->reserved_regs);
   2199     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
   2200 
   2201     tcg_add_target_add_op_defs(x86_op_defs);
   2202 }
   2203 
   2204 typedef struct {
   2205     DebugFrameCIE cie;
   2206     DebugFrameFDEHeader fde;
   2207     uint8_t fde_def_cfa[4];
   2208     uint8_t fde_reg_ofs[14];
   2209 } DebugFrame;
   2210 
    2211 /* We're expecting a 2-byte uleb128-encoded value.  */
   2212 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
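
         /* A uleb128 byte holds 7 payload bits plus a continuation bit, so
            two bytes cover values below 1 << 14; the fde_def_cfa encodings
            below split FRAME_SIZE in exactly that way.  */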
   2213 
   2214 #if !defined(__ELF__)
   2215     /* Host machine without ELF. */
   2216 #elif TCG_TARGET_REG_BITS == 64
   2217 #define ELF_HOST_MACHINE EM_X86_64
   2218 static DebugFrame debug_frame = {
   2219     .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
   2220     .cie.id = -1,
   2221     .cie.version = 1,
   2222     .cie.code_align = 1,
   2223     .cie.data_align = 0x78,             /* sleb128 -8 */
   2224     .cie.return_column = 16,
   2225 
   2226     /* Total FDE size does not include the "len" member.  */
   2227     .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
   2228 
   2229     .fde_def_cfa = {
   2230         12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
   2231         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
   2232         (FRAME_SIZE >> 7)
   2233     },
   2234     .fde_reg_ofs = {
   2235         0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
   2236         /* The following ordering must match tcg_target_callee_save_regs.  */
   2237         0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
   2238         0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
   2239         0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
   2240         0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
   2241         0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
   2242         0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
   2243     }
   2244 };
   2245 #else
   2246 #define ELF_HOST_MACHINE EM_386
   2247 static DebugFrame debug_frame = {
   2248     .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
   2249     .cie.id = -1,
   2250     .cie.version = 1,
   2251     .cie.code_align = 1,
   2252     .cie.data_align = 0x7c,             /* sleb128 -4 */
   2253     .cie.return_column = 8,
   2254 
   2255     /* Total FDE size does not include the "len" member.  */
   2256     .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
   2257 
   2258     .fde_def_cfa = {
   2259         12, 4,                          /* DW_CFA_def_cfa %esp, ... */
   2260         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
   2261         (FRAME_SIZE >> 7)
   2262     },
   2263     .fde_reg_ofs = {
   2264         0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
   2265         /* The following ordering must match tcg_target_callee_save_regs.  */
   2266         0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
   2267         0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
   2268         0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
   2269         0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
   2270     }
   2271 };
   2272 #endif
   2273 
   2274 #if defined(ELF_HOST_MACHINE)
   2275 void tcg_register_jit(void *buf, size_t buf_size)
   2276 {
   2277     debug_frame.fde.func_start = (uintptr_t)buf;
   2278     debug_frame.fde.func_len = buf_size;
   2279 
   2280     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
   2281 }
   2282 #endif
   2283