/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifdef _WIN32
// For some reason, the Mingw32 headers define the 'small' macro which
// prevents this source from compiling.
#undef small
#endif

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  However, the host compiler must supply <cpuid.h>, as we're
   not going to go so far as our own inline assembly.  */
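/* In the CONFIG_CPUID_H case below, have_cmov is a plain variable and is
   expected to be filled in during target initialisation (not shown in this
   excerpt).  A minimal sketch of such a probe, assuming GCC's <cpuid.h>,
   which provides __get_cpuid() and bit_CMOV (CPUID leaf 1, EDX bit 15):

       unsigned a, b, c, d;
       if (__get_cpuid(1, &a, &b, &c, &d)) {
           have_cmov = (d & bit_CMOV) != 0;
       }
 */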
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
#include <cpuid.h>
static bool have_cmov;
#else
# define have_cmov 0
#endif

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT       0x100   /* 0x0f opcode prefix */
#define P_DATA16    0x200   /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32   0x400   /* 0x67 opcode prefix */
# define P_REXW     0x800   /* Set REX.W = 1 */
# define P_REXB_R   0x1000  /* REG field as byte register */
# define P_REXB_RM  0x2000  /* R/M field as byte register */
# define P_GS       0x4000  /* gs segment override */
#else
# define P_ADDR32   0
# define P_REXW     0
# define P_REXB_R   0
# define P_REXB_RM  0
# define P_GS       0
#endif
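/* The OPC_* constants below pack more than the primary opcode byte: the low
   eight bits are the opcode itself, and the P_* flags above are ORed in to
   request prefixes, which tcg_out_opc() peels off again when emitting.  For
   example, OPC_MOVZWL is (0xb7 | P_EXT), so emitting it with a register
   operand produces the two-byte opcode "0f b7 /r" (movzwl); adding P_DATA16
   or P_REXW to an opcode likewise prepends an 0x66 or REX.W prefix.  */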
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4
/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;    /* REX.W */
    rex |= (r & 8) >> 1;           /* REX.R */
    rex |= (x & 8) >> 2;           /* REX.X */
    rex |= (rm & 8) >> 3;          /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
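/* For the register-direct form above, the ModRM byte is mod=11, reg, r/m.
   Worked example (an illustration, not taken from the original comments):
   tcg_out_modrm(s, OPC_ADD_GvEv + P_REXW, TCG_REG_RAX, TCG_REG_RBX) emits
   48 03 c3, i.e. "addq %rbx, %rax" -- REX.W from P_REXW, opcode 0x03, and
   ModRM 0xc3 = 11 000 011 selecting RAX as reg and RBX as r/m.  */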
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM or INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }
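    /* At this point the constant needs all 64 bits.  The remaining choices,
       by size: a 7-byte rip-relative LEA (REX.W + 8D + ModRM + disp32) when
       the value lies within +/-2GB of the code buffer, otherwise the full
       10-byte movabs (REX.W + B8+r + imm64).  The 32-bit cases above already
       used the 5-byte movl imm32 (one more byte with a REX prefix for
       %r8d-%r15d) and the 7-byte sign-extended movq imm32 forms.  */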
    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
{
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);
    tcg_out32(s, val);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
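/* Branch encodings used below: a short jump is 2 bytes (opcode + rel8), a
   long unconditional jmp is 5 bytes (E9 + rel32), and a long conditional
   jcc is 6 bytes (0F 8x + rel32).  The displacement is relative to the end
   of the instruction, which is why the code subtracts 2, 5 or 6 from the
   label offset before emitting it.  */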
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (intptr_t)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle temporaries that live across basic blocks */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        int over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
{
    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */
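/* Note: the TLB slot index is conceptually
   (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1).  The code below computes
   it pre-scaled, shifting by (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS) and
   masking with (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, so that r0 ends up
   as a byte offset into env->tlb_table[mem_index] ready for the LEA.  The
   mask applied to r1, TARGET_PAGE_MASK | ((1 << s_bits) - 1), keeps the page
   bits but lets misaligned low bits survive into the compare, so accesses
   that are not aligned to the access size also fail the compare and take
   the slow path.  */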

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp s_bits,
                                    uint8_t **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType htype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            htype = TCG_TYPE_I64;
            hrexw = P_REXW;
        }
    }

    tcg_out_mov(s, htype, r0, addrlo);
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + hrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */
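    /* The addend field of the matching CPUTLBEntry holds the difference
       between the guest virtual page and the host address backing it, so
       adding it to r1 below turns the guest address into a directly
       dereferenceable host pointer.  */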

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}

/*
 * Record the context of a call to the out of line helper code for the slow path
 * for a load or store, so that we can later generate the correct helper code
 */
static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, uint8_t *raddr,
                                uint8_t **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGReg data_reg;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
                     l->mem_index);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump back to the translated code following the qemu_ld */
    tcg_out_jmp(s, (uintptr_t)l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGMemOp s_bits = opc & MO_SIZE;
    uint8_t **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
                     l->mem_index);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
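    /* Pushing the return address and jumping (rather than calling) means
       the helper's own RET transfers control straight back to l->raddr in
       the translated code, so no extra jump is needed after the helper.  */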
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
                                 datalo, base, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
                                 datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
            } else {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed registers globals are less
   common.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    TCGMemOp s_bits;
    uint8_t *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    opc = *args++;

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        TCGReg base = addrlo;
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
            base = TCG_REG_L1;
            offset = 0;
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
                             datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
                                 datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    TCGMemOp s_bits;
    uint8_t *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    opc = *args++;

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        TCGReg base = addrlo;
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
            base = TCG_REG_L1;
            offset = 0;
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
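        /* LEA computes dest = base + index + disp without touching the
           source registers or the flags.  For example, with a1 = %rbx,
           a2 = %rcx and c3 = 0 the call below produces
           "lea (%rbx,%rcx), %rax"-style code, i.e. a true three-operand
           add.  */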
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    OP_32_64(mulu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        break;
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
        }
        break;
    OP_32_64(sub2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    default:
        tcg_abort();
    }

#undef OP_32_64
}

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
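    /* div2/divu2 (and mulu2/muls2 further down) pin their operands to "a"
       (EAX) and "d" (EDX) because the underlying IDIV/DIV/MUL/IMUL
       instructions implicitly use the EDX:EAX register pair; "0"/"1" tie an
       input to the corresponding output register.  */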
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
#if TCG_TARGET_HAS_movcond_i32
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
#endif

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to GUEST_BASE.  */
    if (GUEST_BASE) {
        setup_guest_base_seg();
    }
#endif
}

static void tcg_target_init(TCGContext *s)
{
    /* For 32-bit, 99% certainty that we're running on hardware that
       supports cmov, but we still need to check.  In case cmov is not
       available, we'll use a small forward branch.  */
#ifndef have_cmov
    {
        unsigned a, b, c, d;
        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2-byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if !defined(__ELF__)
    /* Host machine without ELF.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
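
/*
 * Editor's note (not part of the original source): the two .fde_def_cfa
 * bytes "(FRAME_SIZE & 0x7f) | 0x80, (FRAME_SIZE >> 7)" above form a
 * fixed-width, two-byte uleb128 encoding of FRAME_SIZE.  As a worked
 * example with a purely hypothetical FRAME_SIZE of 0x1a8 (424 bytes):
 *
 *     byte 0 = (0x1a8 & 0x7f) | 0x80 = 0xa8   low 7 bits, continuation set
 *     byte 1 =  0x1a8 >> 7           = 0x03   high bits, continuation clear
 *
 * and a DWARF consumer decodes it back as 0x28 | (0x03 << 7) = 0x1a8.
 * The QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)) above guarantees the
 * value always fits in these two 7-bit groups, so the second byte never
 * needs its own continuation bit.  The real FRAME_SIZE depends on the
 * build configuration; 0x1a8 is only an assumed value for illustration.
 */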