1 /* 2 * Stack-less Just-In-Time compiler 3 * 4 * Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without modification, are 7 * permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this list of 10 * conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 * of conditions and the following disclaimer in the documentation and/or other materials 14 * provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 28 { 29 return "x86" SLJIT_CPUINFO; 30 } 31 32 /* 33 32b register indexes: 34 0 - EAX 35 1 - ECX 36 2 - EDX 37 3 - EBX 38 4 - none 39 5 - EBP 40 6 - ESI 41 7 - EDI 42 */ 43 44 /* 45 64b register indexes: 46 0 - RAX 47 1 - RCX 48 2 - RDX 49 3 - RBX 50 4 - none 51 5 - RBP 52 6 - RSI 53 7 - RDI 54 8 - R8 - From now on REX prefix is required 55 9 - R9 56 10 - R10 57 11 - R11 58 12 - R12 59 13 - R13 60 14 - R14 61 15 - R15 62 */ 63 64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 65 66 /* Last register + 1. */ 67 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 68 69 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { 70 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5 71 }; 72 73 #define CHECK_EXTRA_REGS(p, w, do) \ 74 if (p >= SLJIT_R3 && p <= SLJIT_R6) { \ 75 w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \ 76 p = SLJIT_MEM1(SLJIT_SP); \ 77 do; \ 78 } 79 80 #else /* SLJIT_CONFIG_X86_32 */ 81 82 /* Last register + 1. */ 83 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 84 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) 85 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) 86 87 /* Note: r12 & 0x7 == 0b100, which is decoded as SIB byte present. 88 Note: avoid using r12 and r13 for memory addressing, 89 therefore r12 is better suited for SAVED_EREG than SAVED_REG. */ 90 #ifndef _WIN64 91 /* 1st argument passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ 92 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 93 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 94 }; 95 /* low-map. reg_map & 0x7. */ 96 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 97 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 98 }; 99 #else 100 /* 1st argument passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ 101 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 102 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 103 }; 104 /* low-map. reg_map & 0x7.
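Only the low three bits of a register index fit into the ModR/M and SIB fields; bit 3 is supplied by the REX prefix, so reg_lmap is used once the prefix bits have been decided.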
*/ 105 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 106 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 107 }; 108 #endif 109 110 #define REX_W 0x48 111 #define REX_R 0x44 112 #define REX_X 0x42 113 #define REX_B 0x41 114 #define REX 0x40 115 116 #ifndef _WIN64 117 #define HALFWORD_MAX 0x7fffffffl 118 #define HALFWORD_MIN -0x80000000l 119 #else 120 #define HALFWORD_MAX 0x7fffffffll 121 #define HALFWORD_MIN -0x80000000ll 122 #endif 123 124 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) 125 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) 126 127 #define CHECK_EXTRA_REGS(p, w, do) 128 129 #endif /* SLJIT_CONFIG_X86_32 */ 130 131 #define TMP_FREG (0) 132 133 /* Size flags for emit_x86_instruction: */ 134 #define EX86_BIN_INS 0x0010 135 #define EX86_SHIFT_INS 0x0020 136 #define EX86_REX 0x0040 137 #define EX86_NO_REXW 0x0080 138 #define EX86_BYTE_ARG 0x0100 139 #define EX86_HALF_ARG 0x0200 140 #define EX86_PREF_66 0x0400 141 #define EX86_PREF_F2 0x0800 142 #define EX86_PREF_F3 0x1000 143 #define EX86_SSE2_OP1 0x2000 144 #define EX86_SSE2_OP2 0x4000 145 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) 146 147 /* --------------------------------------------------------------------- */ 148 /* Instruction forms */ 149 /* --------------------------------------------------------------------- */ 150 151 #define ADD (/* BINARY */ 0 << 3) 152 #define ADD_EAX_i32 0x05 153 #define ADD_r_rm 0x03 154 #define ADD_rm_r 0x01 155 #define ADDSD_x_xm 0x58 156 #define ADC (/* BINARY */ 2 << 3) 157 #define ADC_EAX_i32 0x15 158 #define ADC_r_rm 0x13 159 #define ADC_rm_r 0x11 160 #define AND (/* BINARY */ 4 << 3) 161 #define AND_EAX_i32 0x25 162 #define AND_r_rm 0x23 163 #define AND_rm_r 0x21 164 #define ANDPD_x_xm 0x54 165 #define BSR_r_rm (/* GROUP_0F */ 0xbd) 166 #define CALL_i32 0xe8 167 #define CALL_rm (/* GROUP_FF */ 2 << 3) 168 #define CDQ 0x99 169 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45) 170 #define CMP (/* BINARY */ 7 << 3) 171 #define CMP_EAX_i32 0x3d 172 #define CMP_r_rm 0x3b 173 #define CMP_rm_r 0x39 174 #define CVTPD2PS_x_xm 0x5a 175 #define CVTSI2SD_x_rm 0x2a 176 #define CVTTSD2SI_r_xm 0x2c 177 #define DIV (/* GROUP_F7 */ 6 << 3) 178 #define DIVSD_x_xm 0x5e 179 #define INT3 0xcc 180 #define IDIV (/* GROUP_F7 */ 7 << 3) 181 #define IMUL (/* GROUP_F7 */ 5 << 3) 182 #define IMUL_r_rm (/* GROUP_0F */ 0xaf) 183 #define IMUL_r_rm_i8 0x6b 184 #define IMUL_r_rm_i32 0x69 185 #define JE_i8 0x74 186 #define JNE_i8 0x75 187 #define JMP_i8 0xeb 188 #define JMP_i32 0xe9 189 #define JMP_rm (/* GROUP_FF */ 4 << 3) 190 #define LEA_r_m 0x8d 191 #define MOV_r_rm 0x8b 192 #define MOV_r_i32 0xb8 193 #define MOV_rm_r 0x89 194 #define MOV_rm_i32 0xc7 195 #define MOV_rm8_i8 0xc6 196 #define MOV_rm8_r8 0x88 197 #define MOVSD_x_xm 0x10 198 #define MOVSD_xm_x 0x11 199 #define MOVSXD_r_rm 0x63 200 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) 201 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) 202 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) 203 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) 204 #define MUL (/* GROUP_F7 */ 4 << 3) 205 #define MULSD_x_xm 0x59 206 #define NEG_rm (/* GROUP_F7 */ 3 << 3) 207 #define NOP 0x90 208 #define NOT_rm (/* GROUP_F7 */ 2 << 3) 209 #define OR (/* BINARY */ 1 << 3) 210 #define OR_r_rm 0x0b 211 #define OR_EAX_i32 0x0d 212 #define OR_rm_r 0x09 213 #define OR_rm8_r8 0x08 214 #define POP_r 0x58 215 #define POP_rm 0x8f 216 #define POPF 0x9d 217 #define PUSH_i32 0x68 218 #define PUSH_r 0x50 219 #define PUSH_rm (/* GROUP_FF */ 6 << 3) 220 #define PUSHF
0x9c 221 #define RET_near 0xc3 222 #define RET_i16 0xc2 223 #define SBB (/* BINARY */ 3 << 3) 224 #define SBB_EAX_i32 0x1d 225 #define SBB_r_rm 0x1b 226 #define SBB_rm_r 0x19 227 #define SAR (/* SHIFT */ 7 << 3) 228 #define SHL (/* SHIFT */ 4 << 3) 229 #define SHR (/* SHIFT */ 5 << 3) 230 #define SUB (/* BINARY */ 5 << 3) 231 #define SUB_EAX_i32 0x2d 232 #define SUB_r_rm 0x2b 233 #define SUB_rm_r 0x29 234 #define SUBSD_x_xm 0x5c 235 #define TEST_EAX_i32 0xa9 236 #define TEST_rm_r 0x85 237 #define UCOMISD_x_xm 0x2e 238 #define UNPCKLPD_x_xm 0x14 239 #define XCHG_EAX_r 0x90 240 #define XCHG_r_rm 0x87 241 #define XOR (/* BINARY */ 6 << 3) 242 #define XOR_EAX_i32 0x35 243 #define XOR_r_rm 0x33 244 #define XOR_rm_r 0x31 245 #define XORPD_x_xm 0x57 246 247 #define GROUP_0F 0x0f 248 #define GROUP_F7 0xf7 249 #define GROUP_FF 0xff 250 #define GROUP_BINARY_81 0x81 251 #define GROUP_BINARY_83 0x83 252 #define GROUP_SHIFT_1 0xd1 253 #define GROUP_SHIFT_N 0xc1 254 #define GROUP_SHIFT_CL 0xd3 255 256 #define MOD_REG 0xc0 257 #define MOD_DISP8 0x40 258 259 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) 260 261 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) 262 #define POP_REG(r) (*inst++ = (POP_r + (r))) 263 #define RET() (*inst++ = (RET_near)) 264 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) 265 /* r32, r/m32 */ 266 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) 267 268 /* Multithreading does not affect these static variables, since they store 269 built-in CPU features. Therefore they can be overwritten by different threads 270 if they detect the CPU features in the same time. */ 271 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 272 static sljit_s32 cpu_has_sse2 = -1; 273 #endif 274 static sljit_s32 cpu_has_cmov = -1; 275 276 #ifdef _WIN32_WCE 277 #include <cmnintrin.h> 278 #elif defined(_MSC_VER) && _MSC_VER >= 1400 279 #include <intrin.h> 280 #endif 281 282 /******************************************************/ 283 /* Unaligned-store functions */ 284 /******************************************************/ 285 286 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) 287 { 288 SLJIT_MEMCPY(addr, &value, sizeof(value)); 289 } 290 291 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) 292 { 293 SLJIT_MEMCPY(addr, &value, sizeof(value)); 294 } 295 296 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) 297 { 298 SLJIT_MEMCPY(addr, &value, sizeof(value)); 299 } 300 301 /******************************************************/ 302 /* Utility functions */ 303 /******************************************************/ 304 305 static void get_cpu_features(void) 306 { 307 sljit_u32 features; 308 309 #if defined(_MSC_VER) && _MSC_VER >= 1400 310 311 int CPUInfo[4]; 312 __cpuid(CPUInfo, 1); 313 features = (sljit_u32)CPUInfo[3]; 314 315 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) 316 317 /* AT&T syntax. */ 318 __asm__ ( 319 "movl $0x1, %%eax\n" 320 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 321 /* On x86-32, there is no red zone, so this 322 should work (no need for a local variable). 
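EBX is saved and restored by hand because it may be reserved (e.g. as the PIC/GOT register) and cpuid overwrites it.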
*/ 323 "push %%ebx\n" 324 #endif 325 "cpuid\n" 326 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 327 "pop %%ebx\n" 328 #endif 329 "movl %%edx, %0\n" 330 : "=g" (features) 331 : 332 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 333 : "%eax", "%ecx", "%edx" 334 #else 335 : "%rax", "%rbx", "%rcx", "%rdx" 336 #endif 337 ); 338 339 #else /* _MSC_VER && _MSC_VER >= 1400 */ 340 341 /* Intel syntax. */ 342 __asm { 343 mov eax, 1 344 cpuid 345 mov features, edx 346 } 347 348 #endif /* _MSC_VER && _MSC_VER >= 1400 */ 349 350 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 351 cpu_has_sse2 = (features >> 26) & 0x1; 352 #endif 353 cpu_has_cmov = (features >> 15) & 0x1; 354 } 355 356 static sljit_u8 get_jump_code(sljit_s32 type) 357 { 358 switch (type) { 359 case SLJIT_EQUAL: 360 case SLJIT_EQUAL_F64: 361 return 0x84 /* je */; 362 363 case SLJIT_NOT_EQUAL: 364 case SLJIT_NOT_EQUAL_F64: 365 return 0x85 /* jne */; 366 367 case SLJIT_LESS: 368 case SLJIT_LESS_F64: 369 return 0x82 /* jc */; 370 371 case SLJIT_GREATER_EQUAL: 372 case SLJIT_GREATER_EQUAL_F64: 373 return 0x83 /* jae */; 374 375 case SLJIT_GREATER: 376 case SLJIT_GREATER_F64: 377 return 0x87 /* jnbe */; 378 379 case SLJIT_LESS_EQUAL: 380 case SLJIT_LESS_EQUAL_F64: 381 return 0x86 /* jbe */; 382 383 case SLJIT_SIG_LESS: 384 return 0x8c /* jl */; 385 386 case SLJIT_SIG_GREATER_EQUAL: 387 return 0x8d /* jnl */; 388 389 case SLJIT_SIG_GREATER: 390 return 0x8f /* jnle */; 391 392 case SLJIT_SIG_LESS_EQUAL: 393 return 0x8e /* jle */; 394 395 case SLJIT_OVERFLOW: 396 case SLJIT_MUL_OVERFLOW: 397 return 0x80 /* jo */; 398 399 case SLJIT_NOT_OVERFLOW: 400 case SLJIT_MUL_NOT_OVERFLOW: 401 return 0x81 /* jno */; 402 403 case SLJIT_UNORDERED_F64: 404 return 0x8a /* jp */; 405 406 case SLJIT_ORDERED_F64: 407 return 0x8b /* jpo */; 408 } 409 return 0; 410 } 411 412 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type); 413 414 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 415 static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type); 416 #endif 417 418 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type) 419 { 420 sljit_s32 short_jump; 421 sljit_uw label_addr; 422 423 if (jump->flags & JUMP_LABEL) 424 label_addr = (sljit_uw)(code + jump->u.label->size); 425 else 426 label_addr = jump->u.target; 427 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; 428 429 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 430 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) 431 return generate_far_jump_code(jump, code_ptr, type); 432 #endif 433 434 if (type == SLJIT_JUMP) { 435 if (short_jump) 436 *code_ptr++ = JMP_i8; 437 else 438 *code_ptr++ = JMP_i32; 439 jump->addr++; 440 } 441 else if (type >= SLJIT_FAST_CALL) { 442 short_jump = 0; 443 *code_ptr++ = CALL_i32; 444 jump->addr++; 445 } 446 else if (short_jump) { 447 *code_ptr++ = get_jump_code(type) - 0x10; 448 jump->addr++; 449 } 450 else { 451 *code_ptr++ = GROUP_0F; 452 *code_ptr++ = get_jump_code(type); 453 jump->addr += 2; 454 } 455 456 if (short_jump) { 457 jump->flags |= PATCH_MB; 458 code_ptr += sizeof(sljit_s8); 459 } else { 460 jump->flags |= PATCH_MW; 461 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 462 code_ptr += sizeof(sljit_sw); 463 #else 464 code_ptr += sizeof(sljit_s32); 465 
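/* On x86-64 a near jump or call still takes a sign-extended 32 bit displacement, so only 4 bytes are reserved for it here. */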
#endif 466 } 467 468 return code_ptr; 469 } 470 471 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) 472 { 473 struct sljit_memory_fragment *buf; 474 sljit_u8 *code; 475 sljit_u8 *code_ptr; 476 sljit_u8 *buf_ptr; 477 sljit_u8 *buf_end; 478 sljit_u8 len; 479 480 struct sljit_label *label; 481 struct sljit_jump *jump; 482 struct sljit_const *const_; 483 484 CHECK_ERROR_PTR(); 485 CHECK_PTR(check_sljit_generate_code(compiler)); 486 reverse_buf(compiler); 487 488 /* Second code generation pass. */ 489 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size); 490 PTR_FAIL_WITH_EXEC_IF(code); 491 buf = compiler->buf; 492 493 code_ptr = code; 494 label = compiler->labels; 495 jump = compiler->jumps; 496 const_ = compiler->consts; 497 do { 498 buf_ptr = buf->memory; 499 buf_end = buf_ptr + buf->used_size; 500 do { 501 len = *buf_ptr++; 502 if (len > 0) { 503 /* The code is already generated. */ 504 SLJIT_MEMCPY(code_ptr, buf_ptr, len); 505 code_ptr += len; 506 buf_ptr += len; 507 } 508 else { 509 if (*buf_ptr >= 4) { 510 jump->addr = (sljit_uw)code_ptr; 511 if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) 512 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4); 513 else 514 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4); 515 jump = jump->next; 516 } 517 else if (*buf_ptr == 0) { 518 label->addr = (sljit_uw)code_ptr; 519 label->size = code_ptr - code; 520 label = label->next; 521 } 522 else if (*buf_ptr == 1) { 523 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); 524 const_ = const_->next; 525 } 526 else { 527 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 528 *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32; 529 buf_ptr++; 530 sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw))); 531 code_ptr += sizeof(sljit_sw); 532 buf_ptr += sizeof(sljit_sw) - 1; 533 #else 534 code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr); 535 buf_ptr += sizeof(sljit_sw); 536 #endif 537 } 538 buf_ptr++; 539 } 540 } while (buf_ptr < buf_end); 541 SLJIT_ASSERT(buf_ptr == buf_end); 542 buf = buf->next; 543 } while (buf); 544 545 SLJIT_ASSERT(!label); 546 SLJIT_ASSERT(!jump); 547 SLJIT_ASSERT(!const_); 548 549 jump = compiler->jumps; 550 while (jump) { 551 if (jump->flags & PATCH_MB) { 552 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127); 553 *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))); 554 } else if (jump->flags & PATCH_MW) { 555 if (jump->flags & JUMP_LABEL) { 556 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 557 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)))); 558 #else 559 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX); 560 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)))); 561 #endif 562 } 563 else { 564 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 565 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)))); 566 #else 567 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= 
HALFWORD_MAX); 568 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)))); 569 #endif 570 } 571 } 572 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 573 else if (jump->flags & PATCH_MD) 574 sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); 575 #endif 576 577 jump = jump->next; 578 } 579 580 /* Maybe we waste some space because of short jumps. */ 581 SLJIT_ASSERT(code_ptr <= code + compiler->size); 582 compiler->error = SLJIT_ERR_COMPILED; 583 compiler->executable_size = code_ptr - code; 584 return (void*)code; 585 } 586 587 /* --------------------------------------------------------------------- */ 588 /* Operators */ 589 /* --------------------------------------------------------------------- */ 590 591 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, 592 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 593 sljit_s32 dst, sljit_sw dstw, 594 sljit_s32 src1, sljit_sw src1w, 595 sljit_s32 src2, sljit_sw src2w); 596 597 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, 598 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 599 sljit_s32 dst, sljit_sw dstw, 600 sljit_s32 src1, sljit_sw src1w, 601 sljit_s32 src2, sljit_sw src2w); 602 603 static sljit_s32 emit_mov(struct sljit_compiler *compiler, 604 sljit_s32 dst, sljit_sw dstw, 605 sljit_s32 src, sljit_sw srcw); 606 607 static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler) 608 { 609 sljit_u8 *inst; 610 611 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 612 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); 613 FAIL_IF(!inst); 614 INC_SIZE(5); 615 #else 616 inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); 617 FAIL_IF(!inst); 618 INC_SIZE(6); 619 *inst++ = REX_W; 620 #endif 621 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ 622 *inst++ = 0x64; 623 *inst++ = 0x24; 624 *inst++ = (sljit_u8)sizeof(sljit_sw); 625 *inst++ = PUSHF; 626 compiler->flags_saved = 1; 627 return SLJIT_SUCCESS; 628 } 629 630 static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags) 631 { 632 sljit_u8 *inst; 633 634 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 635 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); 636 FAIL_IF(!inst); 637 INC_SIZE(5); 638 *inst++ = POPF; 639 #else 640 inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); 641 FAIL_IF(!inst); 642 INC_SIZE(6); 643 *inst++ = POPF; 644 *inst++ = REX_W; 645 #endif 646 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ 647 *inst++ = 0x64; 648 *inst++ = 0x24; 649 *inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw)); 650 compiler->flags_saved = keep_flags; 651 return SLJIT_SUCCESS; 652 } 653 654 #ifdef _WIN32 655 #include <malloc.h> 656 657 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) 658 { 659 /* Workaround for calling the internal _chkstk() function on Windows. 660 This function touches all 4k pages that belong to the requested stack space, 661 whose size is passed in local_size. This is necessary on Windows, where 662 the stack can only grow in 4k steps. If the stack is already large enough, 663 this function just burns CPU cycles. However, that is not known in 664 advance, so it must always be called. I think this is a bad design in 665 general even if it has some reasons.
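The alloca() call below is what makes the compiler emit the _chkstk probing for the requested size.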
*/ 666 *(volatile sljit_s32*)alloca(local_size) = 0; 667 } 668 669 #endif 670 671 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 672 #include "sljitNativeX86_32.c" 673 #else 674 #include "sljitNativeX86_64.c" 675 #endif 676 677 static sljit_s32 emit_mov(struct sljit_compiler *compiler, 678 sljit_s32 dst, sljit_sw dstw, 679 sljit_s32 src, sljit_sw srcw) 680 { 681 sljit_u8* inst; 682 683 if (dst == SLJIT_UNUSED) { 684 /* No destination, no need to set up flags. */ 685 if (src & SLJIT_MEM) { 686 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); 687 FAIL_IF(!inst); 688 *inst = MOV_r_rm; 689 } 690 return SLJIT_SUCCESS; 691 } 692 if (FAST_IS_REG(src)) { 693 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); 694 FAIL_IF(!inst); 695 *inst = MOV_rm_r; 696 return SLJIT_SUCCESS; 697 } 698 if (src & SLJIT_IMM) { 699 if (FAST_IS_REG(dst)) { 700 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 701 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); 702 #else 703 if (!compiler->mode32) { 704 if (NOT_HALFWORD(srcw)) 705 return emit_load_imm64(compiler, dst, srcw); 706 } 707 else 708 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); 709 #endif 710 } 711 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 712 if (!compiler->mode32 && NOT_HALFWORD(srcw)) { 713 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); 714 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); 715 FAIL_IF(!inst); 716 *inst = MOV_rm_r; 717 return SLJIT_SUCCESS; 718 } 719 #endif 720 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); 721 FAIL_IF(!inst); 722 *inst = MOV_rm_i32; 723 return SLJIT_SUCCESS; 724 } 725 if (FAST_IS_REG(dst)) { 726 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); 727 FAIL_IF(!inst); 728 *inst = MOV_r_rm; 729 return SLJIT_SUCCESS; 730 } 731 732 /* Memory to memory move. Requires two instructions.
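x86 has no memory to memory mov, so the value goes through TMP_REG1.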
*/ 733 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); 734 FAIL_IF(!inst); 735 *inst = MOV_r_rm; 736 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); 737 FAIL_IF(!inst); 738 *inst = MOV_rm_r; 739 return SLJIT_SUCCESS; 740 } 741 742 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ 743 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); 744 745 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) 746 { 747 sljit_u8 *inst; 748 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 749 sljit_s32 size; 750 #endif 751 752 CHECK_ERROR(); 753 CHECK(check_sljit_emit_op0(compiler, op)); 754 755 switch (GET_OPCODE(op)) { 756 case SLJIT_BREAKPOINT: 757 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 758 FAIL_IF(!inst); 759 INC_SIZE(1); 760 *inst = INT3; 761 break; 762 case SLJIT_NOP: 763 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 764 FAIL_IF(!inst); 765 INC_SIZE(1); 766 *inst = NOP; 767 break; 768 case SLJIT_LMUL_UW: 769 case SLJIT_LMUL_SW: 770 case SLJIT_DIVMOD_UW: 771 case SLJIT_DIVMOD_SW: 772 case SLJIT_DIV_UW: 773 case SLJIT_DIV_SW: 774 compiler->flags_saved = 0; 775 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 776 #ifdef _WIN64 777 SLJIT_COMPILE_ASSERT( 778 reg_map[SLJIT_R0] == 0 779 && reg_map[SLJIT_R1] == 2 780 && reg_map[TMP_REG1] > 7, 781 invalid_register_assignment_for_div_mul); 782 #else 783 SLJIT_COMPILE_ASSERT( 784 reg_map[SLJIT_R0] == 0 785 && reg_map[SLJIT_R1] < 7 786 && reg_map[TMP_REG1] == 2, 787 invalid_register_assignment_for_div_mul); 788 #endif 789 compiler->mode32 = op & SLJIT_I32_OP; 790 #endif 791 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); 792 793 op = GET_OPCODE(op); 794 if ((op | 0x2) == SLJIT_DIV_UW) { 795 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) 796 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); 797 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); 798 #else 799 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); 800 #endif 801 FAIL_IF(!inst); 802 *inst = XOR_r_rm; 803 } 804 805 if ((op | 0x2) == SLJIT_DIV_SW) { 806 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) 807 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); 808 #endif 809 810 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 811 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 812 FAIL_IF(!inst); 813 INC_SIZE(1); 814 *inst = CDQ; 815 #else 816 if (compiler->mode32) { 817 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 818 FAIL_IF(!inst); 819 INC_SIZE(1); 820 *inst = CDQ; 821 } else { 822 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); 823 FAIL_IF(!inst); 824 INC_SIZE(2); 825 *inst++ = REX_W; 826 *inst = CDQ; 827 } 828 #endif 829 } 830 831 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 832 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); 833 FAIL_IF(!inst); 834 INC_SIZE(2); 835 *inst++ = GROUP_F7; 836 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); 837 #else 838 #ifdef _WIN64 839 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; 840 #else 841 size = (!compiler->mode32) ? 3 : 2; 842 #endif 843 inst = (sljit_u8*)ensure_buf(compiler, 1 + size); 844 FAIL_IF(!inst); 845 INC_SIZE(size); 846 #ifdef _WIN64 847 if (!compiler->mode32) 848 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? 
REX_B : 0); 849 else if (op >= SLJIT_DIVMOD_UW) 850 *inst++ = REX_B; 851 *inst++ = GROUP_F7; 852 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); 853 #else 854 if (!compiler->mode32) 855 *inst++ = REX_W; 856 *inst++ = GROUP_F7; 857 *inst = MOD_REG | reg_map[SLJIT_R1]; 858 #endif 859 #endif 860 switch (op) { 861 case SLJIT_LMUL_UW: 862 *inst |= MUL; 863 break; 864 case SLJIT_LMUL_SW: 865 *inst |= IMUL; 866 break; 867 case SLJIT_DIVMOD_UW: 868 case SLJIT_DIV_UW: 869 *inst |= DIV; 870 break; 871 case SLJIT_DIVMOD_SW: 872 case SLJIT_DIV_SW: 873 *inst |= IDIV; 874 break; 875 } 876 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) 877 if (op <= SLJIT_DIVMOD_SW) 878 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); 879 #else 880 if (op >= SLJIT_DIV_UW) 881 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); 882 #endif 883 break; 884 } 885 886 return SLJIT_SUCCESS; 887 } 888 889 #define ENCODE_PREFIX(prefix) \ 890 do { \ 891 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ 892 FAIL_IF(!inst); \ 893 INC_SIZE(1); \ 894 *inst = (prefix); \ 895 } while (0) 896 897 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, 898 sljit_s32 dst, sljit_sw dstw, 899 sljit_s32 src, sljit_sw srcw) 900 { 901 sljit_u8* inst; 902 sljit_s32 dst_r; 903 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 904 sljit_s32 work_r; 905 #endif 906 907 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 908 compiler->mode32 = 0; 909 #endif 910 911 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) 912 return SLJIT_SUCCESS; /* Empty instruction. */ 913 914 if (src & SLJIT_IMM) { 915 if (FAST_IS_REG(dst)) { 916 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 917 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); 918 #else 919 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); 920 FAIL_IF(!inst); 921 *inst = MOV_rm_i32; 922 return SLJIT_SUCCESS; 923 #endif 924 } 925 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); 926 FAIL_IF(!inst); 927 *inst = MOV_rm8_i8; 928 return SLJIT_SUCCESS; 929 } 930 931 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; 932 933 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { 934 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 935 if (reg_map[src] >= 4) { 936 SLJIT_ASSERT(dst_r == TMP_REG1); 937 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 938 } else 939 dst_r = src; 940 #else 941 dst_r = src; 942 #endif 943 } 944 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 945 else if (FAST_IS_REG(src) && reg_map[src] >= 4) { 946 /* src, dst are registers. */ 947 SLJIT_ASSERT(SLOW_IS_REG(dst)); 948 if (reg_map[dst] < 4) { 949 if (dst != src) 950 EMIT_MOV(compiler, dst, 0, src, 0); 951 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); 952 FAIL_IF(!inst); 953 *inst++ = GROUP_0F; 954 *inst = sign ? 
MOVSX_r_rm8 : MOVZX_r_rm8; 955 } 956 else { 957 if (dst != src) 958 EMIT_MOV(compiler, dst, 0, src, 0); 959 if (sign) { 960 /* shl reg, 24 */ 961 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); 962 FAIL_IF(!inst); 963 *inst |= SHL; 964 /* sar reg, 24 */ 965 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); 966 FAIL_IF(!inst); 967 *inst |= SAR; 968 } 969 else { 970 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); 971 FAIL_IF(!inst); 972 *(inst + 1) |= AND; 973 } 974 } 975 return SLJIT_SUCCESS; 976 } 977 #endif 978 else { 979 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ 980 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); 981 FAIL_IF(!inst); 982 *inst++ = GROUP_0F; 983 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; 984 } 985 986 if (dst & SLJIT_MEM) { 987 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 988 if (dst_r == TMP_REG1) { 989 /* Find a non-used register, whose reg_map[src] < 4. */ 990 if ((dst & REG_MASK) == SLJIT_R0) { 991 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) 992 work_r = SLJIT_R2; 993 else 994 work_r = SLJIT_R1; 995 } 996 else { 997 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) 998 work_r = SLJIT_R0; 999 else if ((dst & REG_MASK) == SLJIT_R1) 1000 work_r = SLJIT_R2; 1001 else 1002 work_r = SLJIT_R1; 1003 } 1004 1005 if (work_r == SLJIT_R0) { 1006 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); 1007 } 1008 else { 1009 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); 1010 FAIL_IF(!inst); 1011 *inst = XCHG_r_rm; 1012 } 1013 1014 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); 1015 FAIL_IF(!inst); 1016 *inst = MOV_rm8_r8; 1017 1018 if (work_r == SLJIT_R0) { 1019 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); 1020 } 1021 else { 1022 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); 1023 FAIL_IF(!inst); 1024 *inst = XCHG_r_rm; 1025 } 1026 } 1027 else { 1028 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); 1029 FAIL_IF(!inst); 1030 *inst = MOV_rm8_r8; 1031 } 1032 #else 1033 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); 1034 FAIL_IF(!inst); 1035 *inst = MOV_rm8_r8; 1036 #endif 1037 } 1038 1039 return SLJIT_SUCCESS; 1040 } 1041 1042 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, 1043 sljit_s32 dst, sljit_sw dstw, 1044 sljit_s32 src, sljit_sw srcw) 1045 { 1046 sljit_u8* inst; 1047 sljit_s32 dst_r; 1048 1049 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1050 compiler->mode32 = 0; 1051 #endif 1052 1053 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) 1054 return SLJIT_SUCCESS; /* Empty instruction. */ 1055 1056 if (src & SLJIT_IMM) { 1057 if (FAST_IS_REG(dst)) { 1058 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1059 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); 1060 #else 1061 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); 1062 FAIL_IF(!inst); 1063 *inst = MOV_rm_i32; 1064 return SLJIT_SUCCESS; 1065 #endif 1066 } 1067 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); 1068 FAIL_IF(!inst); 1069 *inst = MOV_rm_i32; 1070 return SLJIT_SUCCESS; 1071 } 1072 1073 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; 1074 1075 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) 1076 dst_r = src; 1077 else { 1078 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); 1079 FAIL_IF(!inst); 1080 *inst++ = GROUP_0F; 1081 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; 1082 } 1083 1084 if (dst & SLJIT_MEM) { 1085 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); 1086 FAIL_IF(!inst); 1087 *inst = MOV_rm_r; 1088 } 1089 1090 return SLJIT_SUCCESS; 1091 } 1092 1093 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, 1094 sljit_s32 dst, sljit_sw dstw, 1095 sljit_s32 src, sljit_sw srcw) 1096 { 1097 sljit_u8* inst; 1098 1099 if (dst == SLJIT_UNUSED) { 1100 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 1101 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); 1102 FAIL_IF(!inst); 1103 *inst++ = GROUP_F7; 1104 *inst |= opcode; 1105 return SLJIT_SUCCESS; 1106 } 1107 if (dst == src && dstw == srcw) { 1108 /* Same input and output */ 1109 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); 1110 FAIL_IF(!inst); 1111 *inst++ = GROUP_F7; 1112 *inst |= opcode; 1113 return SLJIT_SUCCESS; 1114 } 1115 if (FAST_IS_REG(dst)) { 1116 EMIT_MOV(compiler, dst, 0, src, srcw); 1117 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); 1118 FAIL_IF(!inst); 1119 *inst++ = GROUP_F7; 1120 *inst |= opcode; 1121 return SLJIT_SUCCESS; 1122 } 1123 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 1124 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); 1125 FAIL_IF(!inst); 1126 *inst++ = GROUP_F7; 1127 *inst |= opcode; 1128 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 1129 return SLJIT_SUCCESS; 1130 } 1131 1132 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, 1133 sljit_s32 dst, sljit_sw dstw, 1134 sljit_s32 src, sljit_sw srcw) 1135 { 1136 sljit_u8* inst; 1137 1138 if (dst == SLJIT_UNUSED) { 1139 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 1140 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); 1141 FAIL_IF(!inst); 1142 *inst++ = GROUP_F7; 1143 *inst |= NOT_rm; 1144 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); 1145 FAIL_IF(!inst); 1146 *inst = OR_r_rm; 1147 return SLJIT_SUCCESS; 1148 } 1149 if (FAST_IS_REG(dst)) { 1150 EMIT_MOV(compiler, dst, 0, src, srcw); 1151 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); 1152 FAIL_IF(!inst); 1153 *inst++ = GROUP_F7; 1154 *inst |= NOT_rm; 1155 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); 1156 FAIL_IF(!inst); 1157 *inst = OR_r_rm; 1158 return SLJIT_SUCCESS; 1159 } 1160 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 1161 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); 1162 FAIL_IF(!inst); 1163 *inst++ = GROUP_F7; 1164 *inst |= NOT_rm; 1165 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); 1166 FAIL_IF(!inst); 1167 *inst = OR_r_rm; 1168 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 1169 return SLJIT_SUCCESS; 1170 } 1171 1172 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, 1173 sljit_s32 dst, sljit_sw dstw, 1174 sljit_s32 src, sljit_sw srcw) 1175 { 1176 sljit_u8* inst; 1177 sljit_s32 dst_r; 1178 1179 SLJIT_UNUSED_ARG(op_flags); 1180 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { 1181 /* Just set the zero flag. 
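A CLZ result of zero means the sign bit of src is set; NOT followed by shifting the top bit into bit 0 therefore leaves the zero flag set exactly when the CLZ result would be zero.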
*/ 1182 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 1183 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); 1184 FAIL_IF(!inst); 1185 *inst++ = GROUP_F7; 1186 *inst |= NOT_rm; 1187 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1188 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0); 1189 #else 1190 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0); 1191 #endif 1192 FAIL_IF(!inst); 1193 *inst |= SHR; 1194 return SLJIT_SUCCESS; 1195 } 1196 1197 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { 1198 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); 1199 src = TMP_REG1; 1200 srcw = 0; 1201 } 1202 1203 inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw); 1204 FAIL_IF(!inst); 1205 *inst++ = GROUP_0F; 1206 *inst = BSR_r_rm; 1207 1208 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1209 if (FAST_IS_REG(dst)) 1210 dst_r = dst; 1211 else { 1212 /* Find an unused temporary register. */ 1213 if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) 1214 dst_r = SLJIT_R0; 1215 else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1)) 1216 dst_r = SLJIT_R1; 1217 else 1218 dst_r = SLJIT_R2; 1219 EMIT_MOV(compiler, dst, dstw, dst_r, 0); 1220 } 1221 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); 1222 #else 1223 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; 1224 compiler->mode32 = 0; 1225 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31); 1226 compiler->mode32 = op_flags & SLJIT_I32_OP; 1227 #endif 1228 1229 if (cpu_has_cmov == -1) 1230 get_cpu_features(); 1231 1232 if (cpu_has_cmov) { 1233 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); 1234 FAIL_IF(!inst); 1235 *inst++ = GROUP_0F; 1236 *inst = CMOVNE_r_rm; 1237 } else { 1238 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1239 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); 1240 FAIL_IF(!inst); 1241 INC_SIZE(4); 1242 1243 *inst++ = JE_i8; 1244 *inst++ = 2; 1245 *inst++ = MOV_r_rm; 1246 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1]; 1247 #else 1248 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); 1249 FAIL_IF(!inst); 1250 INC_SIZE(5); 1251 1252 *inst++ = JE_i8; 1253 *inst++ = 3; 1254 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0); 1255 *inst++ = MOV_r_rm; 1256 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1]; 1257 #endif 1258 } 1259 1260 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1261 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); 1262 #else 1263 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 
63 : 31, dst_r, 0); 1264 #endif 1265 FAIL_IF(!inst); 1266 *(inst + 1) |= XOR; 1267 1268 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1269 if (dst & SLJIT_MEM) { 1270 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); 1271 FAIL_IF(!inst); 1272 *inst = XCHG_r_rm; 1273 } 1274 #else 1275 if (dst & SLJIT_MEM) 1276 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); 1277 #endif 1278 return SLJIT_SUCCESS; 1279 } 1280 1281 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, 1282 sljit_s32 dst, sljit_sw dstw, 1283 sljit_s32 src, sljit_sw srcw) 1284 { 1285 sljit_u8* inst; 1286 sljit_s32 update = 0; 1287 sljit_s32 op_flags = GET_ALL_FLAGS(op); 1288 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1289 sljit_s32 dst_is_ereg = 0; 1290 sljit_s32 src_is_ereg = 0; 1291 #else 1292 # define src_is_ereg 0 1293 #endif 1294 1295 CHECK_ERROR(); 1296 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); 1297 ADJUST_LOCAL_OFFSET(dst, dstw); 1298 ADJUST_LOCAL_OFFSET(src, srcw); 1299 1300 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); 1301 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1); 1302 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1303 compiler->mode32 = op_flags & SLJIT_I32_OP; 1304 #endif 1305 1306 op = GET_OPCODE(op); 1307 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { 1308 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1309 compiler->mode32 = 0; 1310 #endif 1311 1312 if (op_flags & SLJIT_I32_OP) { 1313 if (FAST_IS_REG(src) && src == dst) { 1314 if (!TYPE_CAST_NEEDED(op)) 1315 return SLJIT_SUCCESS; 1316 } 1317 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1318 if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM)) 1319 op = SLJIT_MOV_U32; 1320 if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM)) 1321 op = SLJIT_MOVU_U32; 1322 if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM)) 1323 op = SLJIT_MOV_S32; 1324 if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM)) 1325 op = SLJIT_MOVU_S32; 1326 #endif 1327 } 1328 1329 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset); 1330 if (op >= SLJIT_MOVU) { 1331 update = 1; 1332 op -= 8; 1333 } 1334 1335 if (src & SLJIT_IMM) { 1336 switch (op) { 1337 case SLJIT_MOV_U8: 1338 srcw = (sljit_u8)srcw; 1339 break; 1340 case SLJIT_MOV_S8: 1341 srcw = (sljit_s8)srcw; 1342 break; 1343 case SLJIT_MOV_U16: 1344 srcw = (sljit_u16)srcw; 1345 break; 1346 case SLJIT_MOV_S16: 1347 srcw = (sljit_s16)srcw; 1348 break; 1349 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1350 case SLJIT_MOV_U32: 1351 srcw = (sljit_u32)srcw; 1352 break; 1353 case SLJIT_MOV_S32: 1354 srcw = (sljit_s32)srcw; 1355 break; 1356 #endif 1357 } 1358 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1359 if (SLJIT_UNLIKELY(dst_is_ereg)) 1360 return emit_mov(compiler, dst, dstw, src, srcw); 1361 #endif 1362 } 1363 1364 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) { 1365 inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw); 1366 FAIL_IF(!inst); 1367 *inst = LEA_r_m; 1368 src &= SLJIT_MEM | 0xf; 1369 srcw = 0; 1370 } 1371 1372 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1373 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { 1374 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); 1375 dst = TMP_REG1; 1376 } 1377 #endif 1378 1379 switch (op) { 1380 case SLJIT_MOV: 1381 case SLJIT_MOV_P: 1382 #if (defined 
SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1383 case SLJIT_MOV_U32: 1384 case SLJIT_MOV_S32: 1385 #endif 1386 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); 1387 break; 1388 case SLJIT_MOV_U8: 1389 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); 1390 break; 1391 case SLJIT_MOV_S8: 1392 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); 1393 break; 1394 case SLJIT_MOV_U16: 1395 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); 1396 break; 1397 case SLJIT_MOV_S16: 1398 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); 1399 break; 1400 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1401 case SLJIT_MOV_U32: 1402 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); 1403 break; 1404 case SLJIT_MOV_S32: 1405 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); 1406 break; 1407 #endif 1408 } 1409 1410 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1411 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) 1412 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); 1413 #endif 1414 1415 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) { 1416 inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw); 1417 FAIL_IF(!inst); 1418 *inst = LEA_r_m; 1419 } 1420 return SLJIT_SUCCESS; 1421 } 1422 1423 if (SLJIT_UNLIKELY(GET_FLAGS(op_flags))) 1424 compiler->flags_saved = 0; 1425 1426 switch (op) { 1427 case SLJIT_NOT: 1428 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E)) 1429 return emit_not_with_flags(compiler, dst, dstw, src, srcw); 1430 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); 1431 1432 case SLJIT_NEG: 1433 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 1434 FAIL_IF(emit_save_flags(compiler)); 1435 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); 1436 1437 case SLJIT_CLZ: 1438 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 1439 FAIL_IF(emit_save_flags(compiler)); 1440 return emit_clz(compiler, op_flags, dst, dstw, src, srcw); 1441 } 1442 1443 return SLJIT_SUCCESS; 1444 1445 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1446 # undef src_is_ereg 1447 #endif 1448 } 1449 1450 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1451 1452 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ 1453 if (IS_HALFWORD(immw) || compiler->mode32) { \ 1454 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ 1455 FAIL_IF(!inst); \ 1456 *(inst + 1) |= (op_imm); \ 1457 } \ 1458 else { \ 1459 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \ 1460 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ 1461 FAIL_IF(!inst); \ 1462 *inst = (op_mr); \ 1463 } 1464 1465 #define BINARY_EAX_IMM(op_eax_imm, immw) \ 1466 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? 
REX_W : 0, (op_eax_imm), immw)) 1467 1468 #else 1469 1470 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ 1471 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ 1472 FAIL_IF(!inst); \ 1473 *(inst + 1) |= (op_imm); 1474 1475 #define BINARY_EAX_IMM(op_eax_imm, immw) \ 1476 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) 1477 1478 #endif 1479 1480 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, 1481 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 1482 sljit_s32 dst, sljit_sw dstw, 1483 sljit_s32 src1, sljit_sw src1w, 1484 sljit_s32 src2, sljit_sw src2w) 1485 { 1486 sljit_u8* inst; 1487 1488 if (dst == SLJIT_UNUSED) { 1489 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1490 if (src2 & SLJIT_IMM) { 1491 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); 1492 } 1493 else { 1494 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1495 FAIL_IF(!inst); 1496 *inst = op_rm; 1497 } 1498 return SLJIT_SUCCESS; 1499 } 1500 1501 if (dst == src1 && dstw == src1w) { 1502 if (src2 & SLJIT_IMM) { 1503 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1504 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1505 #else 1506 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { 1507 #endif 1508 BINARY_EAX_IMM(op_eax_imm, src2w); 1509 } 1510 else { 1511 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); 1512 } 1513 } 1514 else if (FAST_IS_REG(dst)) { 1515 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); 1516 FAIL_IF(!inst); 1517 *inst = op_rm; 1518 } 1519 else if (FAST_IS_REG(src2)) { 1520 /* Special exception for sljit_emit_op_flags. */ 1521 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); 1522 FAIL_IF(!inst); 1523 *inst = op_mr; 1524 } 1525 else { 1526 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); 1527 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); 1528 FAIL_IF(!inst); 1529 *inst = op_mr; 1530 } 1531 return SLJIT_SUCCESS; 1532 } 1533 1534 /* Only for cumulative operations. */ 1535 if (dst == src2 && dstw == src2w) { 1536 if (src1 & SLJIT_IMM) { 1537 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1538 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { 1539 #else 1540 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { 1541 #endif 1542 BINARY_EAX_IMM(op_eax_imm, src1w); 1543 } 1544 else { 1545 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); 1546 } 1547 } 1548 else if (FAST_IS_REG(dst)) { 1549 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); 1550 FAIL_IF(!inst); 1551 *inst = op_rm; 1552 } 1553 else if (FAST_IS_REG(src1)) { 1554 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); 1555 FAIL_IF(!inst); 1556 *inst = op_mr; 1557 } 1558 else { 1559 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1560 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); 1561 FAIL_IF(!inst); 1562 *inst = op_mr; 1563 } 1564 return SLJIT_SUCCESS; 1565 } 1566 1567 /* General version. */ 1568 if (FAST_IS_REG(dst)) { 1569 EMIT_MOV(compiler, dst, 0, src1, src1w); 1570 if (src2 & SLJIT_IMM) { 1571 BINARY_IMM(op_imm, op_mr, src2w, dst, 0); 1572 } 1573 else { 1574 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); 1575 FAIL_IF(!inst); 1576 *inst = op_rm; 1577 } 1578 } 1579 else { 1580 /* This version requires less memory writing. 
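The operation is performed in TMP_REG1 and the (possibly memory) destination is written only once at the end.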
*/ 1581 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1582 if (src2 & SLJIT_IMM) { 1583 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); 1584 } 1585 else { 1586 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1587 FAIL_IF(!inst); 1588 *inst = op_rm; 1589 } 1590 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 1591 } 1592 1593 return SLJIT_SUCCESS; 1594 } 1595 1596 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, 1597 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 1598 sljit_s32 dst, sljit_sw dstw, 1599 sljit_s32 src1, sljit_sw src1w, 1600 sljit_s32 src2, sljit_sw src2w) 1601 { 1602 sljit_u8* inst; 1603 1604 if (dst == SLJIT_UNUSED) { 1605 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1606 if (src2 & SLJIT_IMM) { 1607 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); 1608 } 1609 else { 1610 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1611 FAIL_IF(!inst); 1612 *inst = op_rm; 1613 } 1614 return SLJIT_SUCCESS; 1615 } 1616 1617 if (dst == src1 && dstw == src1w) { 1618 if (src2 & SLJIT_IMM) { 1619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1620 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1621 #else 1622 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { 1623 #endif 1624 BINARY_EAX_IMM(op_eax_imm, src2w); 1625 } 1626 else { 1627 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); 1628 } 1629 } 1630 else if (FAST_IS_REG(dst)) { 1631 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); 1632 FAIL_IF(!inst); 1633 *inst = op_rm; 1634 } 1635 else if (FAST_IS_REG(src2)) { 1636 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); 1637 FAIL_IF(!inst); 1638 *inst = op_mr; 1639 } 1640 else { 1641 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); 1642 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); 1643 FAIL_IF(!inst); 1644 *inst = op_mr; 1645 } 1646 return SLJIT_SUCCESS; 1647 } 1648 1649 /* General version. */ 1650 if (FAST_IS_REG(dst) && dst != src2) { 1651 EMIT_MOV(compiler, dst, 0, src1, src1w); 1652 if (src2 & SLJIT_IMM) { 1653 BINARY_IMM(op_imm, op_mr, src2w, dst, 0); 1654 } 1655 else { 1656 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); 1657 FAIL_IF(!inst); 1658 *inst = op_rm; 1659 } 1660 } 1661 else { 1662 /* This version requires less memory writing. */ 1663 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1664 if (src2 & SLJIT_IMM) { 1665 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); 1666 } 1667 else { 1668 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1669 FAIL_IF(!inst); 1670 *inst = op_rm; 1671 } 1672 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 1673 } 1674 1675 return SLJIT_SUCCESS; 1676 } 1677 1678 static sljit_s32 emit_mul(struct sljit_compiler *compiler, 1679 sljit_s32 dst, sljit_sw dstw, 1680 sljit_s32 src1, sljit_sw src1w, 1681 sljit_s32 src2, sljit_sw src2w) 1682 { 1683 sljit_u8* inst; 1684 sljit_s32 dst_r; 1685 1686 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; 1687 1688 /* Register destination. 
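When the destination register already holds one of the operands, the two byte 0F AF form of imul is used directly; the immediate forms are handled below.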
*/ 1689 if (dst_r == src1 && !(src2 & SLJIT_IMM)) { 1690 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); 1691 FAIL_IF(!inst); 1692 *inst++ = GROUP_0F; 1693 *inst = IMUL_r_rm; 1694 } 1695 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { 1696 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); 1697 FAIL_IF(!inst); 1698 *inst++ = GROUP_0F; 1699 *inst = IMUL_r_rm; 1700 } 1701 else if (src1 & SLJIT_IMM) { 1702 if (src2 & SLJIT_IMM) { 1703 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); 1704 src2 = dst_r; 1705 src2w = 0; 1706 } 1707 1708 if (src1w <= 127 && src1w >= -128) { 1709 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); 1710 FAIL_IF(!inst); 1711 *inst = IMUL_r_rm_i8; 1712 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 1713 FAIL_IF(!inst); 1714 INC_SIZE(1); 1715 *inst = (sljit_s8)src1w; 1716 } 1717 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1718 else { 1719 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); 1720 FAIL_IF(!inst); 1721 *inst = IMUL_r_rm_i32; 1722 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); 1723 FAIL_IF(!inst); 1724 INC_SIZE(4); 1725 sljit_unaligned_store_sw(inst, src1w); 1726 } 1727 #else 1728 else if (IS_HALFWORD(src1w)) { 1729 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); 1730 FAIL_IF(!inst); 1731 *inst = IMUL_r_rm_i32; 1732 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); 1733 FAIL_IF(!inst); 1734 INC_SIZE(4); 1735 sljit_unaligned_store_s32(inst, (sljit_s32)src1w); 1736 } 1737 else { 1738 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); 1739 if (dst_r != src2) 1740 EMIT_MOV(compiler, dst_r, 0, src2, src2w); 1741 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); 1742 FAIL_IF(!inst); 1743 *inst++ = GROUP_0F; 1744 *inst = IMUL_r_rm; 1745 } 1746 #endif 1747 } 1748 else if (src2 & SLJIT_IMM) { 1749 /* Note: src1 is NOT immediate. */ 1750 1751 if (src2w <= 127 && src2w >= -128) { 1752 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); 1753 FAIL_IF(!inst); 1754 *inst = IMUL_r_rm_i8; 1755 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 1756 FAIL_IF(!inst); 1757 INC_SIZE(1); 1758 *inst = (sljit_s8)src2w; 1759 } 1760 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1761 else { 1762 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); 1763 FAIL_IF(!inst); 1764 *inst = IMUL_r_rm_i32; 1765 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); 1766 FAIL_IF(!inst); 1767 INC_SIZE(4); 1768 sljit_unaligned_store_sw(inst, src2w); 1769 } 1770 #else 1771 else if (IS_HALFWORD(src2w)) { 1772 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); 1773 FAIL_IF(!inst); 1774 *inst = IMUL_r_rm_i32; 1775 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); 1776 FAIL_IF(!inst); 1777 INC_SIZE(4); 1778 sljit_unaligned_store_s32(inst, (sljit_s32)src2w); 1779 } 1780 else { 1781 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); 1782 if (dst_r != src1) 1783 EMIT_MOV(compiler, dst_r, 0, src1, src1w); 1784 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); 1785 FAIL_IF(!inst); 1786 *inst++ = GROUP_0F; 1787 *inst = IMUL_r_rm; 1788 } 1789 #endif 1790 } 1791 else { 1792 /* Neither argument is immediate. 
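If src2 is a memory operand whose address depends on dst_r, the product is built in TMP_REG1 instead, so loading src1 first does not clobber src2's base or index register.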
*/ 1793 if (ADDRESSING_DEPENDS_ON(src2, dst_r)) 1794 dst_r = TMP_REG1; 1795 EMIT_MOV(compiler, dst_r, 0, src1, src1w); 1796 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); 1797 FAIL_IF(!inst); 1798 *inst++ = GROUP_0F; 1799 *inst = IMUL_r_rm; 1800 } 1801 1802 if (dst_r == TMP_REG1) 1803 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 1804 1805 return SLJIT_SUCCESS; 1806 } 1807 1808 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags, 1809 sljit_s32 dst, sljit_sw dstw, 1810 sljit_s32 src1, sljit_sw src1w, 1811 sljit_s32 src2, sljit_sw src2w) 1812 { 1813 sljit_u8* inst; 1814 sljit_s32 dst_r, done = 0; 1815 1816 /* These cases are better left to be handled the normal way. */ 1817 if (!keep_flags) { 1818 if (dst == src1 && dstw == src1w) 1819 return SLJIT_ERR_UNSUPPORTED; 1820 if (dst == src2 && dstw == src2w) 1821 return SLJIT_ERR_UNSUPPORTED; 1822 } 1823 1824 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; 1825 1826 if (FAST_IS_REG(src1)) { 1827 if (FAST_IS_REG(src2)) { 1828 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); 1829 FAIL_IF(!inst); 1830 *inst = LEA_r_m; 1831 done = 1; 1832 } 1833 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1834 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1835 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); 1836 #else 1837 if (src2 & SLJIT_IMM) { 1838 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); 1839 #endif 1840 FAIL_IF(!inst); 1841 *inst = LEA_r_m; 1842 done = 1; 1843 } 1844 } 1845 else if (FAST_IS_REG(src2)) { 1846 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1847 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { 1848 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); 1849 #else 1850 if (src1 & SLJIT_IMM) { 1851 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); 1852 #endif 1853 FAIL_IF(!inst); 1854 *inst = LEA_r_m; 1855 done = 1; 1856 } 1857 } 1858 1859 if (done) { 1860 if (dst_r == TMP_REG1) 1861 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 1862 return SLJIT_SUCCESS; 1863 } 1864 return SLJIT_ERR_UNSUPPORTED; 1865 } 1866 1867 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, 1868 sljit_s32 src1, sljit_sw src1w, 1869 sljit_s32 src2, sljit_sw src2w) 1870 { 1871 sljit_u8* inst; 1872 1873 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1874 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1875 #else 1876 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { 1877 #endif 1878 BINARY_EAX_IMM(CMP_EAX_i32, src2w); 1879 return SLJIT_SUCCESS; 1880 } 1881 1882 if (FAST_IS_REG(src1)) { 1883 if (src2 & SLJIT_IMM) { 1884 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); 1885 } 1886 else { 1887 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); 1888 FAIL_IF(!inst); 1889 *inst = CMP_r_rm; 1890 } 1891 return SLJIT_SUCCESS; 1892 } 1893 1894 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { 1895 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); 1896 FAIL_IF(!inst); 1897 *inst = CMP_rm_r; 1898 return SLJIT_SUCCESS; 1899 } 1900 1901 if (src2 & SLJIT_IMM) { 1902 if (src1 & SLJIT_IMM) { 1903 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1904 src1 = TMP_REG1; 1905 src1w = 0; 1906 } 1907 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); 1908 } 1909 else { 1910
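/* src2 is not an immediate here: load src1 (memory or immediate) into TMP_REG1 and compare from the register side. */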
1910 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1911 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1912 FAIL_IF(!inst); 1913 *inst = CMP_r_rm; 1914 } 1915 return SLJIT_SUCCESS; 1916 } 1917
1918 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, 1919 sljit_s32 src1, sljit_sw src1w, 1920 sljit_s32 src2, sljit_sw src2w) 1921 { 1922 sljit_u8* inst; 1923 1924 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1925 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1926 #else 1927 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { 1928 #endif 1929 BINARY_EAX_IMM(TEST_EAX_i32, src2w); 1930 return SLJIT_SUCCESS; 1931 } 1932
1933 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1934 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { 1935 #else 1936 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { 1937 #endif 1938 BINARY_EAX_IMM(TEST_EAX_i32, src1w); 1939 return SLJIT_SUCCESS; 1940 } 1941
1942 if (!(src1 & SLJIT_IMM)) { 1943 if (src2 & SLJIT_IMM) { 1944 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1945 if (IS_HALFWORD(src2w) || compiler->mode32) { 1946 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); 1947 FAIL_IF(!inst); 1948 *inst = GROUP_F7; 1949 } 1950 else { 1951 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); 1952 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w); 1953 FAIL_IF(!inst); 1954 *inst = TEST_rm_r; 1955 } 1956 #else 1957 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); 1958 FAIL_IF(!inst); 1959 *inst = GROUP_F7; 1960 #endif 1961 return SLJIT_SUCCESS; 1962 } 1963 else if (FAST_IS_REG(src1)) { 1964 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); 1965 FAIL_IF(!inst); 1966 *inst = TEST_rm_r; 1967 return SLJIT_SUCCESS; 1968 } 1969 } 1970
1971 if (!(src2 & SLJIT_IMM)) { 1972 if (src1 & SLJIT_IMM) { 1973 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1974 if (IS_HALFWORD(src1w) || compiler->mode32) { 1975 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); 1976 FAIL_IF(!inst); 1977 *inst = GROUP_F7; 1978 } 1979 else { 1980 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); 1981 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w); 1982 FAIL_IF(!inst); 1983 *inst = TEST_rm_r; 1984 } 1985 #else 1986 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); 1987 FAIL_IF(!inst); 1988 *inst = GROUP_F7; 1989 #endif 1990 return SLJIT_SUCCESS; 1991 } 1992 else if (FAST_IS_REG(src2)) { 1993 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); 1994 FAIL_IF(!inst); 1995 *inst = TEST_rm_r; 1996 return SLJIT_SUCCESS; 1997 } 1998 } 1999
2000 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2001 if (src2 & SLJIT_IMM) { 2002 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2003 if (IS_HALFWORD(src2w) || compiler->mode32) { 2004 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); 2005 FAIL_IF(!inst); 2006 *inst = GROUP_F7; 2007 } 2008 else { 2009 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); 2010 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); 2011 FAIL_IF(!inst); 2012 *inst = TEST_rm_r; 2013 } 2014 #else 2015 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); 2016 FAIL_IF(!inst); 2017 *inst = GROUP_F7; 2018 #endif 2019 }
2020 else { 2021 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 2022 FAIL_IF(!inst); 2023 *inst = TEST_rm_r; 2024 } 2025 return SLJIT_SUCCESS; 2026 } 2027
2028 static sljit_s32 emit_shift(struct sljit_compiler *compiler, 2029 sljit_u8 mode, 2030 sljit_s32 dst, sljit_sw dstw, 2031 sljit_s32 src1, sljit_sw src1w, 2032 sljit_s32 src2, sljit_sw src2w) 2033 { 2034 sljit_u8* inst; 2035 2036 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { 2037 if (dst == src1 && dstw == src1w) { 2038 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); 2039 FAIL_IF(!inst); 2040 *inst |= mode; 2041 return SLJIT_SUCCESS; 2042 } 2043 if (dst == SLJIT_UNUSED) { 2044 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2045 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); 2046 FAIL_IF(!inst); 2047 *inst |= mode; 2048 return SLJIT_SUCCESS; 2049 } 2050 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { 2051 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2052 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2053 FAIL_IF(!inst); 2054 *inst |= mode; 2055 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2056 return SLJIT_SUCCESS; 2057 } 2058 if (FAST_IS_REG(dst)) { 2059 EMIT_MOV(compiler, dst, 0, src1, src1w); 2060 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); 2061 FAIL_IF(!inst); 2062 *inst |= mode; 2063 return SLJIT_SUCCESS; 2064 } 2065 2066 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2067 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); 2068 FAIL_IF(!inst); 2069 *inst |= mode; 2070 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 2071 return SLJIT_SUCCESS; 2072 } 2073
2074 if (dst == SLJIT_PREF_SHIFT_REG) { 2075 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2076 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2077 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2078 FAIL_IF(!inst); 2079 *inst |= mode; 2080 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2081 } 2082 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { 2083 if (src1 != dst) 2084 EMIT_MOV(compiler, dst, 0, src1, src1w); 2085 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); 2086 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2087 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); 2088 FAIL_IF(!inst); 2089 *inst |= mode; 2090 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2091 } 2092 else { 2093 /* This case is really difficult, since ecx itself may be used for 2094 addressing, and we must ensure the code works even in that case. */ 2095 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2096 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2097 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); 2098 #else 2099 /* [esp+0] contains the flags.
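The original value of ecx is therefore spilled one word higher, to [esp + sizeof(sljit_sw)], and reloaded from there once the shift has been emitted.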
*/ 2100 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); 2101 #endif 2102 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2103 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2104 FAIL_IF(!inst); 2105 *inst |= mode; 2106 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2107 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); 2108 #else 2109 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); 2110 #endif 2111 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 2112 } 2113 2114 return SLJIT_SUCCESS; 2115 } 2116 2117 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, 2118 sljit_u8 mode, sljit_s32 set_flags, 2119 sljit_s32 dst, sljit_sw dstw, 2120 sljit_s32 src1, sljit_sw src1w, 2121 sljit_s32 src2, sljit_sw src2w) 2122 { 2123 /* The CPU does not set flags if the shift count is 0. */ 2124 if (src2 & SLJIT_IMM) { 2125 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2126 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0)) 2127 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2128 #else 2129 if ((src2w & 0x1f) != 0) 2130 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2131 #endif 2132 if (!set_flags) 2133 return emit_mov(compiler, dst, dstw, src1, src1w); 2134 /* OR dst, src, 0 */ 2135 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, 2136 dst, dstw, src1, src1w, SLJIT_IMM, 0); 2137 } 2138 2139 if (!set_flags) 2140 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2141 2142 if (!FAST_IS_REG(dst)) 2143 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); 2144 2145 FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); 2146 2147 if (FAST_IS_REG(dst)) 2148 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); 2149 return SLJIT_SUCCESS; 2150 } 2151 2152 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, 2153 sljit_s32 dst, sljit_sw dstw, 2154 sljit_s32 src1, sljit_sw src1w, 2155 sljit_s32 src2, sljit_sw src2w) 2156 { 2157 CHECK_ERROR(); 2158 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2159 ADJUST_LOCAL_OFFSET(dst, dstw); 2160 ADJUST_LOCAL_OFFSET(src1, src1w); 2161 ADJUST_LOCAL_OFFSET(src2, src2w); 2162 2163 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2164 CHECK_EXTRA_REGS(src1, src1w, (void)0); 2165 CHECK_EXTRA_REGS(src2, src2w, (void)0); 2166 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2167 compiler->mode32 = op & SLJIT_I32_OP; 2168 #endif 2169 2170 if (GET_OPCODE(op) >= SLJIT_MUL) { 2171 if (SLJIT_UNLIKELY(GET_FLAGS(op))) 2172 compiler->flags_saved = 0; 2173 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 2174 FAIL_IF(emit_save_flags(compiler)); 2175 } 2176 2177 switch (GET_OPCODE(op)) { 2178 case SLJIT_ADD: 2179 if (!GET_FLAGS(op)) { 2180 if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) 2181 return compiler->error; 2182 } 2183 else 2184 compiler->flags_saved = 0; 2185 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 2186 FAIL_IF(emit_save_flags(compiler)); 2187 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 2188 dst, dstw, src1, src1w, src2, src2w); 2189 case SLJIT_ADDC: 2190 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. 
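ADC consumes the carry produced by the earlier operation, so the spilled status flags are reloaded before it is emitted.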
*/ 2191 FAIL_IF(emit_restore_flags(compiler, 1)); 2192 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) 2193 FAIL_IF(emit_save_flags(compiler)); 2194 if (SLJIT_UNLIKELY(GET_FLAGS(op))) 2195 compiler->flags_saved = 0; 2196 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, 2197 dst, dstw, src1, src1w, src2, src2w); 2198 case SLJIT_SUB: 2199 if (!GET_FLAGS(op)) { 2200 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) 2201 return compiler->error; 2202 } 2203 else 2204 compiler->flags_saved = 0; 2205 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 2206 FAIL_IF(emit_save_flags(compiler)); 2207 if (dst == SLJIT_UNUSED) 2208 return emit_cmp_binary(compiler, src1, src1w, src2, src2w); 2209 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, 2210 dst, dstw, src1, src1w, src2, src2w); 2211 case SLJIT_SUBC: 2212 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ 2213 FAIL_IF(emit_restore_flags(compiler, 1)); 2214 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) 2215 FAIL_IF(emit_save_flags(compiler)); 2216 if (SLJIT_UNLIKELY(GET_FLAGS(op))) 2217 compiler->flags_saved = 0; 2218 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, 2219 dst, dstw, src1, src1w, src2, src2w); 2220 case SLJIT_MUL: 2221 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); 2222 case SLJIT_AND: 2223 if (dst == SLJIT_UNUSED) 2224 return emit_test_binary(compiler, src1, src1w, src2, src2w); 2225 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, 2226 dst, dstw, src1, src1w, src2, src2w); 2227 case SLJIT_OR: 2228 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, 2229 dst, dstw, src1, src1w, src2, src2w); 2230 case SLJIT_XOR: 2231 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, 2232 dst, dstw, src1, src1w, src2, src2w); 2233 case SLJIT_SHL: 2234 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op), 2235 dst, dstw, src1, src1w, src2, src2w); 2236 case SLJIT_LSHR: 2237 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op), 2238 dst, dstw, src1, src1w, src2, src2w); 2239 case SLJIT_ASHR: 2240 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op), 2241 dst, dstw, src1, src1w, src2, src2w); 2242 } 2243 2244 return SLJIT_SUCCESS; 2245 } 2246 2247 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) 2248 { 2249 CHECK_REG_INDEX(check_sljit_get_register_index(reg)); 2250 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2251 if (reg >= SLJIT_R3 && reg <= SLJIT_R6) 2252 return -1; 2253 #endif 2254 return reg_map[reg]; 2255 } 2256 2257 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) 2258 { 2259 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); 2260 return reg; 2261 } 2262 2263 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, 2264 void *instruction, sljit_s32 size) 2265 { 2266 sljit_u8 *inst; 2267 2268 CHECK_ERROR(); 2269 CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 2270 2271 inst = (sljit_u8*)ensure_buf(compiler, 1 + size); 2272 FAIL_IF(!inst); 2273 INC_SIZE(size); 2274 SLJIT_MEMCPY(inst, instruction, size); 2275 return SLJIT_SUCCESS; 2276 } 2277 2278 /* --------------------------------------------------------------------- */ 2279 /* Floating point operators */ 2280 /* 
--------------------------------------------------------------------- */ 2281 2282 /* Alignment + 2 * 16 bytes. */ 2283 static sljit_s32 sse2_data[3 + (4 + 4) * 2]; 2284 static sljit_s32 *sse2_buffer; 2285 2286 static void init_compiler(void) 2287 { 2288 sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); 2289 /* Single precision constants. */ 2290 sse2_buffer[0] = 0x80000000; 2291 sse2_buffer[4] = 0x7fffffff; 2292 /* Double precision constants. */ 2293 sse2_buffer[8] = 0; 2294 sse2_buffer[9] = 0x80000000; 2295 sse2_buffer[12] = 0xffffffff; 2296 sse2_buffer[13] = 0x7fffffff; 2297 } 2298 2299 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 2300 { 2301 #ifdef SLJIT_IS_FPU_AVAILABLE 2302 return SLJIT_IS_FPU_AVAILABLE; 2303 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 2304 if (cpu_has_sse2 == -1) 2305 get_cpu_features(); 2306 return cpu_has_sse2; 2307 #else /* SLJIT_DETECT_SSE2 */ 2308 return 1; 2309 #endif /* SLJIT_DETECT_SSE2 */ 2310 } 2311 2312 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, 2313 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) 2314 { 2315 sljit_u8 *inst; 2316 2317 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2318 FAIL_IF(!inst); 2319 *inst++ = GROUP_0F; 2320 *inst = opcode; 2321 return SLJIT_SUCCESS; 2322 } 2323 2324 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, 2325 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) 2326 { 2327 sljit_u8 *inst; 2328 2329 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2330 FAIL_IF(!inst); 2331 *inst++ = GROUP_0F; 2332 *inst = opcode; 2333 return SLJIT_SUCCESS; 2334 } 2335 2336 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, 2337 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) 2338 { 2339 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); 2340 } 2341 2342 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, 2343 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) 2344 { 2345 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); 2346 } 2347 2348 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, 2349 sljit_s32 dst, sljit_sw dstw, 2350 sljit_s32 src, sljit_sw srcw) 2351 { 2352 sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 2353 sljit_u8 *inst; 2354 2355 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2356 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) 2357 compiler->mode32 = 0; 2358 #endif 2359 2360 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); 2361 FAIL_IF(!inst); 2362 *inst++ = GROUP_0F; 2363 *inst = CVTTSD2SI_r_xm; 2364 2365 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) 2366 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2367 return SLJIT_SUCCESS; 2368 } 2369 2370 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, 2371 sljit_s32 dst, sljit_sw dstw, 2372 sljit_s32 src, sljit_sw srcw) 2373 { 2374 sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG; 2375 sljit_u8 *inst; 2376 2377 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2378 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) 2379 compiler->mode32 = 0; 2380 #endif 2381 2382 if (src & SLJIT_IMM) { 2383 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2384 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) 2385 srcw = (sljit_s32)srcw; 2386 #endif 2387 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 2388 src = TMP_REG1; 2389 srcw = 0; 2390 } 2391 2392 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); 2393 FAIL_IF(!inst); 2394 *inst++ = GROUP_0F; 2395 *inst = CVTSI2SD_x_rm; 2396 2397 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2398 compiler->mode32 = 1; 2399 #endif 2400 if (dst_r == TMP_FREG) 2401 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2402 return SLJIT_SUCCESS; 2403 } 2404 2405 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, 2406 sljit_s32 src1, sljit_sw src1w, 2407 sljit_s32 src2, sljit_sw src2w) 2408 { 2409 compiler->flags_saved = 0; 2410 if (!FAST_IS_REG(src1)) { 2411 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); 2412 src1 = TMP_FREG; 2413 } 2414 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w); 2415 } 2416 2417 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, 2418 sljit_s32 dst, sljit_sw dstw, 2419 sljit_s32 src, sljit_sw srcw) 2420 { 2421 sljit_s32 dst_r; 2422 2423 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2424 compiler->mode32 = 1; 2425 #endif 2426 2427 CHECK_ERROR(); 2428 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); 2429 2430 if (GET_OPCODE(op) == SLJIT_MOV_F64) { 2431 if (FAST_IS_REG(dst)) 2432 return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw); 2433 if (FAST_IS_REG(src)) 2434 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src); 2435 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw)); 2436 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2437 } 2438 2439 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { 2440 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; 2441 if (FAST_IS_REG(src)) { 2442 /* We overwrite the high bits of source. From SLJIT point of view, 2443 this is not an issue. 2444 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ 2445 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0)); 2446 } 2447 else { 2448 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw)); 2449 src = TMP_FREG; 2450 } 2451 2452 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0)); 2453 if (dst_r == TMP_FREG) 2454 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2455 return SLJIT_SUCCESS; 2456 } 2457 2458 if (SLOW_IS_REG(dst)) { 2459 dst_r = dst; 2460 if (dst != src) 2461 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); 2462 } 2463 else { 2464 dst_r = TMP_FREG; 2465 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); 2466 } 2467 2468 switch (GET_OPCODE(op)) { 2469 case SLJIT_NEG_F64: 2470 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? 
sse2_buffer : sse2_buffer + 8))); 2471 break; 2472 2473 case SLJIT_ABS_F64: 2474 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12))); 2475 break; 2476 } 2477 2478 if (dst_r == TMP_FREG) 2479 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2480 return SLJIT_SUCCESS; 2481 } 2482 2483 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, 2484 sljit_s32 dst, sljit_sw dstw, 2485 sljit_s32 src1, sljit_sw src1w, 2486 sljit_s32 src2, sljit_sw src2w) 2487 { 2488 sljit_s32 dst_r; 2489 2490 CHECK_ERROR(); 2491 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2492 ADJUST_LOCAL_OFFSET(dst, dstw); 2493 ADJUST_LOCAL_OFFSET(src1, src1w); 2494 ADJUST_LOCAL_OFFSET(src2, src2w); 2495 2496 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2497 compiler->mode32 = 1; 2498 #endif 2499 2500 if (FAST_IS_REG(dst)) { 2501 dst_r = dst; 2502 if (dst == src1) 2503 ; /* Do nothing here. */ 2504 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) { 2505 /* Swap arguments. */ 2506 src2 = src1; 2507 src2w = src1w; 2508 } 2509 else if (dst != src2) 2510 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w)); 2511 else { 2512 dst_r = TMP_FREG; 2513 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); 2514 } 2515 } 2516 else { 2517 dst_r = TMP_FREG; 2518 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); 2519 } 2520 2521 switch (GET_OPCODE(op)) { 2522 case SLJIT_ADD_F64: 2523 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2524 break; 2525 2526 case SLJIT_SUB_F64: 2527 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2528 break; 2529 2530 case SLJIT_MUL_F64: 2531 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2532 break; 2533 2534 case SLJIT_DIV_F64: 2535 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); 2536 break; 2537 } 2538 2539 if (dst_r == TMP_FREG) 2540 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); 2541 return SLJIT_SUCCESS; 2542 } 2543 2544 /* --------------------------------------------------------------------- */ 2545 /* Conditional instructions */ 2546 /* --------------------------------------------------------------------- */ 2547 2548 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) 2549 { 2550 sljit_u8 *inst; 2551 struct sljit_label *label; 2552 2553 CHECK_ERROR_PTR(); 2554 CHECK_PTR(check_sljit_emit_label(compiler)); 2555 2556 /* We should restore the flags before the label, 2557 since other taken jumps has their own flags as well. 
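A jump that targets this label arrives with its flags live, so the fall-through path reloads any spilled flags here to keep both paths consistent.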
*/ 2558 if (SLJIT_UNLIKELY(compiler->flags_saved)) 2559 PTR_FAIL_IF(emit_restore_flags(compiler, 0)); 2560 2561 if (compiler->last_label && compiler->last_label->size == compiler->size) 2562 return compiler->last_label; 2563 2564 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); 2565 PTR_FAIL_IF(!label); 2566 set_label(label, compiler); 2567 2568 inst = (sljit_u8*)ensure_buf(compiler, 2); 2569 PTR_FAIL_IF(!inst); 2570 2571 *inst++ = 0; 2572 *inst++ = 0; 2573 2574 return label; 2575 } 2576 2577 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) 2578 { 2579 sljit_u8 *inst; 2580 struct sljit_jump *jump; 2581 2582 CHECK_ERROR_PTR(); 2583 CHECK_PTR(check_sljit_emit_jump(compiler, type)); 2584 2585 if (SLJIT_UNLIKELY(compiler->flags_saved)) { 2586 if ((type & 0xff) <= SLJIT_JUMP) 2587 PTR_FAIL_IF(emit_restore_flags(compiler, 0)); 2588 compiler->flags_saved = 0; 2589 } 2590 2591 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2592 PTR_FAIL_IF_NULL(jump); 2593 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 2594 type &= 0xff; 2595 2596 if (type >= SLJIT_CALL1) 2597 PTR_FAIL_IF(call_with_args(compiler, type)); 2598 2599 /* Worst case size. */ 2600 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2601 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; 2602 #else 2603 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); 2604 #endif 2605 2606 inst = (sljit_u8*)ensure_buf(compiler, 2); 2607 PTR_FAIL_IF_NULL(inst); 2608 2609 *inst++ = 0; 2610 *inst++ = type + 4; 2611 return jump; 2612 } 2613 2614 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) 2615 { 2616 sljit_u8 *inst; 2617 struct sljit_jump *jump; 2618 2619 CHECK_ERROR(); 2620 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 2621 ADJUST_LOCAL_OFFSET(src, srcw); 2622 2623 CHECK_EXTRA_REGS(src, srcw, (void)0); 2624 2625 if (SLJIT_UNLIKELY(compiler->flags_saved)) { 2626 if (type <= SLJIT_JUMP) 2627 FAIL_IF(emit_restore_flags(compiler, 0)); 2628 compiler->flags_saved = 0; 2629 } 2630 2631 if (type >= SLJIT_CALL1) { 2632 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2633 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 2634 if (src == SLJIT_R2) { 2635 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 2636 src = TMP_REG1; 2637 } 2638 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) 2639 srcw += sizeof(sljit_sw); 2640 #endif 2641 #endif 2642 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) 2643 if (src == SLJIT_R2) { 2644 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 2645 src = TMP_REG1; 2646 } 2647 #endif 2648 FAIL_IF(call_with_args(compiler, type)); 2649 } 2650 2651 if (src == SLJIT_IMM) { 2652 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2653 FAIL_IF_NULL(jump); 2654 set_jump(jump, compiler, JUMP_ADDR); 2655 jump->u.target = srcw; 2656 2657 /* Worst case size. */ 2658 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2659 compiler->size += 5; 2660 #else 2661 compiler->size += 10 + 3; 2662 #endif 2663 2664 inst = (sljit_u8*)ensure_buf(compiler, 2); 2665 FAIL_IF_NULL(inst); 2666 2667 *inst++ = 0; 2668 *inst++ = type + 4; 2669 } 2670 else { 2671 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2672 /* REX_W is not necessary (src is not immediate). 
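An indirect jump or call always uses the full 64-bit value of its register or memory operand, so mode32 is set to drop the redundant prefix.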
*/ 2673 compiler->mode32 = 1; 2674 #endif 2675 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); 2676 FAIL_IF(!inst); 2677 *inst++ = GROUP_FF; 2678 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; 2679 } 2680 return SLJIT_SUCCESS; 2681 } 2682 2683 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, 2684 sljit_s32 dst, sljit_sw dstw, 2685 sljit_s32 src, sljit_sw srcw, 2686 sljit_s32 type) 2687 { 2688 sljit_u8 *inst; 2689 sljit_u8 cond_set = 0; 2690 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2691 sljit_s32 reg; 2692 #else 2693 /* CHECK_EXTRA_REGS migh overwrite these values. */ 2694 sljit_s32 dst_save = dst; 2695 sljit_sw dstw_save = dstw; 2696 #endif 2697 2698 CHECK_ERROR(); 2699 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 2700 SLJIT_UNUSED_ARG(srcw); 2701 2702 if (dst == SLJIT_UNUSED) 2703 return SLJIT_SUCCESS; 2704 2705 ADJUST_LOCAL_OFFSET(dst, dstw); 2706 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2707 if (SLJIT_UNLIKELY(compiler->flags_saved)) 2708 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS)); 2709 2710 type &= 0xff; 2711 /* setcc = jcc + 0x10. */ 2712 cond_set = get_jump_code(type) + 0x10; 2713 2714 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2715 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) { 2716 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); 2717 FAIL_IF(!inst); 2718 INC_SIZE(4 + 3); 2719 /* Set low register to conditional flag. */ 2720 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; 2721 *inst++ = GROUP_0F; 2722 *inst++ = cond_set; 2723 *inst++ = MOD_REG | reg_lmap[TMP_REG1]; 2724 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); 2725 *inst++ = OR_rm8_r8; 2726 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]; 2727 return SLJIT_SUCCESS; 2728 } 2729 2730 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1; 2731 2732 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); 2733 FAIL_IF(!inst); 2734 INC_SIZE(4 + 4); 2735 /* Set low register to conditional flag. */ 2736 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; 2737 *inst++ = GROUP_0F; 2738 *inst++ = cond_set; 2739 *inst++ = MOD_REG | reg_lmap[reg]; 2740 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); 2741 *inst++ = GROUP_0F; 2742 *inst++ = MOVZX_r_rm8; 2743 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; 2744 2745 if (reg != TMP_REG1) 2746 return SLJIT_SUCCESS; 2747 2748 if (GET_OPCODE(op) < SLJIT_ADD) { 2749 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; 2750 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2751 } 2752 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 2753 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2754 compiler->skip_checks = 1; 2755 #endif 2756 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0); 2757 #else /* SLJIT_CONFIG_X86_64 */ 2758 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { 2759 if (reg_map[dst] <= 4) { 2760 /* Low byte is accessible. */ 2761 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); 2762 FAIL_IF(!inst); 2763 INC_SIZE(3 + 3); 2764 /* Set low byte to conditional flag. */ 2765 *inst++ = GROUP_0F; 2766 *inst++ = cond_set; 2767 *inst++ = MOD_REG | reg_map[dst]; 2768 2769 *inst++ = GROUP_0F; 2770 *inst++ = MOVZX_r_rm8; 2771 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; 2772 return SLJIT_SUCCESS; 2773 } 2774 2775 /* Low byte is not accessible. 
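The destination maps to a register such as ESI or EDI, which has no 8-bit form on x86-32, so the flag is materialized either with CMOV or by setting AL and zero-extending it into dst while EAX is temporarily exchanged with TMP_REG1.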
*/ 2776 if (cpu_has_cmov == -1) 2777 get_cpu_features(); 2778 2779 if (cpu_has_cmov) { 2780 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); 2781 /* a xor reg, reg operation would overwrite the flags. */ 2782 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); 2783 2784 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); 2785 FAIL_IF(!inst); 2786 INC_SIZE(3); 2787 2788 *inst++ = GROUP_0F; 2789 /* cmovcc = setcc - 0x50. */ 2790 *inst++ = cond_set - 0x50; 2791 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]; 2792 return SLJIT_SUCCESS; 2793 } 2794 2795 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); 2796 FAIL_IF(!inst); 2797 INC_SIZE(1 + 3 + 3 + 1); 2798 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2799 /* Set al to conditional flag. */ 2800 *inst++ = GROUP_0F; 2801 *inst++ = cond_set; 2802 *inst++ = MOD_REG | 0 /* eax */; 2803 2804 *inst++ = GROUP_0F; 2805 *inst++ = MOVZX_r_rm8; 2806 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; 2807 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2808 return SLJIT_SUCCESS; 2809 } 2810 2811 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { 2812 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); 2813 if (dst != SLJIT_R0) { 2814 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); 2815 FAIL_IF(!inst); 2816 INC_SIZE(1 + 3 + 2 + 1); 2817 /* Set low register to conditional flag. */ 2818 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2819 *inst++ = GROUP_0F; 2820 *inst++ = cond_set; 2821 *inst++ = MOD_REG | 0 /* eax */; 2822 *inst++ = OR_rm8_r8; 2823 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; 2824 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2825 } 2826 else { 2827 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); 2828 FAIL_IF(!inst); 2829 INC_SIZE(2 + 3 + 2 + 2); 2830 /* Set low register to conditional flag. */ 2831 *inst++ = XCHG_r_rm; 2832 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; 2833 *inst++ = GROUP_0F; 2834 *inst++ = cond_set; 2835 *inst++ = MOD_REG | 1 /* ecx */; 2836 *inst++ = OR_rm8_r8; 2837 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; 2838 *inst++ = XCHG_r_rm; 2839 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; 2840 } 2841 return SLJIT_SUCCESS; 2842 } 2843 2844 /* Set TMP_REG1 to the bit. */ 2845 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); 2846 FAIL_IF(!inst); 2847 INC_SIZE(1 + 3 + 3 + 1); 2848 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2849 /* Set al to conditional flag. 
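TMP_REG1 has just been exchanged with eax, so the setcc/movzx pair below builds the 0/1 value in eax, and the closing exchange moves it into TMP_REG1 while restoring eax.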
*/ 2850 *inst++ = GROUP_0F; 2851 *inst++ = cond_set; 2852 *inst++ = MOD_REG | 0 /* eax */; 2853 2854 *inst++ = GROUP_0F; 2855 *inst++ = MOVZX_r_rm8; 2856 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; 2857 2858 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2859 2860 if (GET_OPCODE(op) < SLJIT_ADD) 2861 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2862 2863 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 2864 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2865 compiler->skip_checks = 1; 2866 #endif 2867 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); 2868 #endif /* SLJIT_CONFIG_X86_64 */ 2869 } 2870 2871 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) 2872 { 2873 CHECK_ERROR(); 2874 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); 2875 ADJUST_LOCAL_OFFSET(dst, dstw); 2876 2877 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2878 2879 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2880 compiler->mode32 = 0; 2881 #endif 2882 2883 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); 2884 2885 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2886 if (NOT_HALFWORD(offset)) { 2887 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); 2888 #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 2889 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); 2890 return compiler->error; 2891 #else 2892 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); 2893 #endif 2894 } 2895 #endif 2896 2897 if (offset != 0) 2898 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); 2899 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); 2900 } 2901 2902 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) 2903 { 2904 sljit_u8 *inst; 2905 struct sljit_const *const_; 2906 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2907 sljit_s32 reg; 2908 #endif 2909 2910 CHECK_ERROR_PTR(); 2911 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2912 ADJUST_LOCAL_OFFSET(dst, dstw); 2913 2914 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2915 2916 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); 2917 PTR_FAIL_IF(!const_); 2918 set_const(const_, compiler); 2919 2920 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2921 compiler->mode32 = 0; 2922 reg = SLOW_IS_REG(dst) ? 
dst : TMP_REG1; 2923 2924 if (emit_load_imm64(compiler, reg, init_value)) 2925 return NULL; 2926 #else 2927 if (dst == SLJIT_UNUSED) 2928 dst = TMP_REG1; 2929 2930 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) 2931 return NULL; 2932 #endif 2933 2934 inst = (sljit_u8*)ensure_buf(compiler, 2); 2935 PTR_FAIL_IF(!inst); 2936 2937 *inst++ = 0; 2938 *inst++ = 1; 2939 2940 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2941 if (dst & SLJIT_MEM) 2942 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) 2943 return NULL; 2944 #endif 2945 2946 return const_; 2947 } 2948 2949 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) 2950 { 2951 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2952 sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4)); 2953 #else 2954 sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_addr); 2955 #endif 2956 } 2957 2958 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) 2959 { 2960 sljit_unaligned_store_sw((void*)addr, new_constant); 2961 } 2962 2963 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void) 2964 { 2965 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 2966 if (cpu_has_sse2 == -1) 2967 get_cpu_features(); 2968 return cpu_has_sse2; 2969 #else 2970 return 1; 2971 #endif 2972 } 2973 2974 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void) 2975 { 2976 if (cpu_has_cmov == -1) 2977 get_cpu_features(); 2978 return cpu_has_cmov; 2979 } 2980 2981 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler, 2982 sljit_s32 type, 2983 sljit_s32 dst_reg, 2984 sljit_s32 src, sljit_sw srcw) 2985 { 2986 sljit_u8* inst; 2987 2988 CHECK_ERROR(); 2989 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2990 CHECK_ARGUMENT(sljit_x86_is_cmov_available()); 2991 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); 2992 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); 2993 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); 2994 FUNCTION_CHECK_SRC(src, srcw); 2995 #endif 2996 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) 2997 if (SLJIT_UNLIKELY(!!compiler->verbose)) { 2998 fprintf(compiler->verbose, " x86_cmov%s %s%s, ", 2999 !(dst_reg & SLJIT_I32_OP) ? "" : ".i", 3000 jump_names[type & 0xff], JUMP_POSTFIX(type)); 3001 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); 3002 fprintf(compiler->verbose, ", "); 3003 sljit_verbose_param(compiler, src, srcw); 3004 fprintf(compiler->verbose, "\n"); 3005 } 3006 #endif 3007 3008 ADJUST_LOCAL_OFFSET(src, srcw); 3009 CHECK_EXTRA_REGS(src, srcw, (void)0); 3010 3011 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 3012 compiler->mode32 = dst_reg & SLJIT_I32_OP; 3013 #endif 3014 dst_reg &= ~SLJIT_I32_OP; 3015 3016 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { 3017 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); 3018 src = TMP_REG1; 3019 srcw = 0; 3020 } 3021 3022 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); 3023 FAIL_IF(!inst); 3024 *inst++ = GROUP_0F; 3025 *inst = get_jump_code(type & 0xff) - 0x40; 3026 return SLJIT_SUCCESS; 3027 } 3028
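/* A minimal usage sketch of the conditional move helper defined above (illustrative only; it
   assumes the public sljit API declared in sljitLir.h, e.g. sljit_emit_jump, sljit_emit_op1 and
   sljit_set_label, and a compiler that has already produced the flags being tested):

	if (sljit_x86_is_cmov_available())
		sljit_x86_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_R1, 0);
	else {
		struct sljit_jump *skip = sljit_emit_jump(compiler, SLJIT_NOT_EQUAL);
		sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_R1, 0);
		sljit_set_label(skip, sljit_emit_label(compiler));
	}

   Both forms copy SLJIT_R1 into SLJIT_R0 only when the preceding comparison set the equal flag;
   the CMOV form simply avoids the extra branch. */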