/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}
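/* A minimal illustration of the macro above (the exact stack layout is owned
   by the architecture specific files): on x86-32 the virtual registers
   SLJIT_R3 .. SLJIT_R6 have no machine register of their own (their reg_map
   entry is 0), so an operand naming one of them is rewritten into a stack
   access before encoding. For example, a hypothetical

     sljit_si p = SLJIT_R4;
     sljit_sw w = 0;
     CHECK_EXTRA_REGS(p, w, (void)0);

   leaves p == SLJIT_MEM1(SLJIT_SP) and w set to the slot offset computed
   from SLJIT_LOCALS_OFFSET, so later code paths treat the register like any
   other memory operand. */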
#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
};
#endif

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
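/* An encoding reminder rather than new interface: each REX_* value above is
   the REX prefix byte with one of its W/R/X/B bits set. REX_W selects a
   64 bit operand size, while REX_R, REX_X and REX_B supply the fourth (high)
   bit of the ModRM reg field, of the SIB index field and of the ModRM rm or
   SIB base field, which is how r8-r15 become reachable. For example,
   0x48 0x89 0xc8 (REX_W, MOV_rm_r, ModRM reg=rcx rm=rax) is "mov rax, rcx",
   and flipping REX_B on - 0x49 0x89 0xc8 - redirects the rm operand to r8,
   giving "mov r8, rcx". MOV_rm_r and the other opcode bytes are defined in
   the instruction forms section below. */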
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
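/* Illustration only, tied to the MOV_RM macro above: the second byte it
   builds is a plain ModRM byte, mod in bits 7-6, reg in bits 5-3 and rm in
   bits 2-0. With mod == 0x3 both operands are registers, so for instance
   MOV_RM(0x3, 0, 1) - machine register index 0 is eax, 1 is ecx - emits the
   two bytes 0x8b 0xc1, i.e. "mov eax, ecx"; smaller mod values select a
   memory form with no, 8 bit or 32 bit displacement. */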
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Even if several threads detect the CPU features
   at the same time, they all write the same values, so overwriting each
   other is harmless. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}

static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_D_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_D_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_D_LESS:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_D_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_D_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_D_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_D_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_D_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}

static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}
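/* A brief sketch of what the second pass below consumes, as far as it can be
   read from the loop itself: the compiler buffers hold length-prefixed
   records. A non-zero length byte is followed by that many already encoded
   machine code bytes, while a zero length byte is followed by a tag: 0 marks
   a label, 1 marks a constant, 2 and 3 mark fixed call/jump targets, and
   values >= 4 mark a jump whose sljit type is the tag minus 4. Jumps are
   emitted either in their short form (one opcode byte plus an 8 bit
   displacement, flagged PATCH_MB) or in their near form (0x0f prefixed
   conditional opcode or JMP_i32/CALL_i32 plus a 32 bit displacement, flagged
   PATCH_MW); the displacement slots are filled in by the patch loop after
   all code has been copied. */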
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Some space may be wasted because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
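/* A decoding note for the two helpers above, not a new code path: on x86-32
   emit_save_flags produces the five bytes 8d 64 24 04 9c, i.e.
   "lea esp, [esp + 4]" followed by "pushfd". The lea first raises esp by one
   word and the pushfd then lowers it again, so the net effect is that EFLAGS
   is written to the word at [esp] while the stack pointer itself is left
   unchanged. emit_restore_flags mirrors this with "popfd" followed by
   "lea esp, [esp - 4]". On x86-64 the same sequences carry a REX_W prefix on
   the lea and operate on rsp with an 8 byte word. */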
#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches all 4k pages belonging to the requested stack
	   space, whose size is passed in local_size. This is necessary on
	   Windows, where the stack can only grow in 4k steps. If the stack is
	   already large enough the call just burns CPU cycles, but since that
	   cannot be known in advance, it must always be called. I think this
	   is a bad design in general, even if it has some reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination; no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LUMUL:
	case SLJIT_LSMUL:
	case SLJIT_UDIVMOD:
	case SLJIT_SDIVMOD:
	case SLJIT_UDIVI:
	case SLJIT_SDIVI:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_UDIVI) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_SDIVI) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0);
		else if (op >= SLJIT_UDIVMOD)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LUMUL:
			*inst |= MUL;
			break;
		case SLJIT_LSMUL:
			*inst |= IMUL;
			break;
		case SLJIT_UDIVMOD:
		case SLJIT_UDIVI:
			*inst |= DIV;
			break;
		case SLJIT_SDIVMOD:
		case SLJIT_SDIVI:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_SDIVMOD)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_UDIVI)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
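	/* Background for the reg_map[...] >= 4 checks below (an x86 detail, not
	   an sljit one): without a REX prefix only AL, CL, DL and BL are
	   addressable as 8 bit registers; the encodings 4-7 select AH, CH, DH
	   and BH rather than the low bytes of ESP/EBP/ESI/EDI. Hence on x86-32
	   a value living in a register with reg_map >= 4 must first be moved to
	   one of the low four registers (or handled with shifts/masking) before
	   a byte-sized store or MOVZX/MOVSX can be encoded. */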
	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* Both src and dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */
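	/* Encoding reminder (generic x86, not sljit specific): 16 bit operands
	   are selected with the 0x66 operand-size prefix, which is presumably
	   what the EX86_PREF_66 flag passed below asks emit_x86_instruction to
	   emit; 16 bit loads widen through MOVSX/MOVZX_r_rm16 instead. */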
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
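/* How emit_clz below turns BSR into a leading-zero count (a property of the
   arithmetic, not of anything added here): for a non-zero n-bit value BSR
   yields the index of the highest set bit, and since that index is at most
   n-1, "index XOR (n-1)" equals "(n-1) - index", i.e. the number of leading
   zeros. The destination is preloaded with 2*n - 1 (32+31 or 64+63); when
   the source is zero BSR sets ZF, the CMOVNE (or the JE-guarded mov used on
   CPUs without cmov) skips copying the BSR result, and the final XOR with
   n-1 turns the preload into exactly n. */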
static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
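/* emit_mul below picks between the three signed multiply encodings defined
   earlier, depending on where an immediate operand (if any) fits:
   "0f af /r" (IMUL_r_rm) for register * reg/mem, "6b /r ib" (IMUL_r_rm_i8)
   when the immediate fits in a signed byte, and "69 /r id" (IMUL_r_rm_i32)
   for 32 bit immediates; larger 64 bit immediates are first loaded into a
   temporary register. */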
static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r, done = 0;

	/* These cases are better left to be handled the normal way. */
	if (!keep_flags) {
		if (dst == src1 && dstw == src1w)
			return SLJIT_ERR_UNSUPPORTED;
		if (dst == src2 && dstw == src2w)
			return SLJIT_ERR_UNSUPPORTED;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}

static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
TMP_REG1, 0, src1, src1w); 1888 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1889 FAIL_IF(!inst); 1890 *inst = CMP_r_rm; 1891 } 1892 return SLJIT_SUCCESS; 1893 } 1894 1895 static sljit_si emit_test_binary(struct sljit_compiler *compiler, 1896 sljit_si src1, sljit_sw src1w, 1897 sljit_si src2, sljit_sw src2w) 1898 { 1899 sljit_ub* inst; 1900 1901 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1902 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1903 #else 1904 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { 1905 #endif 1906 BINARY_EAX_IMM(TEST_EAX_i32, src2w); 1907 return SLJIT_SUCCESS; 1908 } 1909 1910 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1911 if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { 1912 #else 1913 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { 1914 #endif 1915 BINARY_EAX_IMM(TEST_EAX_i32, src1w); 1916 return SLJIT_SUCCESS; 1917 } 1918 1919 if (!(src1 & SLJIT_IMM)) { 1920 if (src2 & SLJIT_IMM) { 1921 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1922 if (IS_HALFWORD(src2w) || compiler->mode32) { 1923 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); 1924 FAIL_IF(!inst); 1925 *inst = GROUP_F7; 1926 } 1927 else { 1928 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); 1929 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w); 1930 FAIL_IF(!inst); 1931 *inst = TEST_rm_r; 1932 } 1933 #else 1934 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); 1935 FAIL_IF(!inst); 1936 *inst = GROUP_F7; 1937 #endif 1938 return SLJIT_SUCCESS; 1939 } 1940 else if (FAST_IS_REG(src1)) { 1941 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); 1942 FAIL_IF(!inst); 1943 *inst = TEST_rm_r; 1944 return SLJIT_SUCCESS; 1945 } 1946 } 1947 1948 if (!(src2 & SLJIT_IMM)) { 1949 if (src1 & SLJIT_IMM) { 1950 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1951 if (IS_HALFWORD(src1w) || compiler->mode32) { 1952 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); 1953 FAIL_IF(!inst); 1954 *inst = GROUP_F7; 1955 } 1956 else { 1957 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); 1958 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w); 1959 FAIL_IF(!inst); 1960 *inst = TEST_rm_r; 1961 } 1962 #else 1963 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); 1964 FAIL_IF(!inst); 1965 *inst = GROUP_F7; 1966 #endif 1967 return SLJIT_SUCCESS; 1968 } 1969 else if (FAST_IS_REG(src2)) { 1970 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); 1971 FAIL_IF(!inst); 1972 *inst = TEST_rm_r; 1973 return SLJIT_SUCCESS; 1974 } 1975 } 1976 1977 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1978 if (src2 & SLJIT_IMM) { 1979 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1980 if (IS_HALFWORD(src2w) || compiler->mode32) { 1981 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); 1982 FAIL_IF(!inst); 1983 *inst = GROUP_F7; 1984 } 1985 else { 1986 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); 1987 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); 1988 FAIL_IF(!inst); 1989 *inst = TEST_rm_r; 1990 } 1991 #else 1992 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); 1993 FAIL_IF(!inst); 1994 *inst = GROUP_F7; 1995 #endif 1996 } 1997 else { 1998 inst = 
emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1999 FAIL_IF(!inst); 2000 *inst = TEST_rm_r; 2001 } 2002 return SLJIT_SUCCESS; 2003 } 2004 2005 static sljit_si emit_shift(struct sljit_compiler *compiler, 2006 sljit_ub mode, 2007 sljit_si dst, sljit_sw dstw, 2008 sljit_si src1, sljit_sw src1w, 2009 sljit_si src2, sljit_sw src2w) 2010 { 2011 sljit_ub* inst; 2012 2013 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { 2014 if (dst == src1 && dstw == src1w) { 2015 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); 2016 FAIL_IF(!inst); 2017 *inst |= mode; 2018 return SLJIT_SUCCESS; 2019 } 2020 if (dst == SLJIT_UNUSED) { 2021 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2022 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); 2023 FAIL_IF(!inst); 2024 *inst |= mode; 2025 return SLJIT_SUCCESS; 2026 } 2027 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { 2028 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2029 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2030 FAIL_IF(!inst); 2031 *inst |= mode; 2032 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2033 return SLJIT_SUCCESS; 2034 } 2035 if (FAST_IS_REG(dst)) { 2036 EMIT_MOV(compiler, dst, 0, src1, src1w); 2037 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); 2038 FAIL_IF(!inst); 2039 *inst |= mode; 2040 return SLJIT_SUCCESS; 2041 } 2042 2043 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2044 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); 2045 FAIL_IF(!inst); 2046 *inst |= mode; 2047 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 2048 return SLJIT_SUCCESS; 2049 } 2050 2051 if (dst == SLJIT_PREF_SHIFT_REG) { 2052 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2053 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2054 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2055 FAIL_IF(!inst); 2056 *inst |= mode; 2057 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2058 } 2059 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { 2060 if (src1 != dst) 2061 EMIT_MOV(compiler, dst, 0, src1, src1w); 2062 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); 2063 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2064 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); 2065 FAIL_IF(!inst); 2066 *inst |= mode; 2067 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2068 } 2069 else { 2070 /* This case is really difficult, since ecx itself may used for 2071 addressing, and we must ensure to work even in that case. */ 2072 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2073 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2074 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); 2075 #else 2076 /* [esp+0] contains the flags. 
*/ 2077 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); 2078 #endif 2079 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2080 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2081 FAIL_IF(!inst); 2082 *inst |= mode; 2083 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2084 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); 2085 #else 2086 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); 2087 #endif 2088 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 2089 } 2090 2091 return SLJIT_SUCCESS; 2092 } 2093 2094 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler, 2095 sljit_ub mode, sljit_si set_flags, 2096 sljit_si dst, sljit_sw dstw, 2097 sljit_si src1, sljit_sw src1w, 2098 sljit_si src2, sljit_sw src2w) 2099 { 2100 /* The CPU does not set flags if the shift count is 0. */ 2101 if (src2 & SLJIT_IMM) { 2102 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2103 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0)) 2104 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2105 #else 2106 if ((src2w & 0x1f) != 0) 2107 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2108 #endif 2109 if (!set_flags) 2110 return emit_mov(compiler, dst, dstw, src1, src1w); 2111 /* OR dst, src, 0 */ 2112 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, 2113 dst, dstw, src1, src1w, SLJIT_IMM, 0); 2114 } 2115 2116 if (!set_flags) 2117 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2118 2119 if (!FAST_IS_REG(dst)) 2120 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); 2121 2122 FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); 2123 2124 if (FAST_IS_REG(dst)) 2125 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); 2126 return SLJIT_SUCCESS; 2127 } 2128 2129 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, 2130 sljit_si dst, sljit_sw dstw, 2131 sljit_si src1, sljit_sw src1w, 2132 sljit_si src2, sljit_sw src2w) 2133 { 2134 CHECK_ERROR(); 2135 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2136 ADJUST_LOCAL_OFFSET(dst, dstw); 2137 ADJUST_LOCAL_OFFSET(src1, src1w); 2138 ADJUST_LOCAL_OFFSET(src2, src2w); 2139 2140 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2141 CHECK_EXTRA_REGS(src1, src1w, (void)0); 2142 CHECK_EXTRA_REGS(src2, src2w, (void)0); 2143 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2144 compiler->mode32 = op & SLJIT_INT_OP; 2145 #endif 2146 2147 if (GET_OPCODE(op) >= SLJIT_MUL) { 2148 if (SLJIT_UNLIKELY(GET_FLAGS(op))) 2149 compiler->flags_saved = 0; 2150 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 2151 FAIL_IF(emit_save_flags(compiler)); 2152 } 2153 2154 switch (GET_OPCODE(op)) { 2155 case SLJIT_ADD: 2156 if (!GET_FLAGS(op)) { 2157 if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) 2158 return compiler->error; 2159 } 2160 else 2161 compiler->flags_saved = 0; 2162 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 2163 FAIL_IF(emit_save_flags(compiler)); 2164 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 2165 dst, dstw, src1, src1w, src2, src2w); 2166 case SLJIT_ADDC: 2167 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. 
*/ 2168 FAIL_IF(emit_restore_flags(compiler, 1)); 2169 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) 2170 FAIL_IF(emit_save_flags(compiler)); 2171 if (SLJIT_UNLIKELY(GET_FLAGS(op))) 2172 compiler->flags_saved = 0; 2173 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, 2174 dst, dstw, src1, src1w, src2, src2w); 2175 case SLJIT_SUB: 2176 if (!GET_FLAGS(op)) { 2177 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) 2178 return compiler->error; 2179 } 2180 else 2181 compiler->flags_saved = 0; 2182 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) 2183 FAIL_IF(emit_save_flags(compiler)); 2184 if (dst == SLJIT_UNUSED) 2185 return emit_cmp_binary(compiler, src1, src1w, src2, src2w); 2186 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, 2187 dst, dstw, src1, src1w, src2, src2w); 2188 case SLJIT_SUBC: 2189 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ 2190 FAIL_IF(emit_restore_flags(compiler, 1)); 2191 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) 2192 FAIL_IF(emit_save_flags(compiler)); 2193 if (SLJIT_UNLIKELY(GET_FLAGS(op))) 2194 compiler->flags_saved = 0; 2195 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, 2196 dst, dstw, src1, src1w, src2, src2w); 2197 case SLJIT_MUL: 2198 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); 2199 case SLJIT_AND: 2200 if (dst == SLJIT_UNUSED) 2201 return emit_test_binary(compiler, src1, src1w, src2, src2w); 2202 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, 2203 dst, dstw, src1, src1w, src2, src2w); 2204 case SLJIT_OR: 2205 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, 2206 dst, dstw, src1, src1w, src2, src2w); 2207 case SLJIT_XOR: 2208 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, 2209 dst, dstw, src1, src1w, src2, src2w); 2210 case SLJIT_SHL: 2211 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op), 2212 dst, dstw, src1, src1w, src2, src2w); 2213 case SLJIT_LSHR: 2214 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op), 2215 dst, dstw, src1, src1w, src2, src2w); 2216 case SLJIT_ASHR: 2217 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op), 2218 dst, dstw, src1, src1w, src2, src2w); 2219 } 2220 2221 return SLJIT_SUCCESS; 2222 } 2223 2224 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) 2225 { 2226 CHECK_REG_INDEX(check_sljit_get_register_index(reg)); 2227 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2228 if (reg >= SLJIT_R3 && reg <= SLJIT_R6) 2229 return -1; 2230 #endif 2231 return reg_map[reg]; 2232 } 2233 2234 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) 2235 { 2236 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); 2237 return reg; 2238 } 2239 2240 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, 2241 void *instruction, sljit_si size) 2242 { 2243 sljit_ub *inst; 2244 2245 CHECK_ERROR(); 2246 CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 2247 2248 inst = (sljit_ub*)ensure_buf(compiler, 1 + size); 2249 FAIL_IF(!inst); 2250 INC_SIZE(size); 2251 SLJIT_MEMMOVE(inst, instruction, size); 2252 return SLJIT_SUCCESS; 2253 } 2254 2255 /* --------------------------------------------------------------------- */ 2256 /* Floating point operators */ 2257 /* --------------------------------------------------------------------- 
*/ 2258 2259 /* Alignment + 2 * 16 bytes. */ 2260 static sljit_si sse2_data[3 + (4 + 4) * 2]; 2261 static sljit_si *sse2_buffer; 2262 2263 static void init_compiler(void) 2264 { 2265 sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf); 2266 /* Single precision constants. */ 2267 sse2_buffer[0] = 0x80000000; 2268 sse2_buffer[4] = 0x7fffffff; 2269 /* Double precision constants. */ 2270 sse2_buffer[8] = 0; 2271 sse2_buffer[9] = 0x80000000; 2272 sse2_buffer[12] = 0xffffffff; 2273 sse2_buffer[13] = 0x7fffffff; 2274 } 2275 2276 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) 2277 { 2278 #ifdef SLJIT_IS_FPU_AVAILABLE 2279 return SLJIT_IS_FPU_AVAILABLE; 2280 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 2281 if (cpu_has_sse2 == -1) 2282 get_cpu_features(); 2283 return cpu_has_sse2; 2284 #else /* SLJIT_DETECT_SSE2 */ 2285 return 1; 2286 #endif /* SLJIT_DETECT_SSE2 */ 2287 } 2288 2289 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, 2290 sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) 2291 { 2292 sljit_ub *inst; 2293 2294 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2295 FAIL_IF(!inst); 2296 *inst++ = GROUP_0F; 2297 *inst = opcode; 2298 return SLJIT_SUCCESS; 2299 } 2300 2301 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, 2302 sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) 2303 { 2304 sljit_ub *inst; 2305 2306 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2307 FAIL_IF(!inst); 2308 *inst++ = GROUP_0F; 2309 *inst = opcode; 2310 return SLJIT_SUCCESS; 2311 } 2312 2313 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler, 2314 sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw) 2315 { 2316 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); 2317 } 2318 2319 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, 2320 sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src) 2321 { 2322 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); 2323 } 2324 2325 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, 2326 sljit_si dst, sljit_sw dstw, 2327 sljit_si src, sljit_sw srcw) 2328 { 2329 sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 2330 sljit_ub *inst; 2331 2332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2333 if (GET_OPCODE(op) == SLJIT_CONVW_FROMD) 2334 compiler->mode32 = 0; 2335 #endif 2336 2337 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); 2338 FAIL_IF(!inst); 2339 *inst++ = GROUP_0F; 2340 *inst = CVTTSD2SI_r_xm; 2341 2342 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) 2343 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2344 return SLJIT_SUCCESS; 2345 } 2346 2347 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, 2348 sljit_si dst, sljit_sw dstw, 2349 sljit_si src, sljit_sw srcw) 2350 { 2351 sljit_si dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG; 2352 sljit_ub *inst; 2353 2354 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2355 if (GET_OPCODE(op) == SLJIT_CONVD_FROMW) 2356 compiler->mode32 = 0; 2357 #endif 2358 2359 if (src & SLJIT_IMM) { 2360 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2361 if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) 2362 srcw = (sljit_si)srcw; 2363 #endif 2364 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 2365 src = TMP_REG1; 2366 srcw = 0; 2367 } 2368 2369 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); 2370 FAIL_IF(!inst); 2371 *inst++ = GROUP_0F; 2372 *inst = CVTSI2SD_x_rm; 2373 2374 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2375 compiler->mode32 = 1; 2376 #endif 2377 if (dst_r == TMP_FREG) 2378 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); 2379 return SLJIT_SUCCESS; 2380 } 2381 2382 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, 2383 sljit_si src1, sljit_sw src1w, 2384 sljit_si src2, sljit_sw src2w) 2385 { 2386 compiler->flags_saved = 0; 2387 if (!FAST_IS_REG(src1)) { 2388 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); 2389 src1 = TMP_FREG; 2390 } 2391 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w); 2392 } 2393 2394 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, 2395 sljit_si dst, sljit_sw dstw, 2396 sljit_si src, sljit_sw srcw) 2397 { 2398 sljit_si dst_r; 2399 2400 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2401 compiler->mode32 = 1; 2402 #endif 2403 2404 CHECK_ERROR(); 2405 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); 2406 2407 if (GET_OPCODE(op) == SLJIT_DMOV) { 2408 if (FAST_IS_REG(dst)) 2409 return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw); 2410 if (FAST_IS_REG(src)) 2411 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src); 2412 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw)); 2413 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); 2414 } 2415 2416 if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) { 2417 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; 2418 if (FAST_IS_REG(src)) { 2419 /* We overwrite the high bits of source. From SLJIT point of view, 2420 this is not an issue. 2421 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ 2422 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0)); 2423 } 2424 else { 2425 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw)); 2426 src = TMP_FREG; 2427 } 2428 2429 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0)); 2430 if (dst_r == TMP_FREG) 2431 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); 2432 return SLJIT_SUCCESS; 2433 } 2434 2435 if (SLOW_IS_REG(dst)) { 2436 dst_r = dst; 2437 if (dst != src) 2438 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); 2439 } 2440 else { 2441 dst_r = TMP_FREG; 2442 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); 2443 } 2444 2445 switch (GET_OPCODE(op)) { 2446 case SLJIT_DNEG: 2447 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? 
sse2_buffer : sse2_buffer + 8))); 2448 break; 2449 2450 case SLJIT_DABS: 2451 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); 2452 break; 2453 } 2454 2455 if (dst_r == TMP_FREG) 2456 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); 2457 return SLJIT_SUCCESS; 2458 } 2459 2460 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, 2461 sljit_si dst, sljit_sw dstw, 2462 sljit_si src1, sljit_sw src1w, 2463 sljit_si src2, sljit_sw src2w) 2464 { 2465 sljit_si dst_r; 2466 2467 CHECK_ERROR(); 2468 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2469 ADJUST_LOCAL_OFFSET(dst, dstw); 2470 ADJUST_LOCAL_OFFSET(src1, src1w); 2471 ADJUST_LOCAL_OFFSET(src2, src2w); 2472 2473 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2474 compiler->mode32 = 1; 2475 #endif 2476 2477 if (FAST_IS_REG(dst)) { 2478 dst_r = dst; 2479 if (dst == src1) 2480 ; /* Do nothing here. */ 2481 else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) { 2482 /* Swap arguments. */ 2483 src2 = src1; 2484 src2w = src1w; 2485 } 2486 else if (dst != src2) 2487 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w)); 2488 else { 2489 dst_r = TMP_FREG; 2490 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); 2491 } 2492 } 2493 else { 2494 dst_r = TMP_FREG; 2495 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); 2496 } 2497 2498 switch (GET_OPCODE(op)) { 2499 case SLJIT_DADD: 2500 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); 2501 break; 2502 2503 case SLJIT_DSUB: 2504 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); 2505 break; 2506 2507 case SLJIT_DMUL: 2508 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); 2509 break; 2510 2511 case SLJIT_DDIV: 2512 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); 2513 break; 2514 } 2515 2516 if (dst_r == TMP_FREG) 2517 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); 2518 return SLJIT_SUCCESS; 2519 } 2520 2521 /* --------------------------------------------------------------------- */ 2522 /* Conditional instructions */ 2523 /* --------------------------------------------------------------------- */ 2524 2525 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) 2526 { 2527 sljit_ub *inst; 2528 struct sljit_label *label; 2529 2530 CHECK_ERROR_PTR(); 2531 CHECK_PTR(check_sljit_emit_label(compiler)); 2532 2533 /* We should restore the flags before the label, 2534 since other taken jumps has their own flags as well. 
*/ 2535 if (SLJIT_UNLIKELY(compiler->flags_saved)) 2536 PTR_FAIL_IF(emit_restore_flags(compiler, 0)); 2537 2538 if (compiler->last_label && compiler->last_label->size == compiler->size) 2539 return compiler->last_label; 2540 2541 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); 2542 PTR_FAIL_IF(!label); 2543 set_label(label, compiler); 2544 2545 inst = (sljit_ub*)ensure_buf(compiler, 2); 2546 PTR_FAIL_IF(!inst); 2547 2548 *inst++ = 0; 2549 *inst++ = 0; 2550 2551 return label; 2552 } 2553 2554 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) 2555 { 2556 sljit_ub *inst; 2557 struct sljit_jump *jump; 2558 2559 CHECK_ERROR_PTR(); 2560 CHECK_PTR(check_sljit_emit_jump(compiler, type)); 2561 2562 if (SLJIT_UNLIKELY(compiler->flags_saved)) { 2563 if ((type & 0xff) <= SLJIT_JUMP) 2564 PTR_FAIL_IF(emit_restore_flags(compiler, 0)); 2565 compiler->flags_saved = 0; 2566 } 2567 2568 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2569 PTR_FAIL_IF_NULL(jump); 2570 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 2571 type &= 0xff; 2572 2573 if (type >= SLJIT_CALL1) 2574 PTR_FAIL_IF(call_with_args(compiler, type)); 2575 2576 /* Worst case size. */ 2577 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2578 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; 2579 #else 2580 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); 2581 #endif 2582 2583 inst = (sljit_ub*)ensure_buf(compiler, 2); 2584 PTR_FAIL_IF_NULL(inst); 2585 2586 *inst++ = 0; 2587 *inst++ = type + 4; 2588 return jump; 2589 } 2590 2591 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) 2592 { 2593 sljit_ub *inst; 2594 struct sljit_jump *jump; 2595 2596 CHECK_ERROR(); 2597 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 2598 ADJUST_LOCAL_OFFSET(src, srcw); 2599 2600 CHECK_EXTRA_REGS(src, srcw, (void)0); 2601 2602 if (SLJIT_UNLIKELY(compiler->flags_saved)) { 2603 if (type <= SLJIT_JUMP) 2604 FAIL_IF(emit_restore_flags(compiler, 0)); 2605 compiler->flags_saved = 0; 2606 } 2607 2608 if (type >= SLJIT_CALL1) { 2609 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2610 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 2611 if (src == SLJIT_R2) { 2612 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 2613 src = TMP_REG1; 2614 } 2615 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) 2616 srcw += sizeof(sljit_sw); 2617 #endif 2618 #endif 2619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) 2620 if (src == SLJIT_R2) { 2621 EMIT_MOV(compiler, TMP_REG1, 0, src, 0); 2622 src = TMP_REG1; 2623 } 2624 #endif 2625 FAIL_IF(call_with_args(compiler, type)); 2626 } 2627 2628 if (src == SLJIT_IMM) { 2629 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2630 FAIL_IF_NULL(jump); 2631 set_jump(jump, compiler, JUMP_ADDR); 2632 jump->u.target = srcw; 2633 2634 /* Worst case size. */ 2635 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2636 compiler->size += 5; 2637 #else 2638 compiler->size += 10 + 3; 2639 #endif 2640 2641 inst = (sljit_ub*)ensure_buf(compiler, 2); 2642 FAIL_IF_NULL(inst); 2643 2644 *inst++ = 0; 2645 *inst++ = type + 4; 2646 } 2647 else { 2648 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2649 /* REX_W is not necessary (src is not immediate). 
*/ 2650 compiler->mode32 = 1; 2651 #endif 2652 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); 2653 FAIL_IF(!inst); 2654 *inst++ = GROUP_FF; 2655 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; 2656 } 2657 return SLJIT_SUCCESS; 2658 } 2659 2660 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, 2661 sljit_si dst, sljit_sw dstw, 2662 sljit_si src, sljit_sw srcw, 2663 sljit_si type) 2664 { 2665 sljit_ub *inst; 2666 sljit_ub cond_set = 0; 2667 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2668 sljit_si reg; 2669 #else 2670 /* CHECK_EXTRA_REGS migh overwrite these values. */ 2671 sljit_si dst_save = dst; 2672 sljit_sw dstw_save = dstw; 2673 #endif 2674 2675 CHECK_ERROR(); 2676 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 2677 SLJIT_UNUSED_ARG(srcw); 2678 2679 if (dst == SLJIT_UNUSED) 2680 return SLJIT_SUCCESS; 2681 2682 ADJUST_LOCAL_OFFSET(dst, dstw); 2683 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2684 if (SLJIT_UNLIKELY(compiler->flags_saved)) 2685 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS)); 2686 2687 type &= 0xff; 2688 /* setcc = jcc + 0x10. */ 2689 cond_set = get_jump_code(type) + 0x10; 2690 2691 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2692 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) { 2693 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3); 2694 FAIL_IF(!inst); 2695 INC_SIZE(4 + 3); 2696 /* Set low register to conditional flag. */ 2697 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; 2698 *inst++ = GROUP_0F; 2699 *inst++ = cond_set; 2700 *inst++ = MOD_REG | reg_lmap[TMP_REG1]; 2701 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); 2702 *inst++ = OR_rm8_r8; 2703 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]; 2704 return SLJIT_SUCCESS; 2705 } 2706 2707 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1; 2708 2709 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); 2710 FAIL_IF(!inst); 2711 INC_SIZE(4 + 4); 2712 /* Set low register to conditional flag. */ 2713 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; 2714 *inst++ = GROUP_0F; 2715 *inst++ = cond_set; 2716 *inst++ = MOD_REG | reg_lmap[reg]; 2717 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); 2718 *inst++ = GROUP_0F; 2719 *inst++ = MOVZX_r_rm8; 2720 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; 2721 2722 if (reg != TMP_REG1) 2723 return SLJIT_SUCCESS; 2724 2725 if (GET_OPCODE(op) < SLJIT_ADD) { 2726 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; 2727 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2728 } 2729 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 2730 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2731 compiler->skip_checks = 1; 2732 #endif 2733 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0); 2734 #else /* SLJIT_CONFIG_X86_64 */ 2735 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { 2736 if (reg_map[dst] <= 4) { 2737 /* Low byte is accessible. */ 2738 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); 2739 FAIL_IF(!inst); 2740 INC_SIZE(3 + 3); 2741 /* Set low byte to conditional flag. */ 2742 *inst++ = GROUP_0F; 2743 *inst++ = cond_set; 2744 *inst++ = MOD_REG | reg_map[dst]; 2745 2746 *inst++ = GROUP_0F; 2747 *inst++ = MOVZX_r_rm8; 2748 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; 2749 return SLJIT_SUCCESS; 2750 } 2751 2752 /* Low byte is not accessible. 
*/ 2753 if (cpu_has_cmov == -1) 2754 get_cpu_features(); 2755 2756 if (cpu_has_cmov) { 2757 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); 2758 /* a xor reg, reg operation would overwrite the flags. */ 2759 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); 2760 2761 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); 2762 FAIL_IF(!inst); 2763 INC_SIZE(3); 2764 2765 *inst++ = GROUP_0F; 2766 /* cmovcc = setcc - 0x50. */ 2767 *inst++ = cond_set - 0x50; 2768 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]; 2769 return SLJIT_SUCCESS; 2770 } 2771 2772 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); 2773 FAIL_IF(!inst); 2774 INC_SIZE(1 + 3 + 3 + 1); 2775 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2776 /* Set al to conditional flag. */ 2777 *inst++ = GROUP_0F; 2778 *inst++ = cond_set; 2779 *inst++ = MOD_REG | 0 /* eax */; 2780 2781 *inst++ = GROUP_0F; 2782 *inst++ = MOVZX_r_rm8; 2783 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; 2784 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2785 return SLJIT_SUCCESS; 2786 } 2787 2788 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { 2789 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); 2790 if (dst != SLJIT_R0) { 2791 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); 2792 FAIL_IF(!inst); 2793 INC_SIZE(1 + 3 + 2 + 1); 2794 /* Set low register to conditional flag. */ 2795 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2796 *inst++ = GROUP_0F; 2797 *inst++ = cond_set; 2798 *inst++ = MOD_REG | 0 /* eax */; 2799 *inst++ = OR_rm8_r8; 2800 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; 2801 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2802 } 2803 else { 2804 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); 2805 FAIL_IF(!inst); 2806 INC_SIZE(2 + 3 + 2 + 2); 2807 /* Set low register to conditional flag. */ 2808 *inst++ = XCHG_r_rm; 2809 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; 2810 *inst++ = GROUP_0F; 2811 *inst++ = cond_set; 2812 *inst++ = MOD_REG | 1 /* ecx */; 2813 *inst++ = OR_rm8_r8; 2814 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; 2815 *inst++ = XCHG_r_rm; 2816 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; 2817 } 2818 return SLJIT_SUCCESS; 2819 } 2820 2821 /* Set TMP_REG1 to the bit. */ 2822 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); 2823 FAIL_IF(!inst); 2824 INC_SIZE(1 + 3 + 3 + 1); 2825 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2826 /* Set al to conditional flag. 
*/ 2827 *inst++ = GROUP_0F; 2828 *inst++ = cond_set; 2829 *inst++ = MOD_REG | 0 /* eax */; 2830 2831 *inst++ = GROUP_0F; 2832 *inst++ = MOVZX_r_rm8; 2833 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; 2834 2835 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; 2836 2837 if (GET_OPCODE(op) < SLJIT_ADD) 2838 return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2839 2840 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 2841 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2842 compiler->skip_checks = 1; 2843 #endif 2844 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); 2845 #endif /* SLJIT_CONFIG_X86_64 */ 2846 } 2847 2848 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) 2849 { 2850 CHECK_ERROR(); 2851 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); 2852 ADJUST_LOCAL_OFFSET(dst, dstw); 2853 2854 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2855 2856 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2857 compiler->mode32 = 0; 2858 #endif 2859 2860 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); 2861 2862 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2863 if (NOT_HALFWORD(offset)) { 2864 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); 2865 #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 2866 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); 2867 return compiler->error; 2868 #else 2869 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); 2870 #endif 2871 } 2872 #endif 2873 2874 if (offset != 0) 2875 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); 2876 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); 2877 } 2878 2879 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) 2880 { 2881 sljit_ub *inst; 2882 struct sljit_const *const_; 2883 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2884 sljit_si reg; 2885 #endif 2886 2887 CHECK_ERROR_PTR(); 2888 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2889 ADJUST_LOCAL_OFFSET(dst, dstw); 2890 2891 CHECK_EXTRA_REGS(dst, dstw, (void)0); 2892 2893 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); 2894 PTR_FAIL_IF(!const_); 2895 set_const(const_, compiler); 2896 2897 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2898 compiler->mode32 = 0; 2899 reg = SLOW_IS_REG(dst) ? 
dst : TMP_REG1; 2900 2901 if (emit_load_imm64(compiler, reg, init_value)) 2902 return NULL; 2903 #else 2904 if (dst == SLJIT_UNUSED) 2905 dst = TMP_REG1; 2906 2907 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) 2908 return NULL; 2909 #endif 2910 2911 inst = (sljit_ub*)ensure_buf(compiler, 2); 2912 PTR_FAIL_IF(!inst); 2913 2914 *inst++ = 0; 2915 *inst++ = 1; 2916 2917 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2918 if (dst & SLJIT_MEM) 2919 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) 2920 return NULL; 2921 #endif 2922 2923 return const_; 2924 } 2925 2926 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) 2927 { 2928 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2929 *(sljit_sw*)addr = new_addr - (addr + 4); 2930 #else 2931 *(sljit_uw*)addr = new_addr; 2932 #endif 2933 } 2934 2935 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) 2936 { 2937 *(sljit_sw*)addr = new_constant; 2938 } 2939 2940 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void) 2941 { 2942 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 2943 if (cpu_has_sse2 == -1) 2944 get_cpu_features(); 2945 return cpu_has_sse2; 2946 #else 2947 return 1; 2948 #endif 2949 } 2950 2951 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void) 2952 { 2953 if (cpu_has_cmov == -1) 2954 get_cpu_features(); 2955 return cpu_has_cmov; 2956 } 2957 2958 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler, 2959 sljit_si type, 2960 sljit_si dst_reg, 2961 sljit_si src, sljit_sw srcw) 2962 { 2963 sljit_ub* inst; 2964 2965 CHECK_ERROR(); 2966 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 2967 CHECK_ARGUMENT(sljit_x86_is_cmov_available()); 2968 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP))); 2969 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); 2970 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP)); 2971 FUNCTION_CHECK_SRC(src, srcw); 2972 #endif 2973 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) 2974 if (SLJIT_UNLIKELY(!!compiler->verbose)) { 2975 fprintf(compiler->verbose, " x86_cmov%s %s%s, ", 2976 !(dst_reg & SLJIT_INT_OP) ? "" : ".i", 2977 JUMP_PREFIX(type), jump_names[type & 0xff]); 2978 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP); 2979 fprintf(compiler->verbose, ", "); 2980 sljit_verbose_param(compiler, src, srcw); 2981 fprintf(compiler->verbose, "\n"); 2982 } 2983 #endif 2984 2985 ADJUST_LOCAL_OFFSET(src, srcw); 2986 CHECK_EXTRA_REGS(src, srcw, (void)0); 2987 2988 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2989 compiler->mode32 = dst_reg & SLJIT_INT_OP; 2990 #endif 2991 dst_reg &= ~SLJIT_INT_OP; 2992 2993 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { 2994 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); 2995 src = TMP_REG1; 2996 srcw = 0; 2997 } 2998 2999 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); 3000 FAIL_IF(!inst); 3001 *inst++ = GROUP_0F; 3002 *inst = get_jump_code(type & 0xff) - 0x40; 3003 return SLJIT_SUCCESS; 3004 } 3005