/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
};
#endif
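/* Illustrative note (added commentary, not from the original sources):
   reg_map translates SLJIT's virtual register numbers into hardware
   register encodings, and reg_lmap keeps only the low three bits that fit
   into a ModRM/SIB byte. On x86-64, a mapping of 8 or above means the
   instruction needs the matching REX bit (REX.R, REX.X or REX.B) to reach
   r8-r15. */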
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)	((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)	((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS	0x0010
#define EX86_SHIFT_INS	0x0020
#define EX86_REX	0x0040
#define EX86_NO_REXW	0x0080
#define EX86_BYTE_ARG	0x0100
#define EX86_HALF_ARG	0x0200
#define EX86_PREF_66	0x0400
#define EX86_PREF_F2	0x0800
#define EX86_PREF_F3	0x1000
#define EX86_SSE2_OP1	0x2000
#define EX86_SSE2_OP2	0x4000
#define EX86_SSE2	(EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)		(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)		(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)		(*inst++ = (POP_r + (r)))
#define RET()			(*inst++ = (RET_near))
#define RET_I16(n)		(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)	(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
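/* Illustrative example (added commentary, not from the original sources):
   MOV_RM builds the ModRM byte from its mod:reg:rm (2/3/3 bit) fields;
   MOV_RM(3, 0, 1) emits the bytes 0x8b 0xc1, i.e. "mov eax, ecx", since
   mod == 3 selects the register-direct form. */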
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can safely be overwritten by
   different threads that detect the CPU features at the same time. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
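/* Illustrative note (added commentary, not from the original sources):
   CPUID leaf 1 reports feature flags in EDX; bit 26 indicates SSE2 and
   bit 15 indicates CMOV, which is exactly what the two shifts above
   extract. */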
static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84 /* je */;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82 /* jc */;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_C_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_C_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_C_FLOAT_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_C_FLOAT_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}
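/* Illustrative note (added commentary, not from the original sources):
   the values returned above are the second bytes of the long, 0x0f-prefixed
   Jcc forms; subtracting 0x10 yields the short rel8 opcode, e.g. 0x84
   ("je rel32") becomes 0x74 ("je rel8"), which is what
   generate_near_jump_code relies on below. */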
static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}
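/* Illustrative note (added commentary, not from the original sources):
   x86 relative jumps are measured from the end of the instruction, so the
   short-form test above uses jump->addr + 2 (opcode + rel8 is two bytes)
   and accepts displacements in the signed 8-bit range -128..127. */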
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Some space may be wasted because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}
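/* Illustrative note (added commentary, not from the original sources):
   this is the second of two passes: the instruction bytes recorded during
   compilation are copied into executable memory, then each jump is patched
   with a displacement relative to the end of its displacement field, which
   is what the "+ sizeof(...)" terms in the patch loop account for. */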
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
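/* Illustrative example (added commentary, not from the original sources):
   emit_save_flags produces "lea esp, [esp + sizeof(sljit_sw)]; pushf",
   which leaves esp unchanged overall and stores the flags word at [esp];
   emit_restore_flags mirrors it with "popf; lea esp, [esp - sizeof(sljit_sw)]".
   The 0x64 0x24 bytes are the ModRM/SIB pair selecting [esp + disp8]. */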
#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches all 4k pages belonging to the requested stack
	   space, whose size is passed in local_size. This is necessary on
	   Windows, where the stack can only grow in 4k steps. The function
	   just burns CPU cycles if the stack is already large enough, but that
	   cannot be known in advance, so it must always be called. I think
	   this is a bad design in general, even if it has some reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination: no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
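/* Illustrative note (added commentary, not from the original sources):
   x86 mov cannot take two memory operands, which is why the memory-to-
   memory case above is synthesized as a load into TMP_REG1 followed by a
   store. */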
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*inst |= MUL;
			break;
		case SLJIT_SMUL:
			*inst |= IMUL;
			break;
		case SLJIT_UDIV:
			*inst |= DIV;
			break;
		case SLJIT_SDIV:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
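/* Illustrative note (added commentary, not from the original sources):
   mul/div implicitly use eax and edx (rax/rdx), hence the compile-time
   asserts above; for signed division the dividend is sign-extended first
   with CDQ (edx:eax from eax), and the same 0x99 opcode prefixed with
   REX.W becomes CQO (rdx:rax from rax). */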
#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* Both src and dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or a register with reg_map[src] < 4 on x86-32. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}
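/* Illustrative note (added commentary, not from the original sources):
   without a REX prefix only eax, ecx, edx and ebx have byte-addressable
   low halves, so the x86-32 paths above route values through a register
   whose reg_map value is below 4, borrowing one with xchg when necessary;
   on x86-64 the EX86_REX flag forces a REX prefix instead. */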
static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
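/* Illustrative note (added commentary, not from the original sources):
   x86 NOT does not affect the flags, so emit_not_with_flags follows it
   with "or reg, reg", which sets the zero and sign flags from the result
   without changing its value. */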
static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
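/* Illustrative note (added commentary, not from the original sources):
   BSR returns the index of the highest set bit and is undefined for zero
   input, hence the CMOV (or conditional-branch) fallback that substitutes
   32 + 31 (or 64 + 63). The final "xor dst, 31" (or 63) turns a bit index
   b into 31 - b, i.e. the leading-zero count, and maps the zero-input
   sentinel to exactly 32 (or 64). */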
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}
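/* Illustrative note (added commentary, not from the original sources):
   the MOVU opcodes are moves that also update the base register; above
   they are lowered into the plain move plus a separate LEA that folds the
   offset into the base register. */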
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
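/* Illustrative example (added commentary, not from the original sources,
   and assuming the EX86_BIN_INS handling in the per-architecture files):
   binary-immediate instructions come in a sign-extended imm8 group (0x83)
   and a full imm32 group (0x81), with the operation selected by the /digit
   value OR-ed into the ModRM reg field by "*(inst + 1) |= (op_imm)"; e.g.
   "add ecx, 1" can be encoded in three bytes as 83 c1 01. */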
static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
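/* Illustrative note (added commentary, not from the original sources):
   "cumulative" here means commutative (add, and, or, xor), which permits
   the dst == src2 shortcut above; emit_non_cum_binary below handles
   operations such as sub, where the operand order cannot be swapped. */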
static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
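/* Illustrative example (added commentary, not from the original sources):
   the three IMUL forms used above are 0f af (r, r/m), 6b (r, r/m, imm8)
   and 69 (r, r/m, imm32); for instance "imul eax, ecx, 3" encodes as
   6b c1 03. */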
static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r, done = 0;

	/* These cases are better handled by the normal path. */
	if (!keep_flags) {
		if (dst == src1 && dstw == src1w)
			return SLJIT_ERR_UNSUPPORTED;
		if (dst == src2 && dstw == src2w)
			return SLJIT_ERR_UNSUPPORTED;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}

static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}
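/* Illustrative note (added commentary, not from the original sources):
   when the flags are not needed, a three-operand add can be done as
   "lea dst, [src1 + src2]", which never modifies the flags;
   emit_lea_binary returns SLJIT_ERR_UNSUPPORTED when it cannot do so and
   the caller falls back to a normal add. */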

static sljit_si emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must ensure it works even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
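
/* Illustrative sketch (Intel syntax): when the count is a register other
   than ECX and the destination is a general register, emit_shift above
   produces roughly:

     mov   dst, src1     ; unless src1 == dst
     mov   tmp1, ecx     ; preserve the shift register
     mov   ecx, src2     ; the count must live in CL
     shl   dst, cl
     mov   ecx, tmp1     ; restore

   The last, most defensive branch instead spills ECX to the stack on
   x86-32 (and to TMP_REG2 on x86-64), because ECX itself may be part of
   the destination address. */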

static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, sljit_si set_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	check_sljit_get_register_index(reg);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	check_sljit_get_float_register_index(reg);
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}
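
/* Example (sketch, compiled out): sljit_emit_op_custom copies raw machine
   code into the instruction stream, so a caller can inject instructions
   the portable API does not expose. Emitting a single NOP: */
#if 0
static sljit_si emit_raw_nop(struct sljit_compiler *compiler)
{
	sljit_ub nop = NOP; /* 0x90 */
	return sljit_emit_op_custom(compiler, &nop, 1);
}
#endif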

/* --------------------------------------------------------------------- */
/* Floating point operators */
/* --------------------------------------------------------------------- */

/* Alignment + two 16 byte constants (sign and absolute value masks)
   for each precision. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;

static void init_compiler(void)
{
	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
}

static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
		compiler->mode32 = 0;
#endif

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
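
/* Illustrative note: CVTTSD2SI (F2 0F 2C) and CVTTSS2SI (F3 0F 2C) both
   truncate toward zero, so e.g. -1.5 converts to -1, matching the C cast
   semantics. The EX86_SSE2_OP2 flag marks that only the source operand is
   an XMM register here; the destination is a general purpose register. */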

static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
			srcw = (sljit_si)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	compiler->flags_saved = 0;
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
}
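
/* Illustrative note: UCOMISD (66 0F 2E) / UCOMISS (0F 2E) set only ZF, PF
   and CF (OF, SF and AF are cleared):

     src1 >  src2   ->  ZF=0 PF=0 CF=0
     src1 <  src2   ->  ZF=0 PF=0 CF=1
     src1 == src2   ->  ZF=1 PF=0 CF=0
     unordered      ->  ZF=1 PF=1 CF=1  (either operand is NaN)

   which is why floating point branches are built from the unsigned
   condition codes. */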

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOVD) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of the source. From the SLJIT point
			   of view this is not an issue.
			   Note: in SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEGD:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABSD:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADDD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUBD:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MULD:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIVD:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
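
/* Example (sketch, compiled out): a typical fop2 call. SLJIT_FR0/SLJIT_FR1
   are assumed here as the usual sljit floating point register names; the
   argument swap in sljit_emit_fop2 lets "fr0 = fr1 + fr0" compile to a
   single ADDSD, since addition is commutative (SUBD/DIVD cannot swap). */
#if 0
static sljit_si emit_fadd_example(struct sljit_compiler *compiler)
{
	return sljit_emit_fop2(compiler, SLJIT_ADDD,
		SLJIT_FR0, 0, SLJIT_FR1, 0, SLJIT_FR0, 0);
}
#endif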

/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not an immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* An "xor reg, reg" here would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_R0) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}
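
	/* Illustrative note: the XCHG_EAX_r form used above and below is the
	   one byte encoding 90+r (xchg eax, reg), so freeing EAX for setcc
	   costs a single byte in each direction. 90+0, "xchg eax, eax", is
	   the canonical NOP. */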

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}
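
/* Example (sketch, compiled out): sljit_emit_const materializes a constant
   whose value can be patched after code generation with sljit_set_const.
   sljit_get_const_addr is assumed here to be the usual sljit helper for
   retrieving the patch address: */
#if 0
static void patchable_const_example(struct sljit_compiler *compiler)
{
	struct sljit_const *const_ = sljit_emit_const(compiler, SLJIT_R0, 0, 42);
	void *code = sljit_generate_code(compiler);
	/* Later, retarget the constant inside the generated code: */
	sljit_set_const(sljit_get_const_addr(const_), 1234);
	(void)code;
}
#endif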

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}
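
/* Illustrative note: on x86-32 the patchable target is the rel32 field of
   a CALL/JMP instruction, so sljit_set_jump_addr stores
   new_addr - (addr + 4): the displacement is relative to the first byte
   after the 4 byte field. On x86-64 the target of the indirect form is a
   64 bit absolute address and is stored as-is. */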