/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/
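
/* Index 4 is listed as "none" because it belongs to ESP/RSP: in ModRM
   encoding the value 0b100 in the rm field does not name a register but
   signals that a SIB byte follows (see also the r12 note below for the
   x86-64 high registers). */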

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
};
#endif

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40
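
/* The REX bits combine by OR-ing. An illustration (a sketch, not used by
   the compiler itself): "mov rax, r8" with MOV_rm_r (0x89) takes
   REX_W | REX_R = 0x4c and ModRM 0xc0, giving the byte sequence
   4c 89 c0. */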

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
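
/* For instance, 0x7fffffff still fits in a sign-extended 32-bit
   immediate, while 0x80000000 does not and has to be materialized
   through a register first (see emit_load_imm64 in the 64-bit
   backend). */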

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
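
/* Example (a sketch): MOV_RM(0x3, 0, 2) emits 8b c2, i.e. "mov eax, edx";
   mod 0x3 selects the register-direct form, matching MOD_REG >> 6. */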

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can safely be overwritten by
   different threads that detect the CPU features at the same time. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
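
/* The tested bits are EDX bits of CPUID leaf 1: bit 26 is SSE2 and
   bit 15 is CMOV. Callers initialize lazily, e.g. (as emit_clz does
   below):

	if (cpu_has_cmov == -1)
		get_cpu_features();
*/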

static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84 /* je */;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82 /* jc */;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_C_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_C_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_C_FLOAT_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_C_FLOAT_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}
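
/* The values above are the second byte of the two-byte near form
   (0f 8x rel32); the short form is the same code minus 0x10, so
   0x84 (je rel32) becomes 0x74 (je rel8). generate_near_jump_code
   below relies on this relationship. */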

static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}
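
/* Typical driver sequence around this function (a sketch only; the
   exact public API names are assumptions based on this sljit version):

	struct sljit_compiler *compiler = sljit_create_compiler();
	... emit an entry, some ops and a return ...
	void *code = sljit_generate_code(compiler);
	sljit_free_compiler(compiler);
	... cast code to a function pointer, call it ...
	sljit_free_code(code);
*/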

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
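
/* Net effect of the pair above: emit_save_flags (lea up one word, then
   pushf) stores the flags into the word at [esp]/[rsp] while leaving
   the stack pointer unchanged, and emit_restore_flags (popf, then lea
   back down) reads the same word back. This assumes the word at the
   stack top is reserved scratch space for the generated code. */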

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	This function touches all 4k pages that belong to the requested stack
	space, whose size is passed in local_size. This is necessary on Windows,
	where the stack can only grow in 4k steps. However, this function just
	burns CPU cycles if the stack is already large enough. You don't know
	that in advance, though, so it must always be called. I think this is
	a bad design in general, even if it has some reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination: no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*inst |= MUL;
			break;
		case SLJIT_SMUL:
			*inst |= IMUL;
			break;
		case SLJIT_UDIV:
			*inst |= DIV;
			break;
		case SLJIT_SDIV:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or, on x86-32, a register with
		   reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map[] entry is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}
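
/* Background for the reg_map[] >= 4 checks above: in 32-bit mode only
   EAX..EBX (indexes 0-3) have byte-sized forms (AL..BL); the byte
   encodings for indexes 4-7 mean AH..BH instead, so a value living in
   ESP..EDI must first be moved or exchanged into a low register. */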

static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
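
/* For example (a sketch), emit_unary(compiler, NEG_rm, ...) with EAX as
   the operand ends up emitting f7 d8 ("neg eax"): GROUP_F7 with the /3
   digit OR-ed into the ModRM byte. */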

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
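
/* Rationale (an x86 detail): "not" does not modify any flag, so the
   extra "or reg, reg" emitted above is what actually produces the
   zero flag for SLJIT_SET_E. */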

static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
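
/* How the constants in emit_clz work: bsr yields the index of the
   highest set bit and is undefined for a zero input, hence the
   cmovne/je guard. For a non-zero word, clz = 31 - index = index ^ 31;
   preloading 32 + 31 (or 64 + 63 in 64-bit mode) makes the zero-input
   case come out as 32 (or 64) after the final xor. */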

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
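
/* Sketch of what BINARY_IMM produces for "add reg, imm": the opcode
   byte chosen by emit_x86_instruction (defined in the per-arch file)
   is GROUP_BINARY_83 for a sign-extended byte immediate or
   GROUP_BINARY_81 for a full one, and the (op_imm) digit such as
   ADD (0 << 3) is OR-ed into the following ModRM byte. */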

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
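
/* emit_cum_binary handles the commutative operators (add/adc/and/or/
   xor), which is why the dst == src2 shortcut above may operate on dst
   directly; the non-commutative variant below (sub/sbb) has no such
   branch and only reuses dst as the work register when dst != src2. */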

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
    1627 		/* This version requires fewer memory writes. */
   1628 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1629 		if (src2 & SLJIT_IMM) {
   1630 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
   1631 		}
   1632 		else {
   1633 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1634 			FAIL_IF(!inst);
   1635 			*inst = op_rm;
   1636 		}
   1637 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1638 	}
   1639 
   1640 	return SLJIT_SUCCESS;
   1641 }
   1642 
   1643 static sljit_si emit_mul(struct sljit_compiler *compiler,
   1644 	sljit_si dst, sljit_sw dstw,
   1645 	sljit_si src1, sljit_sw src1w,
   1646 	sljit_si src2, sljit_sw src2w)
   1647 {
   1648 	sljit_ub* inst;
   1649 	sljit_si dst_r;
   1650 
   1651 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
   1652 
   1653 	/* Register destination. */
   1654 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
   1655 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
   1656 		FAIL_IF(!inst);
   1657 		*inst++ = GROUP_0F;
   1658 		*inst = IMUL_r_rm;
   1659 	}
   1660 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
   1661 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
   1662 		FAIL_IF(!inst);
   1663 		*inst++ = GROUP_0F;
   1664 		*inst = IMUL_r_rm;
   1665 	}
   1666 	else if (src1 & SLJIT_IMM) {
   1667 		if (src2 & SLJIT_IMM) {
   1668 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
   1669 			src2 = dst_r;
   1670 			src2w = 0;
   1671 		}
   1672 
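         		/* Pick the shortest IMUL form for the immediate: imul r, rm, imm8
         		   for values in [-128, 127], imul r, rm, imm32 for wider values
         		   (on x86-64 only when the value fits in sign extended 32 bits),
         		   otherwise the constant is loaded into a temporary register. */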
   1673 		if (src1w <= 127 && src1w >= -128) {
   1674 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1675 			FAIL_IF(!inst);
   1676 			*inst = IMUL_r_rm_i8;
   1677 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
   1678 			FAIL_IF(!inst);
   1679 			INC_SIZE(1);
   1680 			*inst = (sljit_sb)src1w;
   1681 		}
   1682 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1683 		else {
   1684 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1685 			FAIL_IF(!inst);
   1686 			*inst = IMUL_r_rm_i32;
   1687 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1688 			FAIL_IF(!inst);
   1689 			INC_SIZE(4);
   1690 			*(sljit_sw*)inst = src1w;
   1691 		}
   1692 #else
   1693 		else if (IS_HALFWORD(src1w)) {
   1694 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1695 			FAIL_IF(!inst);
   1696 			*inst = IMUL_r_rm_i32;
   1697 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1698 			FAIL_IF(!inst);
   1699 			INC_SIZE(4);
   1700 			*(sljit_si*)inst = (sljit_si)src1w;
   1701 		}
   1702 		else {
   1703 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
   1704 			if (dst_r != src2)
   1705 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
   1706 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
   1707 			FAIL_IF(!inst);
   1708 			*inst++ = GROUP_0F;
   1709 			*inst = IMUL_r_rm;
   1710 		}
   1711 #endif
   1712 	}
   1713 	else if (src2 & SLJIT_IMM) {
   1714 		/* Note: src1 is NOT immediate. */
   1715 
   1716 		if (src2w <= 127 && src2w >= -128) {
   1717 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1718 			FAIL_IF(!inst);
   1719 			*inst = IMUL_r_rm_i8;
   1720 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
   1721 			FAIL_IF(!inst);
   1722 			INC_SIZE(1);
   1723 			*inst = (sljit_sb)src2w;
   1724 		}
   1725 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1726 		else {
   1727 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1728 			FAIL_IF(!inst);
   1729 			*inst = IMUL_r_rm_i32;
   1730 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1731 			FAIL_IF(!inst);
   1732 			INC_SIZE(4);
   1733 			*(sljit_sw*)inst = src2w;
   1734 		}
   1735 #else
   1736 		else if (IS_HALFWORD(src2w)) {
   1737 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1738 			FAIL_IF(!inst);
   1739 			*inst = IMUL_r_rm_i32;
   1740 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1741 			FAIL_IF(!inst);
   1742 			INC_SIZE(4);
   1743 			*(sljit_si*)inst = (sljit_si)src2w;
   1744 		}
   1745 		else {
   1746 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
    1747 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
   1748 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
   1749 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
   1750 			FAIL_IF(!inst);
   1751 			*inst++ = GROUP_0F;
   1752 			*inst = IMUL_r_rm;
   1753 		}
   1754 #endif
   1755 	}
   1756 	else {
   1757 		/* Neither argument is immediate. */
   1758 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
   1759 			dst_r = TMP_REG1;
   1760 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
   1761 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
   1762 		FAIL_IF(!inst);
   1763 		*inst++ = GROUP_0F;
   1764 		*inst = IMUL_r_rm;
   1765 	}
   1766 
   1767 	if (dst_r == TMP_REG1)
   1768 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1769 
   1770 	return SLJIT_SUCCESS;
   1771 }
   1772 
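         /* ADD (and SUB with a negated immediate) can be emitted as LEA when the
            flags are not needed, since LEA computes an address sum without
            modifying any status flag: e.g. "lea eax, [ebx + ecx]" adds ebx and
            ecx while leaving the flags intact. This helper is best effort and
            returns SLJIT_ERR_UNSUPPORTED when no LEA form matches. */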
   1773 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
   1774 	sljit_si dst, sljit_sw dstw,
   1775 	sljit_si src1, sljit_sw src1w,
   1776 	sljit_si src2, sljit_sw src2w)
   1777 {
   1778 	sljit_ub* inst;
   1779 	sljit_si dst_r, done = 0;
   1780 
    1781 	/* These cases are better handled by the normal code path. */
   1782 	if (!keep_flags) {
   1783 		if (dst == src1 && dstw == src1w)
   1784 			return SLJIT_ERR_UNSUPPORTED;
   1785 		if (dst == src2 && dstw == src2w)
   1786 			return SLJIT_ERR_UNSUPPORTED;
   1787 	}
   1788 
   1789 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
   1790 
   1791 	if (FAST_IS_REG(src1)) {
   1792 		if (FAST_IS_REG(src2)) {
   1793 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
   1794 			FAIL_IF(!inst);
   1795 			*inst = LEA_r_m;
   1796 			done = 1;
   1797 		}
   1798 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1799 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1800 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
   1801 #else
   1802 		if (src2 & SLJIT_IMM) {
   1803 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
   1804 #endif
   1805 			FAIL_IF(!inst);
   1806 			*inst = LEA_r_m;
   1807 			done = 1;
   1808 		}
   1809 	}
   1810 	else if (FAST_IS_REG(src2)) {
   1811 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1812 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1813 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
   1814 #else
   1815 		if (src1 & SLJIT_IMM) {
   1816 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
   1817 #endif
   1818 			FAIL_IF(!inst);
   1819 			*inst = LEA_r_m;
   1820 			done = 1;
   1821 		}
   1822 	}
   1823 
   1824 	if (done) {
   1825 		if (dst_r == TMP_REG1)
   1826 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
   1827 		return SLJIT_SUCCESS;
   1828 	}
   1829 	return SLJIT_ERR_UNSUPPORTED;
   1830 }
   1831 
   1832 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
   1833 	sljit_si src1, sljit_sw src1w,
   1834 	sljit_si src2, sljit_sw src2w)
   1835 {
   1836 	sljit_ub* inst;
   1837 
   1838 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1839 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1840 #else
   1841 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
   1842 #endif
   1843 		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
   1844 		return SLJIT_SUCCESS;
   1845 	}
   1846 
   1847 	if (FAST_IS_REG(src1)) {
   1848 		if (src2 & SLJIT_IMM) {
   1849 			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
   1850 		}
   1851 		else {
   1852 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
   1853 			FAIL_IF(!inst);
   1854 			*inst = CMP_r_rm;
   1855 		}
   1856 		return SLJIT_SUCCESS;
   1857 	}
   1858 
   1859 	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
   1860 		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
   1861 		FAIL_IF(!inst);
   1862 		*inst = CMP_rm_r;
   1863 		return SLJIT_SUCCESS;
   1864 	}
   1865 
   1866 	if (src2 & SLJIT_IMM) {
   1867 		if (src1 & SLJIT_IMM) {
   1868 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1869 			src1 = TMP_REG1;
   1870 			src1w = 0;
   1871 		}
   1872 		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
   1873 	}
   1874 	else {
   1875 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1876 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1877 		FAIL_IF(!inst);
   1878 		*inst = CMP_r_rm;
   1879 	}
   1880 	return SLJIT_SUCCESS;
   1881 }
   1882 
   1883 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
   1884 	sljit_si src1, sljit_sw src1w,
   1885 	sljit_si src2, sljit_sw src2w)
   1886 {
   1887 	sljit_ub* inst;
   1888 
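         	/* TEST only ANDs its operands to set the flags, so the operand order
         	   can be chosen freely; immediate forms use the F7 /0 group encoding
         	   and the one byte shorter EAX form when possible. */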
   1889 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1890 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1891 #else
   1892 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
   1893 #endif
   1894 		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
   1895 		return SLJIT_SUCCESS;
   1896 	}
   1897 
   1898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1899 	if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
    1900 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1901 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
   1902 #endif
   1903 		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
   1904 		return SLJIT_SUCCESS;
   1905 	}
   1906 
   1907 	if (FAST_IS_REG(src1)) {
   1908 		if (src2 & SLJIT_IMM) {
   1909 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1910 			if (IS_HALFWORD(src2w) || compiler->mode32) {
   1911 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
   1912 				FAIL_IF(!inst);
   1913 				*inst = GROUP_F7;
   1914 			}
   1915 			else {
   1916 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
   1917 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
   1918 				FAIL_IF(!inst);
   1919 				*inst = TEST_rm_r;
   1920 			}
   1921 #else
   1922 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
   1923 			FAIL_IF(!inst);
   1924 			*inst = GROUP_F7;
   1925 #endif
   1926 		}
   1927 		else {
   1928 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
   1929 			FAIL_IF(!inst);
   1930 			*inst = TEST_rm_r;
   1931 		}
   1932 		return SLJIT_SUCCESS;
   1933 	}
   1934 
   1935 	if (FAST_IS_REG(src2)) {
   1936 		if (src1 & SLJIT_IMM) {
   1937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1938 			if (IS_HALFWORD(src1w) || compiler->mode32) {
   1939 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
   1940 				FAIL_IF(!inst);
   1941 				*inst = GROUP_F7;
   1942 			}
   1943 			else {
   1944 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
   1945 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
   1946 				FAIL_IF(!inst);
   1947 				*inst = TEST_rm_r;
   1948 			}
   1949 #else
   1950 			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
   1951 			FAIL_IF(!inst);
   1952 			*inst = GROUP_F7;
   1953 #endif
   1954 		}
   1955 		else {
   1956 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
   1957 			FAIL_IF(!inst);
   1958 			*inst = TEST_rm_r;
   1959 		}
   1960 		return SLJIT_SUCCESS;
   1961 	}
   1962 
   1963 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1964 	if (src2 & SLJIT_IMM) {
   1965 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1966 		if (IS_HALFWORD(src2w) || compiler->mode32) {
   1967 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
   1968 			FAIL_IF(!inst);
   1969 			*inst = GROUP_F7;
   1970 		}
   1971 		else {
   1972 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
   1973 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
   1974 			FAIL_IF(!inst);
   1975 			*inst = TEST_rm_r;
   1976 		}
   1977 #else
   1978 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
   1979 		FAIL_IF(!inst);
   1980 		*inst = GROUP_F7;
   1981 #endif
   1982 	}
   1983 	else {
   1984 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1985 		FAIL_IF(!inst);
   1986 		*inst = TEST_rm_r;
   1987 	}
   1988 	return SLJIT_SUCCESS;
   1989 }
   1990 
   1991 static sljit_si emit_shift(struct sljit_compiler *compiler,
   1992 	sljit_ub mode,
   1993 	sljit_si dst, sljit_sw dstw,
   1994 	sljit_si src1, sljit_sw src1w,
   1995 	sljit_si src2, sljit_sw src2w)
   1996 {
   1997 	sljit_ub* inst;
   1998 
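         	/* x86 shifts either by an immediate or by the cl register, which is
         	   why SLJIT_PREF_SHIFT_REG (ecx) gets special treatment below: any
         	   other variable shift count must first be moved into ecx. */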
   1999 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
   2000 		if (dst == src1 && dstw == src1w) {
   2001 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
   2002 			FAIL_IF(!inst);
   2003 			*inst |= mode;
   2004 			return SLJIT_SUCCESS;
   2005 		}
   2006 		if (dst == SLJIT_UNUSED) {
   2007 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2008 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
   2009 			FAIL_IF(!inst);
   2010 			*inst |= mode;
   2011 			return SLJIT_SUCCESS;
   2012 		}
   2013 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
   2014 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2015 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2016 			FAIL_IF(!inst);
   2017 			*inst |= mode;
   2018 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2019 			return SLJIT_SUCCESS;
   2020 		}
   2021 		if (FAST_IS_REG(dst)) {
   2022 			EMIT_MOV(compiler, dst, 0, src1, src1w);
   2023 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
   2024 			FAIL_IF(!inst);
   2025 			*inst |= mode;
   2026 			return SLJIT_SUCCESS;
   2027 		}
   2028 
   2029 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2030 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
   2031 		FAIL_IF(!inst);
   2032 		*inst |= mode;
   2033 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   2034 		return SLJIT_SUCCESS;
   2035 	}
   2036 
   2037 	if (dst == SLJIT_PREF_SHIFT_REG) {
   2038 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2039 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   2040 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2041 		FAIL_IF(!inst);
   2042 		*inst |= mode;
   2043 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2044 	}
   2045 	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
   2046 		if (src1 != dst)
   2047 			EMIT_MOV(compiler, dst, 0, src1, src1w);
   2048 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
   2049 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   2050 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
   2051 		FAIL_IF(!inst);
   2052 		*inst |= mode;
   2053 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2054 	}
   2055 	else {
    2056 		/* This case is really difficult, since ecx itself may be used
    2057 		   for addressing, and the code must work even in that case. */
   2058 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2059 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2060 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
   2061 #else
   2062 		/* [esp+0] contains the flags. */
   2063 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
   2064 #endif
   2065 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   2066 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2067 		FAIL_IF(!inst);
   2068 		*inst |= mode;
   2069 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2070 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
   2071 #else
   2072 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
   2073 #endif
   2074 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   2075 	}
   2076 
   2077 	return SLJIT_SUCCESS;
   2078 }
   2079 
   2080 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
   2081 	sljit_ub mode, sljit_si set_flags,
   2082 	sljit_si dst, sljit_sw dstw,
   2083 	sljit_si src1, sljit_sw src1w,
   2084 	sljit_si src2, sljit_sw src2w)
   2085 {
   2086 	/* The CPU does not set flags if the shift count is 0. */
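         	/* Hence a constant shift of zero either moves the value untouched
         	   (when no flags are requested) or re-emits it through "or dst, 0"
         	   to materialize the zero and sign flags, while variable shifts
         	   bracket the operation with "cmp dst, 0" when flags are needed. */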
   2087 	if (src2 & SLJIT_IMM) {
   2088 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2089 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
   2090 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2091 #else
   2092 		if ((src2w & 0x1f) != 0)
   2093 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2094 #endif
   2095 		if (!set_flags)
   2096 			return emit_mov(compiler, dst, dstw, src1, src1w);
   2097 		/* OR dst, src, 0 */
   2098 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
   2099 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
   2100 	}
   2101 
   2102 	if (!set_flags)
   2103 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2104 
   2105 	if (!FAST_IS_REG(dst))
   2106 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
   2107 
    2108 	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
   2109 
   2110 	if (FAST_IS_REG(dst))
   2111 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
   2112 	return SLJIT_SUCCESS;
   2113 }
   2114 
   2115 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
   2116 	sljit_si dst, sljit_sw dstw,
   2117 	sljit_si src1, sljit_sw src1w,
   2118 	sljit_si src2, sljit_sw src2w)
   2119 {
   2120 	CHECK_ERROR();
   2121 	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   2122 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2123 	ADJUST_LOCAL_OFFSET(src1, src1w);
   2124 	ADJUST_LOCAL_OFFSET(src2, src2w);
   2125 
   2126 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2127 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
   2128 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
   2129 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2130 	compiler->mode32 = op & SLJIT_INT_OP;
   2131 #endif
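         	/* Illustration (hypothetical usage): sljit_emit_op2(compiler,
         	   SLJIT_ADD, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_R2, 0) requests no
         	   flags, so the SLJIT_ADD case below can encode it as a single LEA
         	   through emit_lea_binary. */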
   2132 
   2133 	if (GET_OPCODE(op) >= SLJIT_MUL) {
   2134 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   2135 			compiler->flags_saved = 0;
   2136 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   2137 			FAIL_IF(emit_save_flags(compiler));
   2138 	}
   2139 
   2140 	switch (GET_OPCODE(op)) {
   2141 	case SLJIT_ADD:
   2142 		if (!GET_FLAGS(op)) {
   2143 			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
   2144 				return compiler->error;
   2145 		}
   2146 		else
   2147 			compiler->flags_saved = 0;
   2148 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   2149 			FAIL_IF(emit_save_flags(compiler));
   2150 		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
   2151 			dst, dstw, src1, src1w, src2, src2w);
   2152 	case SLJIT_ADDC:
   2153 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
   2154 			FAIL_IF(emit_restore_flags(compiler, 1));
   2155 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
   2156 			FAIL_IF(emit_save_flags(compiler));
   2157 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   2158 			compiler->flags_saved = 0;
   2159 		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
   2160 			dst, dstw, src1, src1w, src2, src2w);
   2161 	case SLJIT_SUB:
   2162 		if (!GET_FLAGS(op)) {
   2163 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
   2164 				return compiler->error;
   2165 		}
   2166 		else
   2167 			compiler->flags_saved = 0;
   2168 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   2169 			FAIL_IF(emit_save_flags(compiler));
   2170 		if (dst == SLJIT_UNUSED)
   2171 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
   2172 		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
   2173 			dst, dstw, src1, src1w, src2, src2w);
   2174 	case SLJIT_SUBC:
   2175 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
   2176 			FAIL_IF(emit_restore_flags(compiler, 1));
   2177 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
   2178 			FAIL_IF(emit_save_flags(compiler));
   2179 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   2180 			compiler->flags_saved = 0;
   2181 		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
   2182 			dst, dstw, src1, src1w, src2, src2w);
   2183 	case SLJIT_MUL:
   2184 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
   2185 	case SLJIT_AND:
   2186 		if (dst == SLJIT_UNUSED)
   2187 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
   2188 		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
   2189 			dst, dstw, src1, src1w, src2, src2w);
   2190 	case SLJIT_OR:
   2191 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
   2192 			dst, dstw, src1, src1w, src2, src2w);
   2193 	case SLJIT_XOR:
   2194 		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
   2195 			dst, dstw, src1, src1w, src2, src2w);
   2196 	case SLJIT_SHL:
   2197 		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
   2198 			dst, dstw, src1, src1w, src2, src2w);
   2199 	case SLJIT_LSHR:
   2200 		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
   2201 			dst, dstw, src1, src1w, src2, src2w);
   2202 	case SLJIT_ASHR:
   2203 		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
   2204 			dst, dstw, src1, src1w, src2, src2w);
   2205 	}
   2206 
   2207 	return SLJIT_SUCCESS;
   2208 }
   2209 
   2210 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
   2211 {
   2212 	check_sljit_get_register_index(reg);
   2213 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2214 	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
   2215 		return -1;
   2216 #endif
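         	/* On x86-32 the registers rejected above are virtual: they live in
         	   stack slots (see CHECK_EXTRA_REGS), so no machine register index
         	   exists for them. */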
   2217 	return reg_map[reg];
   2218 }
   2219 
   2220 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
   2221 {
   2222 	check_sljit_get_float_register_index(reg);
   2223 	return reg;
   2224 }
   2225 
   2226 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
   2227 	void *instruction, sljit_si size)
   2228 {
   2229 	sljit_ub *inst;
   2230 
   2231 	CHECK_ERROR();
   2232 	check_sljit_emit_op_custom(compiler, instruction, size);
   2233 	SLJIT_ASSERT(size > 0 && size < 16);
   2234 
   2235 	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
   2236 	FAIL_IF(!inst);
   2237 	INC_SIZE(size);
   2238 	SLJIT_MEMMOVE(inst, instruction, size);
   2239 	return SLJIT_SUCCESS;
   2240 }
   2241 
   2242 /* --------------------------------------------------------------------- */
   2243 /*  Floating point operators                                             */
   2244 /* --------------------------------------------------------------------- */
   2245 
    2246 /* Alignment (3 words) + 4 * 16 bytes of constants. */
   2247 static sljit_si sse2_data[3 + (4 + 4) * 2];
   2248 static sljit_si *sse2_buffer;
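         /* The aligned buffer holds four 16 byte constants used by XORPD and
            ANDPD: the single precision sign bit mask at [0], the single
            precision absolute value mask at [4], and their double precision
            equivalents at [8..9] and [12..13]. Only the low element of each
            slot matters for the scalar operations emitted below. */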
   2249 
   2250 static void init_compiler(void)
   2251 {
   2252 	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
   2253 	/* Single precision constants. */
   2254 	sse2_buffer[0] = 0x80000000;
   2255 	sse2_buffer[4] = 0x7fffffff;
   2256 	/* Double precision constants. */
   2257 	sse2_buffer[8] = 0;
   2258 	sse2_buffer[9] = 0x80000000;
   2259 	sse2_buffer[12] = 0xffffffff;
   2260 	sse2_buffer[13] = 0x7fffffff;
   2261 }
   2262 
   2263 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
   2264 {
   2265 #ifdef SLJIT_IS_FPU_AVAILABLE
   2266 	return SLJIT_IS_FPU_AVAILABLE;
   2267 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
   2268 	if (cpu_has_sse2 == -1)
   2269 		get_cpu_features();
   2270 	return cpu_has_sse2;
   2271 #else /* SLJIT_DETECT_SSE2 */
   2272 	return 1;
   2273 #endif /* SLJIT_DETECT_SSE2 */
   2274 }
   2275 
   2276 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
   2277 	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
   2278 {
   2279 	sljit_ub *inst;
   2280 
   2281 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2282 	FAIL_IF(!inst);
   2283 	*inst++ = GROUP_0F;
   2284 	*inst = opcode;
   2285 	return SLJIT_SUCCESS;
   2286 }
   2287 
   2288 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
   2289 	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
   2290 {
   2291 	sljit_ub *inst;
   2292 
   2293 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2294 	FAIL_IF(!inst);
   2295 	*inst++ = GROUP_0F;
   2296 	*inst = opcode;
   2297 	return SLJIT_SUCCESS;
   2298 }
   2299 
   2300 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
   2301 	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
   2302 {
   2303 	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
   2304 }
   2305 
   2306 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
   2307 	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
   2308 {
   2309 	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
   2310 }
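         /* The scalar helpers above select precision through the mandatory
            prefix: F3 0F <op> is the single precision (ss) form and F2 0F <op>
            is the double precision (sd) form, while emit_sse2_logic uses the
            plain or 66 prefixed packed encodings. */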
   2311 
   2312 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
   2313 	sljit_si dst, sljit_sw dstw,
   2314 	sljit_si src, sljit_sw srcw)
   2315 {
   2316 	sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
   2317 	sljit_ub *inst;
   2318 
   2319 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2320 	if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
   2321 		compiler->mode32 = 0;
   2322 #endif
   2323 
   2324 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
   2325 	FAIL_IF(!inst);
   2326 	*inst++ = GROUP_0F;
   2327 	*inst = CVTTSD2SI_r_xm;
   2328 
   2329 	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
   2330 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
   2331 	return SLJIT_SUCCESS;
   2332 }
   2333 
   2334 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
   2335 	sljit_si dst, sljit_sw dstw,
   2336 	sljit_si src, sljit_sw srcw)
   2337 {
   2338 	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
   2339 	sljit_ub *inst;
   2340 
   2341 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2342 	if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
   2343 		compiler->mode32 = 0;
   2344 #endif
   2345 
   2346 	if (src & SLJIT_IMM) {
   2347 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2348 		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
   2349 			srcw = (sljit_si)srcw;
   2350 #endif
   2351 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   2352 		src = TMP_REG1;
   2353 		srcw = 0;
   2354 	}
   2355 
   2356 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
   2357 	FAIL_IF(!inst);
   2358 	*inst++ = GROUP_0F;
   2359 	*inst = CVTSI2SD_x_rm;
   2360 
   2361 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2362 	compiler->mode32 = 1;
   2363 #endif
   2364 	if (dst_r == TMP_FREG)
   2365 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2366 	return SLJIT_SUCCESS;
   2367 }
   2368 
   2369 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
   2370 	sljit_si src1, sljit_sw src1w,
   2371 	sljit_si src2, sljit_sw src2w)
   2372 {
   2373 	compiler->flags_saved = 0;
   2374 	if (!FAST_IS_REG(src1)) {
   2375 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
   2376 		src1 = TMP_FREG;
   2377 	}
   2378 	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
   2379 }
   2380 
   2381 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
   2382 	sljit_si dst, sljit_sw dstw,
   2383 	sljit_si src, sljit_sw srcw)
   2384 {
   2385 	sljit_si dst_r;
   2386 
   2387 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2388 	compiler->mode32 = 1;
   2389 #endif
   2390 
   2391 	CHECK_ERROR();
   2392 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
   2393 
   2394 	if (GET_OPCODE(op) == SLJIT_MOVD) {
   2395 		if (FAST_IS_REG(dst))
   2396 			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
   2397 		if (FAST_IS_REG(src))
   2398 			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
   2399 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
   2400 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2401 	}
   2402 
   2403 	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
   2404 		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
   2405 		if (FAST_IS_REG(src)) {
    2406 			/* We overwrite the high bits of the source. From the SLJIT
    2407 			   point of view, this is not an issue.
   2408 			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
   2409 			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
   2410 		}
   2411 		else {
   2412 			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
   2413 			src = TMP_FREG;
   2414 		}
   2415 
   2416 		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
   2417 		if (dst_r == TMP_FREG)
   2418 			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2419 		return SLJIT_SUCCESS;
   2420 	}
   2421 
   2422 	if (SLOW_IS_REG(dst)) {
   2423 		dst_r = dst;
   2424 		if (dst != src)
   2425 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
   2426 	}
   2427 	else {
   2428 		dst_r = TMP_FREG;
   2429 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
   2430 	}
   2431 
   2432 	switch (GET_OPCODE(op)) {
   2433 	case SLJIT_NEGD:
   2434 		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
   2435 		break;
   2436 
   2437 	case SLJIT_ABSD:
   2438 		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
   2439 		break;
   2440 	}
   2441 
   2442 	if (dst_r == TMP_FREG)
   2443 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2444 	return SLJIT_SUCCESS;
   2445 }
   2446 
   2447 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
   2448 	sljit_si dst, sljit_sw dstw,
   2449 	sljit_si src1, sljit_sw src1w,
   2450 	sljit_si src2, sljit_sw src2w)
   2451 {
   2452 	sljit_si dst_r;
   2453 
   2454 	CHECK_ERROR();
   2455 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   2456 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2457 	ADJUST_LOCAL_OFFSET(src1, src1w);
   2458 	ADJUST_LOCAL_OFFSET(src2, src2w);
   2459 
   2460 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2461 	compiler->mode32 = 1;
   2462 #endif
   2463 
   2464 	if (FAST_IS_REG(dst)) {
   2465 		dst_r = dst;
   2466 		if (dst == src1)
   2467 			; /* Do nothing here. */
   2468 		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
   2469 			/* Swap arguments. */
   2470 			src2 = src1;
   2471 			src2w = src1w;
   2472 		}
   2473 		else if (dst != src2)
   2474 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
   2475 		else {
   2476 			dst_r = TMP_FREG;
   2477 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
   2478 		}
   2479 	}
   2480 	else {
   2481 		dst_r = TMP_FREG;
   2482 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
   2483 	}
   2484 
   2485 	switch (GET_OPCODE(op)) {
   2486 	case SLJIT_ADDD:
   2487 		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2488 		break;
   2489 
   2490 	case SLJIT_SUBD:
   2491 		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2492 		break;
   2493 
   2494 	case SLJIT_MULD:
   2495 		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2496 		break;
   2497 
   2498 	case SLJIT_DIVD:
   2499 		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2500 		break;
   2501 	}
   2502 
   2503 	if (dst_r == TMP_FREG)
   2504 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2505 	return SLJIT_SUCCESS;
   2506 }
   2507 
   2508 /* --------------------------------------------------------------------- */
   2509 /*  Conditional instructions                                             */
   2510 /* --------------------------------------------------------------------- */
   2511 
   2512 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2513 {
   2514 	sljit_ub *inst;
   2515 	struct sljit_label *label;
   2516 
   2517 	CHECK_ERROR_PTR();
   2518 	check_sljit_emit_label(compiler);
   2519 
    2520 	/* We should restore the flags before the label,
    2521 	   since other taken jumps have their own flags as well. */
   2522 	if (SLJIT_UNLIKELY(compiler->flags_saved))
   2523 		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
   2524 
   2525 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2526 		return compiler->last_label;
   2527 
   2528 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2529 	PTR_FAIL_IF(!label);
   2530 	set_label(label, compiler);
   2531 
   2532 	inst = (sljit_ub*)ensure_buf(compiler, 2);
   2533 	PTR_FAIL_IF(!inst);
   2534 
   2535 	*inst++ = 0;
   2536 	*inst++ = 0;
   2537 
   2538 	return label;
   2539 }
   2540 
   2541 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
   2542 {
   2543 	sljit_ub *inst;
   2544 	struct sljit_jump *jump;
   2545 
   2546 	CHECK_ERROR_PTR();
   2547 	check_sljit_emit_jump(compiler, type);
   2548 
   2549 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
   2550 		if ((type & 0xff) <= SLJIT_JUMP)
   2551 			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
   2552 		compiler->flags_saved = 0;
   2553 	}
   2554 
   2555 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2556 	PTR_FAIL_IF_NULL(jump);
   2557 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   2558 	type &= 0xff;
   2559 
   2560 	if (type >= SLJIT_CALL1)
   2561 		PTR_FAIL_IF(call_with_args(compiler, type));
   2562 
   2563 	/* Worst case size. */
   2564 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2565 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
   2566 #else
   2567 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
   2568 #endif
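         	/* On x86-32 this is a 5 byte near jmp/call or a 6 byte jcc, both
         	   with rel32 operands. On x86-64 the worst case is an absolute
         	   target: a 10 byte mov reg, imm64 plus a 3 byte indirect jump,
         	   preceded by a 2 byte inverted short jump when conditional. */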
   2569 
   2570 	inst = (sljit_ub*)ensure_buf(compiler, 2);
   2571 	PTR_FAIL_IF_NULL(inst);
   2572 
   2573 	*inst++ = 0;
   2574 	*inst++ = type + 4;
   2575 	return jump;
   2576 }
   2577 
   2578 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
   2579 {
   2580 	sljit_ub *inst;
   2581 	struct sljit_jump *jump;
   2582 
   2583 	CHECK_ERROR();
   2584 	check_sljit_emit_ijump(compiler, type, src, srcw);
   2585 	ADJUST_LOCAL_OFFSET(src, srcw);
   2586 
   2587 	CHECK_EXTRA_REGS(src, srcw, (void)0);
   2588 
   2589 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
   2590 		if (type <= SLJIT_JUMP)
   2591 			FAIL_IF(emit_restore_flags(compiler, 0));
   2592 		compiler->flags_saved = 0;
   2593 	}
   2594 
   2595 	if (type >= SLJIT_CALL1) {
   2596 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2597 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
   2598 		if (src == SLJIT_R2) {
   2599 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
   2600 			src = TMP_REG1;
   2601 		}
   2602 		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
   2603 			srcw += sizeof(sljit_sw);
   2604 #endif
   2605 #endif
   2606 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
   2607 		if (src == SLJIT_R2) {
   2608 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
   2609 			src = TMP_REG1;
   2610 		}
   2611 #endif
   2612 		FAIL_IF(call_with_args(compiler, type));
   2613 	}
   2614 
   2615 	if (src == SLJIT_IMM) {
   2616 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2617 		FAIL_IF_NULL(jump);
   2618 		set_jump(jump, compiler, JUMP_ADDR);
   2619 		jump->u.target = srcw;
   2620 
   2621 		/* Worst case size. */
   2622 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2623 		compiler->size += 5;
   2624 #else
   2625 		compiler->size += 10 + 3;
   2626 #endif
   2627 
   2628 		inst = (sljit_ub*)ensure_buf(compiler, 2);
   2629 		FAIL_IF_NULL(inst);
   2630 
   2631 		*inst++ = 0;
   2632 		*inst++ = type + 4;
   2633 	}
   2634 	else {
   2635 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2636 		/* REX_W is not necessary (src is not immediate). */
   2637 		compiler->mode32 = 1;
   2638 #endif
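         		/* An indirect call or jump through a register always operates on
         		   the full 64 bit value in long mode regardless of the operand
         		   size attribute, so emitting it in 32 bit mode simply saves an
         		   unneeded REX prefix. */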
   2639 		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
   2640 		FAIL_IF(!inst);
   2641 		*inst++ = GROUP_FF;
   2642 		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
   2643 	}
   2644 	return SLJIT_SUCCESS;
   2645 }
   2646 
   2647 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
   2648 	sljit_si dst, sljit_sw dstw,
   2649 	sljit_si src, sljit_sw srcw,
   2650 	sljit_si type)
   2651 {
   2652 	sljit_ub *inst;
   2653 	sljit_ub cond_set = 0;
   2654 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2655 	sljit_si reg;
   2656 #else
    2657 	/* CHECK_EXTRA_REGS might overwrite these values. */
   2658 	sljit_si dst_save = dst;
   2659 	sljit_sw dstw_save = dstw;
   2660 #endif
   2661 
   2662 	CHECK_ERROR();
   2663 	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
   2664 
   2665 	if (dst == SLJIT_UNUSED)
   2666 		return SLJIT_SUCCESS;
   2667 
   2668 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2669 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2670 	if (SLJIT_UNLIKELY(compiler->flags_saved))
   2671 		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
   2672 
   2673 	/* setcc = jcc + 0x10. */
   2674 	cond_set = get_jump_code(type) + 0x10;
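         	/* In the 0F opcode space jcc rel32 is 0x80 + cc, setcc is 0x90 + cc
         	   and cmovcc is 0x40 + cc, so the conditional set (and later the
         	   conditional move as cond_set - 0x50) is derived from the jump
         	   opcode by simple arithmetic. */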
   2675 
   2676 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2677 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
   2678 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
   2679 		FAIL_IF(!inst);
   2680 		INC_SIZE(4 + 3);
   2681 		/* Set low register to conditional flag. */
   2682 		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
   2683 		*inst++ = GROUP_0F;
   2684 		*inst++ = cond_set;
   2685 		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
   2686 		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
   2687 		*inst++ = OR_rm8_r8;
   2688 		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
   2689 		return SLJIT_SUCCESS;
   2690 	}
   2691 
   2692 	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
   2693 
   2694 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
   2695 	FAIL_IF(!inst);
   2696 	INC_SIZE(4 + 4);
   2697 	/* Set low register to conditional flag. */
   2698 	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
   2699 	*inst++ = GROUP_0F;
   2700 	*inst++ = cond_set;
   2701 	*inst++ = MOD_REG | reg_lmap[reg];
   2702 	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
   2703 	*inst++ = GROUP_0F;
   2704 	*inst++ = MOVZX_r_rm8;
   2705 	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
   2706 
   2707 	if (reg != TMP_REG1)
   2708 		return SLJIT_SUCCESS;
   2709 
   2710 	if (GET_OPCODE(op) < SLJIT_ADD) {
   2711 		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
   2712 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
   2713 	}
   2714 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2715 	compiler->skip_checks = 1;
   2716 #endif
   2717 	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
   2718 #else /* SLJIT_CONFIG_X86_64 */
   2719 	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
   2720 		if (reg_map[dst] <= 4) {
   2721 			/* Low byte is accessible. */
   2722 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
   2723 			FAIL_IF(!inst);
   2724 			INC_SIZE(3 + 3);
   2725 			/* Set low byte to conditional flag. */
   2726 			*inst++ = GROUP_0F;
   2727 			*inst++ = cond_set;
   2728 			*inst++ = MOD_REG | reg_map[dst];
   2729 
   2730 			*inst++ = GROUP_0F;
   2731 			*inst++ = MOVZX_r_rm8;
   2732 			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
   2733 			return SLJIT_SUCCESS;
   2734 		}
   2735 
   2736 		/* Low byte is not accessible. */
   2737 		if (cpu_has_cmov == -1)
   2738 			get_cpu_features();
   2739 
   2740 		if (cpu_has_cmov) {
   2741 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
    2742 			/* An xor reg, reg operation would overwrite the flags we still need. */
   2743 			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
   2744 
   2745 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
   2746 			FAIL_IF(!inst);
   2747 			INC_SIZE(3);
   2748 
   2749 			*inst++ = GROUP_0F;
   2750 			/* cmovcc = setcc - 0x50. */
   2751 			*inst++ = cond_set - 0x50;
   2752 			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
   2753 			return SLJIT_SUCCESS;
   2754 		}
   2755 
   2756 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
   2757 		FAIL_IF(!inst);
   2758 		INC_SIZE(1 + 3 + 3 + 1);
   2759 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2760 		/* Set al to conditional flag. */
   2761 		*inst++ = GROUP_0F;
   2762 		*inst++ = cond_set;
   2763 		*inst++ = MOD_REG | 0 /* eax */;
   2764 
   2765 		*inst++ = GROUP_0F;
   2766 		*inst++ = MOVZX_r_rm8;
   2767 		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
   2768 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2769 		return SLJIT_SUCCESS;
   2770 	}
   2771 
   2772 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
   2773 		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
   2774 		if (dst != SLJIT_R0) {
   2775 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
   2776 			FAIL_IF(!inst);
   2777 			INC_SIZE(1 + 3 + 2 + 1);
   2778 			/* Set low register to conditional flag. */
   2779 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2780 			*inst++ = GROUP_0F;
   2781 			*inst++ = cond_set;
   2782 			*inst++ = MOD_REG | 0 /* eax */;
   2783 			*inst++ = OR_rm8_r8;
   2784 			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
   2785 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2786 		}
   2787 		else {
   2788 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
   2789 			FAIL_IF(!inst);
   2790 			INC_SIZE(2 + 3 + 2 + 2);
   2791 			/* Set low register to conditional flag. */
   2792 			*inst++ = XCHG_r_rm;
   2793 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
   2794 			*inst++ = GROUP_0F;
   2795 			*inst++ = cond_set;
   2796 			*inst++ = MOD_REG | 1 /* ecx */;
   2797 			*inst++ = OR_rm8_r8;
   2798 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
   2799 			*inst++ = XCHG_r_rm;
   2800 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
   2801 		}
   2802 		return SLJIT_SUCCESS;
   2803 	}
   2804 
   2805 	/* Set TMP_REG1 to the bit. */
   2806 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
   2807 	FAIL_IF(!inst);
   2808 	INC_SIZE(1 + 3 + 3 + 1);
   2809 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2810 	/* Set al to conditional flag. */
   2811 	*inst++ = GROUP_0F;
   2812 	*inst++ = cond_set;
   2813 	*inst++ = MOD_REG | 0 /* eax */;
   2814 
   2815 	*inst++ = GROUP_0F;
   2816 	*inst++ = MOVZX_r_rm8;
   2817 	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
   2818 
   2819 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2820 
   2821 	if (GET_OPCODE(op) < SLJIT_ADD)
   2822 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
   2823 
   2824 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2825 	compiler->skip_checks = 1;
   2826 #endif
   2827 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
   2828 #endif /* SLJIT_CONFIG_X86_64 */
   2829 }
   2830 
   2831 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
   2832 {
   2833 	CHECK_ERROR();
   2834 	check_sljit_get_local_base(compiler, dst, dstw, offset);
   2835 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2836 
   2837 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2838 
   2839 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2840 	compiler->mode32 = 0;
   2841 #endif
   2842 
   2843 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
   2844 
   2845 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2846 	if (NOT_HALFWORD(offset)) {
   2847 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
   2848 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2849 		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
   2850 		return compiler->error;
   2851 #else
   2852 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
   2853 #endif
   2854 	}
   2855 #endif
   2856 
   2857 	if (offset != 0)
   2858 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
   2859 	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
   2860 }
   2861 
   2862 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
   2863 {
   2864 	sljit_ub *inst;
   2865 	struct sljit_const *const_;
   2866 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2867 	sljit_si reg;
   2868 #endif
   2869 
   2870 	CHECK_ERROR_PTR();
   2871 	check_sljit_emit_const(compiler, dst, dstw, init_value);
   2872 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2873 
   2874 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2875 
   2876 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   2877 	PTR_FAIL_IF(!const_);
   2878 	set_const(const_, compiler);
   2879 
   2880 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2881 	compiler->mode32 = 0;
   2882 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
   2883 
   2884 	if (emit_load_imm64(compiler, reg, init_value))
   2885 		return NULL;
   2886 #else
   2887 	if (dst == SLJIT_UNUSED)
   2888 		dst = TMP_REG1;
   2889 
   2890 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
   2891 		return NULL;
   2892 #endif
   2893 
   2894 	inst = (sljit_ub*)ensure_buf(compiler, 2);
   2895 	PTR_FAIL_IF(!inst);
   2896 
   2897 	*inst++ = 0;
   2898 	*inst++ = 1;
   2899 
   2900 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2901 	if (dst & SLJIT_MEM)
   2902 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
   2903 			return NULL;
   2904 #endif
   2905 
   2906 	return const_;
   2907 }
   2908 
   2909 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
   2910 {
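         	/* On x86-32 the patched field is the rel32 operand of a jmp/call,
         	   so the stored value is relative to the end of the 4 byte field;
         	   on x86-64 it is the 64 bit immediate of a mov reg, imm64 and
         	   holds the absolute address. */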
   2911 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2912 	*(sljit_sw*)addr = new_addr - (addr + 4);
   2913 #else
   2914 	*(sljit_uw*)addr = new_addr;
   2915 #endif
   2916 }
   2917 
   2918 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
   2919 {
   2920 	*(sljit_sw*)addr = new_constant;
   2921 }
   2922