Home | History | Annotate | Download | only in sljit
      1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 /* x86 64-bit arch dependent functions. */
     28 
     29 static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
     30 {
     31 	sljit_ub *inst;
     32 
     33 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
     34 	FAIL_IF(!inst);
     35 	INC_SIZE(2 + sizeof(sljit_sw));
     36 	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
     37 	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
     38 	*(sljit_sw*)inst = imm;
     39 	return SLJIT_SUCCESS;
     40 }
     41 
     42 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type)
     43 {
     44 	if (type < SLJIT_JUMP) {
     45 		/* Invert type. */
     46 		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
     47 		*code_ptr++ = 10 + 3;
     48 	}
     49 
     50 	SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
     51 	*code_ptr++ = REX_W | REX_B;
     52 	*code_ptr++ = MOV_r_i32 + 1;
     53 	jump->addr = (sljit_uw)code_ptr;
     54 
     55 	if (jump->flags & JUMP_LABEL)
     56 		jump->flags |= PATCH_MD;
     57 	else
     58 		*(sljit_sw*)code_ptr = jump->u.target;
     59 
     60 	code_ptr += sizeof(sljit_sw);
     61 	*code_ptr++ = REX_B;
     62 	*code_ptr++ = GROUP_FF;
     63 	*code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
     64 
     65 	return code_ptr;
     66 }
     67 
     68 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type)
     69 {
     70 	sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si));
     71 
     72 	if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
     73 		*code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
     74 		*(sljit_sw*)code_ptr = delta;
     75 	}
     76 	else {
     77 		SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
     78 		*code_ptr++ = REX_W | REX_B;
     79 		*code_ptr++ = MOV_r_i32 + 1;
     80 		*(sljit_sw*)code_ptr = addr;
     81 		code_ptr += sizeof(sljit_sw);
     82 		*code_ptr++ = REX_B;
     83 		*code_ptr++ = GROUP_FF;
     84 		*code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
     85 	}
     86 
     87 	return code_ptr;
     88 }
     89 
/* Emits the function prologue.

   The generated code pushes the requested saved registers and the
   scratch registers starting at SLJIT_FIRST_SAVED_REG, copies up to three
   incoming arguments into SLJIT_S0 - SLJIT_S2, and subtracts the 16 byte
   aligned local area size from the stack pointer. The aligned size is
   also stored in compiler->local_size.

   On _WIN64 builds sljit_grow_stack is called first when more than 1024
   bytes of local space are needed, and xmm6 is saved when it is in use.

   Returns SLJIT_SUCCESS, or fails through the CHECK / FAIL_IF macros. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
{
	sljit_si i, tmp, size, saved_register_size;
	sljit_ub *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->flags_saved = 0;

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	/* Push the saved registers, from SLJIT_S0 downwards. A REX_B prefix
	   is needed when the register is mapped to r8 - r15. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	/* Push the scratch registers with index SLJIT_FIRST_SAVED_REG or above. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	/* Copy the incoming arguments into SLJIT_S0 - SLJIT_S2. Each move is
	   three bytes long: REX prefix, opcode and mod r/m byte. */
	if (args > 0) {
		size = args * 3;
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		/* Arguments arrive in rdi, rsi and rdx. */
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		/* Arguments arrive in rcx, rdx and r8. */
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	/* Round up the local area, so the pushed registers (including the
	   return address) and the locals together are a multiple of 16 bytes. */
	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;

#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate stack for the callback, which grows the stack. */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_si)));
		/* First instruction: subtract four or five machine words from the stack pointer. */
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | 4;
		/* Allocated size for registers must be divisible by 8. */
		SLJIT_ASSERT(!(saved_register_size & 0x7));
		/* Aligned to 16 byte. */
		if (saved_register_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction: load the remaining size into SLJIT_R0,
		   which is passed to sljit_grow_stack. */
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
		*(sljit_si*)inst = local_size;
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	/* Subtract the (remaining) local size from the stack pointer. The
	   shorter 8 bit immediate form is used when the size fits into it. */
	SLJIT_ASSERT(local_size > 0);
	if (local_size <= 127) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | 4;
		*inst++ = local_size;
	}
	else {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
		FAIL_IF(!inst);
		INC_SIZE(7);
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_81;
		*inst++ = MOD_REG | SUB | 4;
		*(sljit_si*)inst = local_size;
		inst += sizeof(sljit_si);
	}

#ifdef _WIN64
	/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		/* The remaining four instruction bytes, stored as one little endian word. */
		*(sljit_si*)inst = 0x20247429;
	}
#endif

	return SLJIT_SUCCESS;
}
    239 
    240 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
    241 	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
    242 	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
    243 {
    244 	sljit_si saved_register_size;
    245 
    246 	CHECK_ERROR();
    247 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    248 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    249 
    250 	/* Including the return address saved by the call instruction. */
    251 	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
    252 	compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
    253 	return SLJIT_SUCCESS;
    254 }
    255 
/* Emits the function epilogue.

   The return value is moved into place by emit_mov_before_return, then
   the steps of sljit_emit_enter are undone in reverse order: xmm6 is
   restored (_WIN64 only), compiler->local_size is added back to the
   stack pointer, the pushed registers are popped and a return
   instruction is emitted.

   Returns SLJIT_SUCCESS, or fails through the CHECK / FAIL_IF macros. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
{
	sljit_si i, tmp, size;
	sljit_ub *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	compiler->flags_saved = 0;
	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
	/* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
	if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		/* The remaining four instruction bytes, stored as one little endian word. */
		*(sljit_si*)inst = 0x20247428;
	}
#endif

	/* Release the local area. The shorter 8 bit immediate form is used
	   when the size fits into it. */
	SLJIT_ASSERT(compiler->local_size > 0);
	if (compiler->local_size <= 127) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | ADD | 4;
		*inst = compiler->local_size;
	}
	else {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
		FAIL_IF(!inst);
		INC_SIZE(7);
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_81;
		*inst++ = MOD_REG | ADD | 4;
		*(sljit_si*)inst = compiler->local_size;
	}

	/* Pop the scratch registers in the reverse order of the pushes done
	   by sljit_emit_enter. */
	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	/* Pop the saved registers, ending with SLJIT_S0. */
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}
    326 
    327 /* --------------------------------------------------------------------- */
    328 /*  Operators                                                            */
    329 /* --------------------------------------------------------------------- */
    330 
    331 static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm)
    332 {
    333 	sljit_ub *inst;
    334 	sljit_si length = 1 + (rex ? 1 : 0) + sizeof(sljit_si);
    335 
    336 	inst = (sljit_ub*)ensure_buf(compiler, 1 + length);
    337 	FAIL_IF(!inst);
    338 	INC_SIZE(length);
    339 	if (rex)
    340 		*inst++ = rex;
    341 	*inst++ = opcode;
    342 	*(sljit_si*)inst = imm;
    343 	return SLJIT_SUCCESS;
    344 }
    345 
/* Reserves buffer space for one instruction and encodes everything except
   the opcode: prefixes, REX byte, mod r/m byte, optional SIB byte,
   displacement and immediate.

   size: the low four bits are the number of opcode bytes left for the
         caller to fill in, the remaining bits are EX86_* flags.
   a, imma: the register or immediate operand.
   b, immb: the general (register or memory) operand.

   Returns a pointer to the opcode position (one byte further for shift
   instructions, whose group opcode is written here), or NULL on
   failure. For EX86_BIN_INS with an immediate operand the group opcode
   (GROUP_BINARY_83 or GROUP_BINARY_81) is stored at the opcode position. */
static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size,
	/* The register or immediate operand. */
	sljit_si a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_si b, sljit_sw immb)
{
	sljit_ub *inst;
	sljit_ub *buf_ptr;
	sljit_ub rex = 0;
	sljit_si flags = size & ~0xf;
	sljit_si inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	/* At most one of the F2, F3 and 66 prefixes can be requested. */
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* First pass: compute the total instruction size and the REX bits. */
	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				/* The offset does not fit into 32 bit: it is loaded into
				   TMP_REG3, which becomes the index register (or the base
				   register when there is no base). */
				if (emit_load_imm64(compiler, TMP_REG3, immb))
					return NULL;
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG3);
				else
					b |= TMP_REG3;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				/* A base register encoded as 4 needs a SIB byte. */
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_sb);
				else
					inst_size += sizeof(sljit_si);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				/* A base register encoded as 5 always needs a displacement byte. */
				inst_size += sizeof(sljit_sb);

			if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			/* Binary instructions use the short form when the immediate fits into 8 bit. */
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			/* The shift count is masked to the operand width. A shift by
			   one has its own opcode without an immediate byte. */
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_si);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Second pass: write the prefixes, then the bytes following the opcode. */
	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	/* inst points to the opcode position, buf_ptr to the mod r/m byte. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		/* The reg field is left zero for immediates and for a == 0. */
		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* Base register with an optional displacement: 0x40 selects
			   an 8 bit, 0x80 a 32 bit displacement. */
			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & REG_MASK];
			else {
				/* 0x04 in the r/m field means that a SIB byte follows. */
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
			}

			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					*(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_si);
				}
			}
		}
		else {
			/* Base + (index << immb): immb is the shift stored in the top
			   two bits of the SIB byte. A base encoded as 5 needs a zero
			   8 bit displacement. */
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr |= 0x40;
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		/* Absolute address: SIB byte 0x25 followed by the displacement. */
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		*(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_si);
	}

	/* The immediate is the last part of the instruction. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			*(short*)buf_ptr = imma;
		else if (!(flags & EX86_SHIFT_INS))
			*(sljit_si*)buf_ptr = imma;
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
    551 
    552 /* --------------------------------------------------------------------- */
    553 /*  Call / return instructions                                           */
    554 /* --------------------------------------------------------------------- */
    555 
    556 static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type)
    557 {
    558 	sljit_ub *inst;
    559 
    560 #ifndef _WIN64
    561 	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
    562 
    563 	inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
    564 	FAIL_IF(!inst);
    565 	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
    566 	if (type >= SLJIT_CALL3) {
    567 		*inst++ = REX_W;
    568 		*inst++ = MOV_r_rm;
    569 		*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
    570 	}
    571 	*inst++ = REX_W;
    572 	*inst++ = MOV_r_rm;
    573 	*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
    574 #else
    575 	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
    576 
    577 	inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
    578 	FAIL_IF(!inst);
    579 	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
    580 	if (type >= SLJIT_CALL3) {
    581 		*inst++ = REX_W | REX_R;
    582 		*inst++ = MOV_r_rm;
    583 		*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
    584 	}
    585 	*inst++ = REX_W;
    586 	*inst++ = MOV_r_rm;
    587 	*inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
    588 #endif
    589 	return SLJIT_SUCCESS;
    590 }
    591 
    592 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
    593 {
    594 	sljit_ub *inst;
    595 
    596 	CHECK_ERROR();
    597 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
    598 	ADJUST_LOCAL_OFFSET(dst, dstw);
    599 
    600 	/* For UNUSED dst. Uncommon, but possible. */
    601 	if (dst == SLJIT_UNUSED)
    602 		dst = TMP_REG1;
    603 
    604 	if (FAST_IS_REG(dst)) {
    605 		if (reg_map[dst] < 8) {
    606 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    607 			FAIL_IF(!inst);
    608 			INC_SIZE(1);
    609 			POP_REG(reg_lmap[dst]);
    610 			return SLJIT_SUCCESS;
    611 		}
    612 
    613 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
    614 		FAIL_IF(!inst);
    615 		INC_SIZE(2);
    616 		*inst++ = REX_B;
    617 		POP_REG(reg_lmap[dst]);
    618 		return SLJIT_SUCCESS;
    619 	}
    620 
    621 	/* REX_W is not necessary (src is not immediate). */
    622 	compiler->mode32 = 1;
    623 	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
    624 	FAIL_IF(!inst);
    625 	*inst++ = POP_rm;
    626 	return SLJIT_SUCCESS;
    627 }
    628 
    629 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
    630 {
    631 	sljit_ub *inst;
    632 
    633 	CHECK_ERROR();
    634 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
    635 	ADJUST_LOCAL_OFFSET(src, srcw);
    636 
    637 	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
    638 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
    639 		src = TMP_REG1;
    640 	}
    641 
    642 	if (FAST_IS_REG(src)) {
    643 		if (reg_map[src] < 8) {
    644 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
    645 			FAIL_IF(!inst);
    646 
    647 			INC_SIZE(1 + 1);
    648 			PUSH_REG(reg_lmap[src]);
    649 		}
    650 		else {
    651 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1);
    652 			FAIL_IF(!inst);
    653 
    654 			INC_SIZE(2 + 1);
    655 			*inst++ = REX_B;
    656 			PUSH_REG(reg_lmap[src]);
    657 		}
    658 	}
    659 	else if (src & SLJIT_MEM) {
    660 		/* REX_W is not necessary (src is not immediate). */
    661 		compiler->mode32 = 1;
    662 		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
    663 		FAIL_IF(!inst);
    664 		*inst++ = GROUP_FF;
    665 		*inst |= PUSH_rm;
    666 
    667 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    668 		FAIL_IF(!inst);
    669 		INC_SIZE(1);
    670 	}
    671 	else {
    672 		SLJIT_ASSERT(IS_HALFWORD(srcw));
    673 		/* SLJIT_IMM. */
    674 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
    675 		FAIL_IF(!inst);
    676 
    677 		INC_SIZE(5 + 1);
    678 		*inst++ = PUSH_i32;
    679 		*(sljit_si*)inst = srcw;
    680 		inst += sizeof(sljit_si);
    681 	}
    682 
    683 	RET();
    684 	return SLJIT_SUCCESS;
    685 }
    686 
    687 
    688 /* --------------------------------------------------------------------- */
    689 /*  Extend input                                                         */
    690 /* --------------------------------------------------------------------- */
    691 
    692 static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign,
    693 	sljit_si dst, sljit_sw dstw,
    694 	sljit_si src, sljit_sw srcw)
    695 {
    696 	sljit_ub* inst;
    697 	sljit_si dst_r;
    698 
    699 	compiler->mode32 = 0;
    700 
    701 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
    702 		return SLJIT_SUCCESS; /* Empty instruction. */
    703 
    704 	if (src & SLJIT_IMM) {
    705 		if (FAST_IS_REG(dst)) {
    706 			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
    707 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
    708 				FAIL_IF(!inst);
    709 				*inst = MOV_rm_i32;
    710 				return SLJIT_SUCCESS;
    711 			}
    712 			return emit_load_imm64(compiler, dst, srcw);
    713 		}
    714 		compiler->mode32 = 1;
    715 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
    716 		FAIL_IF(!inst);
    717 		*inst = MOV_rm_i32;
    718 		compiler->mode32 = 0;
    719 		return SLJIT_SUCCESS;
    720 	}
    721 
    722 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
    723 
    724 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
    725 		dst_r = src;
    726 	else {
    727 		if (sign) {
    728 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
    729 			FAIL_IF(!inst);
    730 			*inst++ = MOVSXD_r_rm;
    731 		} else {
    732 			compiler->mode32 = 1;
    733 			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
    734 			compiler->mode32 = 0;
    735 		}
    736 	}
    737 
    738 	if (dst & SLJIT_MEM) {
    739 		compiler->mode32 = 1;
    740 		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
    741 		FAIL_IF(!inst);
    742 		*inst = MOV_rm_r;
    743 		compiler->mode32 = 0;
    744 	}
    745 
    746 	return SLJIT_SUCCESS;
    747 }
    748