/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */

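/* Emits a one byte opcode followed by a machine word immediate, e.g.
   MOV_r_i32 + reg, where the destination register is encoded in the
   opcode byte itself. */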
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

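/* Emits the opcode of a jump or call with a 32 bit displacement: JMP rel32,
   CALL rel32, or the two byte 0F 8x form for conditional jumps. jump->addr
   is advanced past the opcode byte(s) so that it points at the displacement,
   which is either left to be patched later (PATCH_MW) when the target is a
   label, or filled with the relative offset of the absolute target now. */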
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4));
	code_ptr += 4;

	return code_ptr;
}

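/* Function prologue: pushes TMP_REG1 and the saved registers that are in
   use, moves up to three arguments into S0-S2 (from the argument registers
   under fastcall, otherwise from the caller's stack through TMP_REG1),
   establishes the requested stack alignment and allocates local_size bytes. */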
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;
	compiler->flags_saved = 0;

	size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size += (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2 || scratches > 7)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 8)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S0]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
		*inst++ = 0x24;
		*inst++ = sizeof(sljit_sw) * (3 + 2); /* args > 2 implies saveds >= 3: skip the return address, TMP_REG1 and the three pushed saved registers. */
	}
#else
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 2;
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 3;
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 4;
	}
#endif

	SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words);
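	/* On macOS esp must stay 16 byte aligned at call sites, so local_size is
	   rounded so that the return address, the pushed registers and the locals
	   add up to a multiple of 16. Elsewhere SLJIT_DOUBLE_ALIGNMENT requests
	   8 byte alignment, which is established at run time below. */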
#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_DOUBLE_ALIGNMENT) {
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 17);
		FAIL_IF(!inst);

		INC_SIZE(17);
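		/* The 17 bytes emitted below decode as:
		        mov  TMP_REG1, esp    ; remember the original esp
		        test esp, 4
		        jne  1f               ; esp & 4 != 0: a single push aligns it
		        sub  esp, 4
		     1: push TMP_REG1         ; original esp, restored by sljit_emit_return */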
		inst[0] = MOV_r_rm;
		inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP];
		inst[2] = GROUP_F7;
		inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 4, 0x4);
		inst[8] = JNE_i8;
		inst[9] = 6;
		inst[10] = GROUP_BINARY_81;
		inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 12, 0x4);
		inst[16] = PUSH_r + reg_map[TMP_REG1];
	}
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
#endif

	compiler->local_size = local_size;
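	/* On Windows the stack is committed one guard page at a time, so larger
	   frames call sljit_grow_stack (with the size in SLJIT_R0) to touch the
	   new pages before esp is lowered. */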
#ifdef _WIN32
	if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
		local_size -= SLJIT_LOCALS_OFFSET;
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET));
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	SLJIT_ASSERT(local_size > 0);
	return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if defined(__APPLE__)
	saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_DOUBLE_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
#endif
	return SLJIT_SUCCESS;
}

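/* Function epilogue: moves the return value into place if needed, frees the
   local area, undoes the optional double alignment adjustment, pops the saved
   registers and TMP_REG1, and returns (RET imm16 under fastcall when a third
   argument was passed on the stack). */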
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	compiler->flags_saved = 0;
	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	SLJIT_ASSERT(compiler->local_size > 0);
	FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!inst);

		INC_SIZE(3);
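		/* mov esp, [esp]: reload the original esp pushed by the
		   prologue's alignment code. */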
		inst[0] = MOV_r_rm;
		inst[1] = (reg_map[SLJIT_SP] << 3) | 0x4 /* SIB */;
		inst[2] = (4 << 3) | reg_map[SLJIT_SP];
	}
#endif

	size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#else
	if (compiler->args > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 8)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 7)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		RET_I16(sizeof(sljit_sw));
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

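/* Central instruction encoder: computes the instruction length (prefixes,
   opcode, mod r/m, optional SIB, displacement and immediate), reserves
   buffer space, then emits the prefixes and the mod r/m, SIB, displacement
   and immediate fields. Returns a pointer to the opcode area, which the
   caller fills in; for shift instructions the opcode is already written
   here, so the returned pointer is the mod r/m byte and the caller only
   ORs in the opcode extension. */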
/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encode the instruction. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

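/* Moves the arguments of a SLJIT_CALLx into place before the call. Under
   fastcall the first two arguments travel in registers: the third argument
   (SLJIT_R2) is pushed onto the stack when present, then SLJIT_R0 is copied
   into the register of SLJIT_R2, which carries the first argument. Otherwise
   all arguments are stored into the outgoing argument area at [esp],
   [esp + 4] and [esp + 8]. */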
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);

	if (type >= SLJIT_CALL3)
		PUSH_REG(reg_map[SLJIT_R2]);
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
	FAIL_IF(!inst);
	INC_SIZE(4 * (type - SLJIT_CALL0));

	*inst++ = MOV_rm_r;
	*inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
	*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
	*inst++ = 0;
	if (type >= SLJIT_CALL2) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = sizeof(sljit_sw);
	}
	if (type >= SLJIT_CALL3) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = 2 * sizeof(sljit_sw);
	}
#endif
	return SLJIT_SUCCESS;
}

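/* Pops the return address stored by a SLJIT_FAST_CALL into dst, either
   with a register pop or a pop into memory. */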
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

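/* Pushes the saved return address (register, memory or immediate) back
   onto the stack and returns through it with RET. */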
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else if (src & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_sw(inst, srcw);
		inst += sizeof(sljit_sw);
	}

	RET();
	return SLJIT_SUCCESS;
}
    551