      1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
     28 {
     29 	return "PowerPC" SLJIT_CPUINFO;
     30 }
     31 
     32 /* Length of an instruction word.
     33    Both for ppc-32 and ppc-64. */
     34 typedef sljit_u32 sljit_ins;
     35 
     36 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
     37 	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
     38 #define SLJIT_PPC_STACK_FRAME_V2 1
     39 #endif
     40 
     41 #ifdef _AIX
     42 #include <sys/cache.h>
     43 #endif
     44 
     45 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
     46 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
     47 #endif
     48 
     49 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
     50 
     51 static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
     52 {
     53 #ifdef _AIX
     54 	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
     55 #elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
     56 #	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
     57 	/* Cache flush for POWER architecture. */
     58 	while (from < to) {
     59 		__asm__ volatile (
     60 			"clf 0, %0\n"
     61 			"dcs\n"
     62 			: : "r"(from)
     63 		);
     64 		from++;
     65 	}
     66 	__asm__ volatile ( "ics" );
     67 #	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
     68 #	error "Cache flush is not implemented for PowerPC/POWER common mode."
     69 #	else
     70 	/* Cache flush for PowerPC architecture. */
     71 	while (from < to) {
     72 		__asm__ volatile (
     73 			"dcbf 0, %0\n"
     74 			"sync\n"
     75 			"icbi 0, %0\n"
     76 			: : "r"(from)
     77 		);
     78 		from++;
     79 	}
     80 	__asm__ volatile ( "isync" );
     81 #	endif
     82 #	ifdef __xlc__
     83 #	warning "This file may fail to compile if -qfuncsect is used"
     84 #	endif
     85 #elif defined(__xlc__)
     86 #error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
     87 #else
     88 #error "This platform requires a cache flush implementation."
     89 #endif /* _AIX */
     90 }
     91 
     92 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
     93 
     94 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
     95 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
     96 #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
     97 #define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)
     98 
     99 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
    100 #define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
    101 #else
    102 #define TMP_CALL_REG	TMP_REG2
    103 #endif
    104 
    105 #define TMP_FREG1	(0)
    106 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
    107 
    108 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
    109 	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
    110 };
    111 
    112 /* --------------------------------------------------------------------- */
    113 /*  Instruction forms                                                    */
    114 /* --------------------------------------------------------------------- */
    115 #define D(d)		(reg_map[d] << 21)
    116 #define S(s)		(reg_map[s] << 21)
    117 #define A(a)		(reg_map[a] << 16)
    118 #define B(b)		(reg_map[b] << 11)
    119 #define C(c)		(reg_map[c] << 6)
    120 #define FD(fd)		((fd) << 21)
    121 #define FS(fs)		((fs) << 21)
    122 #define FA(fa)		((fa) << 16)
    123 #define FB(fb)		((fb) << 11)
    124 #define FC(fc)		((fc) << 6)
    125 #define IMM(imm)	((imm) & 0xffff)
    126 #define CRD(d)		((d) << 21)
    127 
    128 /* Instruction bit sections.
    129    OE and Rc flag (see ALT_SET_FLAGS). */
    130 #define OERC(flags)	(((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
    131 /* Rc flag (see ALT_SET_FLAGS). */
    132 #define RC(flags)	((flags & ALT_SET_FLAGS) >> 10)
    133 #define HI(opcode)	((opcode) << 26)
    134 #define LO(opcode)	((opcode) << 1)
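        /* Illustration of how an instruction word is put together with the macros
           above (operands chosen arbitrarily): ADD | D(dst) | A(src1) | B(src2)
           yields an "add rD,rA,rB" word whose physical registers come from reg_map.
           HI() supplies the 6-bit primary opcode in the top bits, LO() the extended
           opcode just above the Rc bit, and RC(ALT_SET_FLAGS) / OERC(ALT_SET_FLAGS)
           contribute the Rc bit (and, for OERC, also the OE bit), turning "add"
           into "add." or "addo.". */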
    135 
    136 #define ADD		(HI(31) | LO(266))
    137 #define ADDC		(HI(31) | LO(10))
    138 #define ADDE		(HI(31) | LO(138))
    139 #define ADDI		(HI(14))
    140 #define ADDIC		(HI(13))
    141 #define ADDIS		(HI(15))
    142 #define ADDME		(HI(31) | LO(234))
    143 #define AND		(HI(31) | LO(28))
    144 #define ANDI		(HI(28))
    145 #define ANDIS		(HI(29))
    146 #define Bx		(HI(18))
    147 #define BCx		(HI(16))
    148 #define BCCTR		(HI(19) | LO(528) | (3 << 11))
    149 #define BLR		(HI(19) | LO(16) | (0x14 << 21))
    150 #define CNTLZD		(HI(31) | LO(58))
    151 #define CNTLZW		(HI(31) | LO(26))
    152 #define CMP		(HI(31) | LO(0))
    153 #define CMPI		(HI(11))
    154 #define CMPL		(HI(31) | LO(32))
    155 #define CMPLI		(HI(10))
    156 #define CROR		(HI(19) | LO(449))
    157 #define DIVD		(HI(31) | LO(489))
    158 #define DIVDU		(HI(31) | LO(457))
    159 #define DIVW		(HI(31) | LO(491))
    160 #define DIVWU		(HI(31) | LO(459))
    161 #define EXTSB		(HI(31) | LO(954))
    162 #define EXTSH		(HI(31) | LO(922))
    163 #define EXTSW		(HI(31) | LO(986))
    164 #define FABS		(HI(63) | LO(264))
    165 #define FADD		(HI(63) | LO(21))
    166 #define FADDS		(HI(59) | LO(21))
    167 #define FCFID		(HI(63) | LO(846))
    168 #define FCMPU		(HI(63) | LO(0))
    169 #define FCTIDZ		(HI(63) | LO(815))
    170 #define FCTIWZ		(HI(63) | LO(15))
    171 #define FDIV		(HI(63) | LO(18))
    172 #define FDIVS		(HI(59) | LO(18))
    173 #define FMR		(HI(63) | LO(72))
    174 #define FMUL		(HI(63) | LO(25))
    175 #define FMULS		(HI(59) | LO(25))
    176 #define FNEG		(HI(63) | LO(40))
    177 #define FRSP		(HI(63) | LO(12))
    178 #define FSUB		(HI(63) | LO(20))
    179 #define FSUBS		(HI(59) | LO(20))
    180 #define LD		(HI(58) | 0)
    181 #define LWZ		(HI(32))
    182 #define MFCR		(HI(31) | LO(19))
    183 #define MFLR		(HI(31) | LO(339) | 0x80000)
    184 #define MFXER		(HI(31) | LO(339) | 0x10000)
    185 #define MTCTR		(HI(31) | LO(467) | 0x90000)
    186 #define MTLR		(HI(31) | LO(467) | 0x80000)
    187 #define MTXER		(HI(31) | LO(467) | 0x10000)
    188 #define MULHD		(HI(31) | LO(73))
    189 #define MULHDU		(HI(31) | LO(9))
    190 #define MULHW		(HI(31) | LO(75))
    191 #define MULHWU		(HI(31) | LO(11))
    192 #define MULLD		(HI(31) | LO(233))
    193 #define MULLI		(HI(7))
    194 #define MULLW		(HI(31) | LO(235))
    195 #define NEG		(HI(31) | LO(104))
    196 #define NOP		(HI(24))
    197 #define NOR		(HI(31) | LO(124))
    198 #define OR		(HI(31) | LO(444))
    199 #define ORI		(HI(24))
    200 #define ORIS		(HI(25))
    201 #define RLDICL		(HI(30))
    202 #define RLWINM		(HI(21))
    203 #define SLD		(HI(31) | LO(27))
    204 #define SLW		(HI(31) | LO(24))
    205 #define SRAD		(HI(31) | LO(794))
    206 #define SRADI		(HI(31) | LO(413 << 1))
    207 #define SRAW		(HI(31) | LO(792))
    208 #define SRAWI		(HI(31) | LO(824))
    209 #define SRD		(HI(31) | LO(539))
    210 #define SRW		(HI(31) | LO(536))
    211 #define STD		(HI(62) | 0)
    212 #define STDU		(HI(62) | 1)
    213 #define STDUX		(HI(31) | LO(181))
    214 #define STFIWX		(HI(31) | LO(983))
    215 #define STW		(HI(36))
    216 #define STWU		(HI(37))
    217 #define STWUX		(HI(31) | LO(183))
    218 #define SUBF		(HI(31) | LO(40))
    219 #define SUBFC		(HI(31) | LO(8))
    220 #define SUBFE		(HI(31) | LO(136))
    221 #define SUBFIC		(HI(8))
    222 #define XOR		(HI(31) | LO(316))
    223 #define XORI		(HI(26))
    224 #define XORIS		(HI(27))
    225 
    226 #define SIMM_MAX	(0x7fff)
    227 #define SIMM_MIN	(-0x8000)
    228 #define UIMM_MAX	(0xffff)
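        /* For example (illustration only): the 16-bit IMM() field above is read
           differently by different instruction forms. Arithmetic immediates such as
           ADDI sign-extend it, so they accept SIMM_MIN..SIMM_MAX, while logical
           immediates such as ORI or ANDI treat it as unsigned, 0..UIMM_MAX. */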
    229 
    230 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    231 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
    232 {
    233 	sljit_sw* ptrs;
    234 	if (func_ptr)
    235 		*func_ptr = (void*)context;
    236 	ptrs = (sljit_sw*)func;
    237 	context->addr = addr ? addr : ptrs[0];
    238 	context->r2 = ptrs[1];
    239 	context->r11 = ptrs[2];
    240 }
    241 #endif
    242 
    243 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
    244 {
    245 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
    246 	FAIL_IF(!ptr);
    247 	*ptr = ins;
    248 	compiler->size++;
    249 	return SLJIT_SUCCESS;
    250 }
    251 
    252 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
    253 {
    254 	sljit_sw diff;
    255 	sljit_uw target_addr;
    256 	sljit_sw extra_jump_flags;
    257 
    258 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    259 	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
    260 		return 0;
    261 #else
    262 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
    263 		return 0;
    264 #endif
    265 
    266 	if (jump->flags & JUMP_ADDR)
    267 		target_addr = jump->u.target;
    268 	else {
    269 		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
    270 		target_addr = (sljit_uw)(code + jump->u.label->size);
    271 	}
    272 
    273 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    274 	if (jump->flags & IS_CALL)
    275 		goto keep_address;
    276 #endif
    277 
    278 	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
    279 
    280 	extra_jump_flags = 0;
    281 	if (jump->flags & IS_COND) {
    282 		if (diff <= 0x7fff && diff >= -0x8000) {
    283 			jump->flags |= PATCH_B;
    284 			return 1;
    285 		}
    286 		if (target_addr <= 0xffff) {
    287 			jump->flags |= PATCH_B | PATCH_ABS_B;
    288 			return 1;
    289 		}
    290 		extra_jump_flags = REMOVE_COND;
    291 
    292 		diff -= sizeof(sljit_ins);
    293 	}
    294 
    295 	if (diff <= 0x01ffffff && diff >= -0x02000000) {
    296 		jump->flags |= PATCH_B | extra_jump_flags;
    297 		return 1;
    298 	}
    299 	if (target_addr <= 0x03ffffff) {
    300 		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
    301 		return 1;
    302 	}
    303 
    304 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    305 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
    306 keep_address:
    307 #endif
    308 	if (target_addr <= 0x7fffffff) {
    309 		jump->flags |= PATCH_ABS32;
    310 		return 1;
    311 	}
    312 	if (target_addr <= 0x7fffffffffffl) {
    313 		jump->flags |= PATCH_ABS48;
    314 		return 1;
    315 	}
    316 #endif
    317 
    318 	return 0;
    319 }
    320 
    321 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
    322 {
    323 	struct sljit_memory_fragment *buf;
    324 	sljit_ins *code;
    325 	sljit_ins *code_ptr;
    326 	sljit_ins *buf_ptr;
    327 	sljit_ins *buf_end;
    328 	sljit_uw word_count;
    329 	sljit_uw addr;
    330 
    331 	struct sljit_label *label;
    332 	struct sljit_jump *jump;
    333 	struct sljit_const *const_;
    334 
    335 	CHECK_ERROR_PTR();
    336 	CHECK_PTR(check_sljit_generate_code(compiler));
    337 	reverse_buf(compiler);
    338 
    339 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    340 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    341 	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
    342 #else
    343 	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
    344 #endif
    345 #endif
    346 	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
    347 	PTR_FAIL_WITH_EXEC_IF(code);
    348 	buf = compiler->buf;
    349 
    350 	code_ptr = code;
    351 	word_count = 0;
    352 	label = compiler->labels;
    353 	jump = compiler->jumps;
    354 	const_ = compiler->consts;
    355 	do {
    356 		buf_ptr = (sljit_ins*)buf->memory;
    357 		buf_end = buf_ptr + (buf->used_size >> 2);
    358 		do {
    359 			*code_ptr = *buf_ptr++;
    360 			SLJIT_ASSERT(!label || label->size >= word_count);
    361 			SLJIT_ASSERT(!jump || jump->addr >= word_count);
    362 			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
    363 			/* These structures are ordered by their address. */
    364 			if (label && label->size == word_count) {
    365 				/* Just recording the address. */
    366 				label->addr = (sljit_uw)code_ptr;
    367 				label->size = code_ptr - code;
    368 				label = label->next;
    369 			}
    370 			if (jump && jump->addr == word_count) {
    371 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    372 				jump->addr = (sljit_uw)(code_ptr - 3);
    373 #else
    374 				jump->addr = (sljit_uw)(code_ptr - 6);
    375 #endif
    376 				if (detect_jump_type(jump, code_ptr, code)) {
    377 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    378 					code_ptr[-3] = code_ptr[0];
    379 					code_ptr -= 3;
    380 #else
    381 					if (jump->flags & PATCH_ABS32) {
    382 						code_ptr -= 3;
    383 						code_ptr[-1] = code_ptr[2];
    384 						code_ptr[0] = code_ptr[3];
    385 					}
    386 					else if (jump->flags & PATCH_ABS48) {
    387 						code_ptr--;
    388 						code_ptr[-1] = code_ptr[0];
    389 						code_ptr[0] = code_ptr[1];
    390 						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
    391 						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
    392 						code_ptr[-3] ^= 0x8422;
    393 						/* oris -> ori */
    394 						code_ptr[-2] ^= 0x4000000;
    395 					}
    396 					else {
    397 						code_ptr[-6] = code_ptr[0];
    398 						code_ptr -= 6;
    399 					}
    400 #endif
    401 					if (jump->flags & REMOVE_COND) {
    402 						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
    403 						code_ptr++;
    404 						jump->addr += sizeof(sljit_ins);
    405 						code_ptr[0] = Bx;
    406 						jump->flags -= IS_COND;
    407 					}
    408 				}
    409 				jump = jump->next;
    410 			}
    411 			if (const_ && const_->addr == word_count) {
    412 				const_->addr = (sljit_uw)code_ptr;
    413 				const_ = const_->next;
    414 			}
    415 			code_ptr ++;
    416 			word_count ++;
    417 		} while (buf_ptr < buf_end);
    418 
    419 		buf = buf->next;
    420 	} while (buf);
    421 
    422 	if (label && label->size == word_count) {
    423 		label->addr = (sljit_uw)code_ptr;
    424 		label->size = code_ptr - code;
    425 		label = label->next;
    426 	}
    427 
    428 	SLJIT_ASSERT(!label);
    429 	SLJIT_ASSERT(!jump);
    430 	SLJIT_ASSERT(!const_);
    431 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    432 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
    433 #else
    434 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
    435 #endif
    436 
    437 	jump = compiler->jumps;
    438 	while (jump) {
    439 		do {
    440 			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
    441 			buf_ptr = (sljit_ins*)jump->addr;
    442 			if (jump->flags & PATCH_B) {
    443 				if (jump->flags & IS_COND) {
    444 					if (!(jump->flags & PATCH_ABS_B)) {
    445 						addr = addr - jump->addr;
    446 						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
    447 						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
    448 					}
    449 					else {
    450 						SLJIT_ASSERT(addr <= 0xffff);
    451 						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
    452 					}
    453 				}
    454 				else {
    455 					if (!(jump->flags & PATCH_ABS_B)) {
    456 						addr = addr - jump->addr;
    457 						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
    458 						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
    459 					}
    460 					else {
    461 						SLJIT_ASSERT(addr <= 0x03ffffff);
    462 						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
    463 					}
    464 				}
    465 				break;
    466 			}
    467 			/* Set the fields of immediate loads. */
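        			/* Rough sketch of the patched sequences (based on the usual
        			   load_immediate forms, for illustration): on 32-bit targets the
        			   two words are "lis reg,high16; ori reg,reg,low16"; on 64-bit
        			   targets the absolute form is "lis; ori; rldicr reg,reg,32,31;
        			   oris; ori", which is why slot [2] (the rotate) is left
        			   untouched below. */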
    468 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    469 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
    470 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
    471 #else
    472 			if (jump->flags & PATCH_ABS32) {
    473 				SLJIT_ASSERT(addr <= 0x7fffffff);
    474 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
    475 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
    476 				break;
    477 			}
    478 			if (jump->flags & PATCH_ABS48) {
    479 				SLJIT_ASSERT(addr <= 0x7fffffffffff);
    480 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
    481 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
    482 				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
    483 				break;
    484 			}
    485 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
    486 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
    487 			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
    488 			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
    489 #endif
    490 		} while (0);
    491 		jump = jump->next;
    492 	}
    493 
    494 	compiler->error = SLJIT_ERR_COMPILED;
    495 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
    496 	SLJIT_CACHE_FLUSH(code, code_ptr);
    497 
    498 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    499 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    500 	if (((sljit_sw)code_ptr) & 0x4)
    501 		code_ptr++;
    502 	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
    503 	return code_ptr;
    504 #else
    505 	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
    506 	return code_ptr;
    507 #endif
    508 #else
    509 	return code;
    510 #endif
    511 }
    512 
    513 /* --------------------------------------------------------------------- */
    514 /*  Entry, exit                                                          */
    515 /* --------------------------------------------------------------------- */
    516 
    517 /* inp_flags: */
    518 
    519 /* Creates an index in data_transfer_insts array. */
    520 #define LOAD_DATA	0x01
    521 #define INDEXED		0x02
    522 #define WRITE_BACK	0x04
    523 #define WORD_DATA	0x00
    524 #define BYTE_DATA	0x08
    525 #define HALF_DATA	0x10
    526 #define INT_DATA	0x18
    527 #define SIGNED_DATA	0x20
    528 /* Separates integer and floating-point registers. */
    529 #define GPR_REG		0x3f
    530 #define DOUBLE_DATA	0x40
    531 
    532 #define MEM_MASK	0x7f
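        /* Worked example of the indexing (flag values only, nothing is emitted
           here): BYTE_DATA | SIGNED_DATA | LOAD_DATA selects the "s b n i l" (lbz)
           entry of data_transfer_insts below; or-ing in INDEXED moves to the x-form
           entry (lbzx), and or-ing in WRITE_BACK to the update form (lbzu/lbzux). */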
    533 
    534 /* Other inp_flags. */
    535 
    536 #define ARG_TEST	0x000100
    537 /* Integer operation that sets flags -> requires exts on 64-bit systems. */
    538 #define ALT_SIGN_EXT	0x000200
    539 /* This flag affects the RC() and OERC() macros. */
    540 #define ALT_SET_FLAGS	0x000400
    541 #define ALT_KEEP_CACHE	0x000800
    542 #define ALT_FORM1	0x010000
    543 #define ALT_FORM2	0x020000
    544 #define ALT_FORM3	0x040000
    545 #define ALT_FORM4	0x080000
    546 #define ALT_FORM5	0x100000
    547 #define ALT_FORM6	0x200000
    548 
    549 /* Source and destination are registers. */
    550 #define REG_DEST	0x000001
    551 #define REG1_SOURCE	0x000002
    552 #define REG2_SOURCE	0x000004
    553 /* getput_arg_fast returned true. */
    554 #define FAST_DEST	0x000008
    555 /* Multiple instructions are required. */
    556 #define SLOW_DEST	0x000010
    557 /*
    558 ALT_SIGN_EXT		0x000200
    559 ALT_SET_FLAGS		0x000400
    560 ALT_FORM1		0x010000
    561 ...
    562 ALT_FORM6		0x200000 */
    563 
    564 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    565 #include "sljitNativePPC_32.c"
    566 #else
    567 #include "sljitNativePPC_64.c"
    568 #endif
    569 
    570 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    571 #define STACK_STORE	STW
    572 #define STACK_LOAD	LWZ
    573 #else
    574 #define STACK_STORE	STD
    575 #define STACK_LOAD	LD
    576 #endif
    577 
    578 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    579 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    580 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    581 {
    582 	sljit_s32 i, tmp, offs;
    583 
    584 	CHECK_ERROR();
    585 	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    586 	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    587 
    588 	FAIL_IF(push_inst(compiler, MFLR | D(0)));
    589 	offs = -(sljit_s32)(sizeof(sljit_sw));
    590 	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
    591 
    592 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    593 	for (i = SLJIT_S0; i >= tmp; i--) {
    594 		offs -= (sljit_s32)(sizeof(sljit_sw));
    595 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
    596 	}
    597 
    598 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
    599 		offs -= (sljit_s32)(sizeof(sljit_sw));
    600 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
    601 	}
    602 
    603 	SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
    604 
    605 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
    606 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
    607 #else
    608 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
    609 #endif
    610 
    611 	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
    612 	if (args >= 1)
    613 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
    614 	if (args >= 2)
    615 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
    616 	if (args >= 3)
    617 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
    618 
    619 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
    620 	local_size = (local_size + 15) & ~0xf;
    621 	compiler->local_size = local_size;
    622 
    623 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    624 	if (local_size <= SIMM_MAX)
    625 		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
    626 	else {
    627 		FAIL_IF(load_immediate(compiler, 0, -local_size));
    628 		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    629 	}
    630 #else
    631 	if (local_size <= SIMM_MAX)
    632 		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
    633 	else {
    634 		FAIL_IF(load_immediate(compiler, 0, -local_size));
    635 		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    636 	}
    637 #endif
    638 
    639 	return SLJIT_SUCCESS;
    640 }
    641 
    642 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    643 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    644 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    645 {
    646 	CHECK_ERROR();
    647 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    648 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    649 
    650 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
    651 	compiler->local_size = (local_size + 15) & ~0xf;
    652 	return SLJIT_SUCCESS;
    653 }
    654 
    655 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
    656 {
    657 	sljit_s32 i, tmp, offs;
    658 
    659 	CHECK_ERROR();
    660 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
    661 
    662 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
    663 
    664 	if (compiler->local_size <= SIMM_MAX)
    665 		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
    666 	else {
    667 		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
    668 		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    669 	}
    670 
    671 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
    672 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
    673 #else
    674 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
    675 #endif
    676 
    677 	offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
    678 
    679 	tmp = compiler->scratches;
    680 	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
    681 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
    682 		offs += (sljit_s32)(sizeof(sljit_sw));
    683 	}
    684 
    685 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    686 	for (i = tmp; i <= SLJIT_S0; i++) {
    687 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
    688 		offs += (sljit_s32)(sizeof(sljit_sw));
    689 	}
    690 
    691 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
    692 	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
    693 
    694 	FAIL_IF(push_inst(compiler, MTLR | S(0)));
    695 	FAIL_IF(push_inst(compiler, BLR));
    696 
    697 	return SLJIT_SUCCESS;
    698 }
    699 
    700 #undef STACK_STORE
    701 #undef STACK_LOAD
    702 
    703 /* --------------------------------------------------------------------- */
    704 /*  Operators                                                            */
    705 /* --------------------------------------------------------------------- */
    706 
    707 /* i/x - immediate/indexed form
    708    n/w - no write-back / write-back (1 bit)
    709    s/l - store/load (1 bit)
    710    u/s - unsigned/signed (1 bit)
    711    w/b/h/i - word/byte/half/int allowed (2 bits)
    712    It contains 32 items, but not all are different. */
    713 
    714 /* 64-bit only: [reg+imm] must be aligned to 4 bytes. */
    715 #define INT_ALIGNED	0x10000
    716 /* 64-bit only: there is no lwau instruction. */
    717 #define UPDATE_REQ	0x20000
    718 
    719 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    720 #define ARCH_32_64(a, b)	a
    721 #define INST_CODE_AND_DST(inst, flags, reg) \
    722 	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
    723 #else
    724 #define ARCH_32_64(a, b)	b
    725 #define INST_CODE_AND_DST(inst, flags, reg) \
    726 	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
    727 #endif
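        /* For instance (illustration only): INST_CODE_AND_DST(HI(58) | INT_ALIGNED,
           WORD_DATA | LOAD_DATA, reg) drops the INT_ALIGNED/UPDATE_REQ marker bits,
           which only guide instruction selection, and merges in D(reg), producing a
           plain "ld" encoding; floating point types (above GPR_REG) get FD(reg). */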
    728 
    729 static const sljit_ins data_transfer_insts[64 + 8] = {
    730 
    731 /* -------- Unsigned -------- */
    732 
    733 /* Word. */
    734 
    735 /* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    736 /* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    737 /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    738 /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    739 
    740 /* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    741 /* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    742 /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    743 /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    744 
    745 /* Byte. */
    746 
    747 /* u b n i s */ HI(38) /* stb */,
    748 /* u b n i l */ HI(34) /* lbz */,
    749 /* u b n x s */ HI(31) | LO(215) /* stbx */,
    750 /* u b n x l */ HI(31) | LO(87) /* lbzx */,
    751 
    752 /* u b w i s */ HI(39) /* stbu */,
    753 /* u b w i l */ HI(35) /* lbzu */,
    754 /* u b w x s */ HI(31) | LO(247) /* stbux */,
    755 /* u b w x l */ HI(31) | LO(119) /* lbzux */,
    756 
    757 /* Half. */
    758 
    759 /* u h n i s */ HI(44) /* sth */,
    760 /* u h n i l */ HI(40) /* lhz */,
    761 /* u h n x s */ HI(31) | LO(407) /* sthx */,
    762 /* u h n x l */ HI(31) | LO(279) /* lhzx */,
    763 
    764 /* u h w i s */ HI(45) /* sthu */,
    765 /* u h w i l */ HI(41) /* lhzu */,
    766 /* u h w x s */ HI(31) | LO(439) /* sthux */,
    767 /* u h w x l */ HI(31) | LO(311) /* lhzux */,
    768 
    769 /* Int. */
    770 
    771 /* u i n i s */ HI(36) /* stw */,
    772 /* u i n i l */ HI(32) /* lwz */,
    773 /* u i n x s */ HI(31) | LO(151) /* stwx */,
    774 /* u i n x l */ HI(31) | LO(23) /* lwzx */,
    775 
    776 /* u i w i s */ HI(37) /* stwu */,
    777 /* u i w i l */ HI(33) /* lwzu */,
    778 /* u i w x s */ HI(31) | LO(183) /* stwux */,
    779 /* u i w x l */ HI(31) | LO(55) /* lwzux */,
    780 
    781 /* -------- Signed -------- */
    782 
    783 /* Word. */
    784 
    785 /* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    786 /* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    787 /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    788 /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    789 
    790 /* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    791 /* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    792 /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    793 /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    794 
    795 /* Byte. */
    796 
    797 /* s b n i s */ HI(38) /* stb */,
    798 /* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
    799 /* s b n x s */ HI(31) | LO(215) /* stbx */,
    800 /* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
    801 
    802 /* s b w i s */ HI(39) /* stbu */,
    803 /* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
    804 /* s b w x s */ HI(31) | LO(247) /* stbux */,
    805 /* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
    806 
    807 /* Half. */
    808 
    809 /* s h n i s */ HI(44) /* sth */,
    810 /* s h n i l */ HI(42) /* lha */,
    811 /* s h n x s */ HI(31) | LO(407) /* sthx */,
    812 /* s h n x l */ HI(31) | LO(343) /* lhax */,
    813 
    814 /* s h w i s */ HI(45) /* sthu */,
    815 /* s h w i l */ HI(43) /* lhau */,
    816 /* s h w x s */ HI(31) | LO(439) /* sthux */,
    817 /* s h w x l */ HI(31) | LO(375) /* lhaux */,
    818 
    819 /* Int. */
    820 
    821 /* s i n i s */ HI(36) /* stw */,
    822 /* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
    823 /* s i n x s */ HI(31) | LO(151) /* stwx */,
    824 /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
    825 
    826 /* s i w i s */ HI(37) /* stwu */,
    827 /* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
    828 /* s i w x s */ HI(31) | LO(183) /* stwux */,
    829 /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
    830 
    831 /* -------- Double -------- */
    832 
    833 /* d   n i s */ HI(54) /* stfd */,
    834 /* d   n i l */ HI(50) /* lfd */,
    835 /* d   n x s */ HI(31) | LO(727) /* stfdx */,
    836 /* d   n x l */ HI(31) | LO(599) /* lfdx */,
    837 
    838 /* s   n i s */ HI(52) /* stfs */,
    839 /* s   n i l */ HI(48) /* lfs */,
    840 /* s   n x s */ HI(31) | LO(663) /* stfsx */,
    841 /* s   n x l */ HI(31) | LO(535) /* lfsx */,
    842 
    843 };
    844 
    845 #undef ARCH_32_64
    846 
    847 /* Simple cases (no caching is required). */
    848 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
    849 {
    850 	sljit_ins inst;
    851 
    852 	/* Should work when (arg & REG_MASK) == 0. */
    853 	SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
    854 	SLJIT_ASSERT(arg & SLJIT_MEM);
    855 
    856 	if (arg & OFFS_REG_MASK) {
    857 		if (argw & 0x3)
    858 			return 0;
    859 		if (inp_flags & ARG_TEST)
    860 			return 1;
    861 
    862 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
    863 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
    864 		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
    865 		return -1;
    866 	}
    867 
    868 	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
    869 		inp_flags &= ~WRITE_BACK;
    870 
    871 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    872 	inst = data_transfer_insts[inp_flags & MEM_MASK];
    873 	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
    874 
    875 	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
    876 		return 0;
    877 	if (inp_flags & ARG_TEST)
    878 		return 1;
    879 #endif
    880 
    881 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    882 	if (argw > SIMM_MAX || argw < SIMM_MIN)
    883 		return 0;
    884 	if (inp_flags & ARG_TEST)
    885 		return 1;
    886 
    887 	inst = data_transfer_insts[inp_flags & MEM_MASK];
    888 	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
    889 #endif
    890 
    891 	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
    892 	return -1;
    893 }
    894 
    895 /* See getput_arg below.
    896    Note: can_cache is called only for binary operators. Those operators
    897    always use word arguments without write-back. */
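        /* Worked example of the caching scheme (offsets chosen arbitrarily): for two
           accesses [reg + 0x12345] and [reg + 0x12388] the rounded high 16-bit part
           is the same (0x10000), so the first getput_arg call can leave
           "addis TMP_REG3, reg, 1" behind and the second access only needs the low
           16-bit displacement relative to TMP_REG3. */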
    898 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
    899 {
    900 	sljit_sw high_short, next_high_short;
    901 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    902 	sljit_sw diff;
    903 #endif
    904 
    905 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
    906 
    907 	if (arg & OFFS_REG_MASK)
    908 		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
    909 
    910 	if (next_arg & OFFS_REG_MASK)
    911 		return 0;
    912 
    913 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    914 	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    915 	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    916 	return high_short == next_high_short;
    917 #else
    918 	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
    919 		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    920 		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    921 		if (high_short == next_high_short)
    922 			return 1;
    923 	}
    924 
    925 	diff = argw - next_argw;
    926 	if (!(arg & REG_MASK))
    927 		return diff <= SIMM_MAX && diff >= SIMM_MIN;
    928 
    929 	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
    930 		return 1;
    931 
    932 	return 0;
    933 #endif
    934 }
    935 
    936 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    937 #define ADJUST_CACHED_IMM(imm) \
    938 	if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
    939 		/* Adjust the cached value. Fortunately this is a rare case. */ \
    940 		compiler->cache_argw += imm & 0x3; \
    941 		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
    942 		imm &= ~0x3; \
    943 	}
    944 #endif
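        /* Example of the adjustment above (values are illustrative): if the cached
           displacement difference is 0x7ffd and the instruction is DS-form (ld/std,
           marked INT_ALIGNED), the low two bits (1) are first added to TMP_REG3 so
           that the remaining, properly aligned 0x7ffc can be used as the immediate. */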
    945 
    946 /* Emit the necessary instructions. See can_cache above. */
    947 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
    948 {
    949 	sljit_s32 tmp_r;
    950 	sljit_ins inst;
    951 	sljit_sw high_short, next_high_short;
    952 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    953 	sljit_sw diff;
    954 #endif
    955 
    956 	SLJIT_ASSERT(arg & SLJIT_MEM);
    957 
    958 	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
    959 	/* Special case for "mov reg, [reg, ... ]". */
    960 	if ((arg & REG_MASK) == tmp_r)
    961 		tmp_r = TMP_REG1;
    962 
    963 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
    964 		argw &= 0x3;
    965 		/* Otherwise getput_arg_fast would have handled it. */
    966 		SLJIT_ASSERT(argw);
    967 
    968 		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
    969 			tmp_r = TMP_REG3;
    970 		else {
    971 			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
    972 				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
    973 				compiler->cache_argw = argw;
    974 				tmp_r = TMP_REG3;
    975 			}
    976 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    977 			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
    978 #else
    979 			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
    980 #endif
    981 		}
    982 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
    983 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
    984 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
    985 	}
    986 
    987 	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
    988 		inp_flags &= ~WRITE_BACK;
    989 
    990 	inst = data_transfer_insts[inp_flags & MEM_MASK];
    991 	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
    992 
    993 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    994 	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
    995 			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
    996 #endif
    997 
    998 		arg &= REG_MASK;
    999 		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
   1000 		/* Otherwise getput_arg_fast should have handled this case. */
   1001 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1002 		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
   1003 #else
   1004 		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
   1005 #endif
   1006 
   1007 		if (inp_flags & WRITE_BACK) {
   1008 			if (arg == reg) {
   1009 				FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
   1010 				reg = tmp_r;
   1011 			}
   1012 			tmp_r = arg;
   1013 			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
   1014 		}
   1015 		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
   1016 			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
   1017 				next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
   1018 				if (high_short == next_high_short) {
   1019 					compiler->cache_arg = SLJIT_MEM | arg;
   1020 					compiler->cache_argw = high_short;
   1021 					tmp_r = TMP_REG3;
   1022 				}
   1023 			}
   1024 			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
   1025 		}
   1026 		else
   1027 			tmp_r = TMP_REG3;
   1028 
   1029 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
   1030 
   1031 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1032 	}
   1033 
   1034 	/* Everything else is PPC-64 only. */
   1035 	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
   1036 		diff = argw - compiler->cache_argw;
   1037 		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1038 			ADJUST_CACHED_IMM(diff);
   1039 			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
   1040 		}
   1041 
   1042 		diff = argw - next_argw;
   1043 		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1044 			SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1045 
   1046 			compiler->cache_arg = SLJIT_IMM;
   1047 			compiler->cache_argw = argw;
   1048 			tmp_r = TMP_REG3;
   1049 		}
   1050 
   1051 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
   1052 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
   1053 	}
   1054 
   1055 	diff = argw - compiler->cache_argw;
   1056 	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1057 		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
   1058 		ADJUST_CACHED_IMM(diff);
   1059 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
   1060 	}
   1061 
   1062 	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1063 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
   1064 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
   1065 		if (compiler->cache_argw != argw) {
   1066 			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
   1067 			compiler->cache_argw = argw;
   1068 		}
   1069 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
   1070 	}
   1071 
   1072 	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
   1073 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1074 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
   1075 
   1076 		compiler->cache_arg = SLJIT_IMM;
   1077 		compiler->cache_argw = argw;
   1078 
   1079 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
   1080 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
   1081 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
   1082 	}
   1083 
   1084 	diff = argw - next_argw;
   1085 	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1086 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1087 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
   1088 		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
   1089 
   1090 		compiler->cache_arg = arg;
   1091 		compiler->cache_argw = argw;
   1092 
   1093 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
   1094 	}
   1095 
   1096 	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1097 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1098 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
   1099 
   1100 		compiler->cache_arg = SLJIT_IMM;
   1101 		compiler->cache_argw = argw;
   1102 		tmp_r = TMP_REG3;
   1103 	}
   1104 	else
   1105 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
   1106 
   1107 	/* Get the indexed version instead of the normal one. */
   1108 	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
   1109 	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
   1110 	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
   1111 #endif
   1112 }
   1113 
   1114 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
   1115 {
   1116 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
   1117 		return compiler->error;
   1118 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
   1119 }
   1120 
   1121 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
   1122 	sljit_s32 dst, sljit_sw dstw,
   1123 	sljit_s32 src1, sljit_sw src1w,
   1124 	sljit_s32 src2, sljit_sw src2w)
   1125 {
   1126 	/* arg1 goes to TMP_REG1 or src reg
   1127 	   arg2 goes to TMP_REG2, imm or src reg
   1128 	   TMP_REG3 can be used for caching
   1129 	   result goes to TMP_REG2, so storing the result can use TMP_REG1 and TMP_REG3. */
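        	/* Hypothetical walk-through (illustration only): for an add where both
        	   sources and the destination are memory operands, src1 is loaded into
        	   TMP_REG1, src2 into TMP_REG2, the operation writes TMP_REG2, and the
        	   final store may use TMP_REG1/TMP_REG3 as address temporaries, which is
        	   why the result never needs to overlap them. */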
   1130 	sljit_s32 dst_r;
   1131 	sljit_s32 src1_r;
   1132 	sljit_s32 src2_r;
   1133 	sljit_s32 sugg_src2_r = TMP_REG2;
   1134 	sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
   1135 
   1136 	if (!(input_flags & ALT_KEEP_CACHE)) {
   1137 		compiler->cache_arg = 0;
   1138 		compiler->cache_argw = 0;
   1139 	}
   1140 
   1141 	/* Destination check. */
   1142 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
   1143 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
   1144 			return SLJIT_SUCCESS;
   1145 		dst_r = TMP_REG2;
   1146 	}
   1147 	else if (FAST_IS_REG(dst)) {
   1148 		dst_r = dst;
   1149 		flags |= REG_DEST;
   1150 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
   1151 			sugg_src2_r = dst_r;
   1152 	}
   1153 	else {
   1154 		SLJIT_ASSERT(dst & SLJIT_MEM);
   1155 		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
   1156 			flags |= FAST_DEST;
   1157 			dst_r = TMP_REG2;
   1158 		}
   1159 		else {
   1160 			flags |= SLOW_DEST;
   1161 			dst_r = 0;
   1162 		}
   1163 	}
   1164 
   1165 	/* Source 1. */
   1166 	if (FAST_IS_REG(src1)) {
   1167 		src1_r = src1;
   1168 		flags |= REG1_SOURCE;
   1169 	}
   1170 	else if (src1 & SLJIT_IMM) {
   1171 		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
   1172 		src1_r = TMP_REG1;
   1173 	}
   1174 	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
   1175 		FAIL_IF(compiler->error);
   1176 		src1_r = TMP_REG1;
   1177 	}
   1178 	else
   1179 		src1_r = 0;
   1180 
   1181 	/* Source 2. */
   1182 	if (FAST_IS_REG(src2)) {
   1183 		src2_r = src2;
   1184 		flags |= REG2_SOURCE;
   1185 		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
   1186 			dst_r = src2_r;
   1187 	}
   1188 	else if (src2 & SLJIT_IMM) {
   1189 		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
   1190 		src2_r = sugg_src2_r;
   1191 	}
   1192 	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
   1193 		FAIL_IF(compiler->error);
   1194 		src2_r = sugg_src2_r;
   1195 	}
   1196 	else
   1197 		src2_r = 0;
   1198 
   1199 	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
   1200 	   If all three are zero, every argument uses a complex addressing mode and this is a binary operator. */
   1201 	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
   1202 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
   1203 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
   1204 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
   1205 		}
   1206 		else {
   1207 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
   1208 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
   1209 		}
   1210 		src1_r = TMP_REG1;
   1211 		src2_r = TMP_REG2;
   1212 	}
   1213 	else if (src1_r == 0 && src2_r == 0) {
   1214 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
   1215 		src1_r = TMP_REG1;
   1216 	}
   1217 	else if (src1_r == 0 && dst_r == 0) {
   1218 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
   1219 		src1_r = TMP_REG1;
   1220 	}
   1221 	else if (src2_r == 0 && dst_r == 0) {
   1222 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
   1223 		src2_r = sugg_src2_r;
   1224 	}
   1225 
   1226 	if (dst_r == 0)
   1227 		dst_r = TMP_REG2;
   1228 
   1229 	if (src1_r == 0) {
   1230 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
   1231 		src1_r = TMP_REG1;
   1232 	}
   1233 
   1234 	if (src2_r == 0) {
   1235 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
   1236 		src2_r = sugg_src2_r;
   1237 	}
   1238 
   1239 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
   1240 
   1241 	if (flags & (FAST_DEST | SLOW_DEST)) {
   1242 		if (flags & FAST_DEST)
   1243 			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
   1244 		else
   1245 			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
   1246 	}
   1247 	return SLJIT_SUCCESS;
   1248 }
   1249 
   1250 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
   1251 {
   1252 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1253 	sljit_s32 int_op = op & SLJIT_I32_OP;
   1254 #endif
   1255 
   1256 	CHECK_ERROR();
   1257 	CHECK(check_sljit_emit_op0(compiler, op));
   1258 
   1259 	op = GET_OPCODE(op);
   1260 	switch (op) {
   1261 	case SLJIT_BREAKPOINT:
   1262 	case SLJIT_NOP:
   1263 		return push_inst(compiler, NOP);
   1264 	case SLJIT_LMUL_UW:
   1265 	case SLJIT_LMUL_SW:
   1266 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
   1267 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1268 		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1269 		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
   1270 #else
   1271 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1272 		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
   1273 #endif
   1274 	case SLJIT_DIVMOD_UW:
   1275 	case SLJIT_DIVMOD_SW:
   1276 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
   1277 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1278 		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
   1279 		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
   1280 #else
   1281 		FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
   1282 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
   1283 #endif
   1284 		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
   1285 	case SLJIT_DIV_UW:
   1286 	case SLJIT_DIV_SW:
   1287 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1288 		return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
   1289 #else
   1290 		return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
   1291 #endif
   1292 	}
   1293 
   1294 	return SLJIT_SUCCESS;
   1295 }
   1296 
   1297 #define EMIT_MOV(type, type_flags, type_cast) \
   1298 	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
   1299 
   1300 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
   1301 	sljit_s32 dst, sljit_sw dstw,
   1302 	sljit_s32 src, sljit_sw srcw)
   1303 {
   1304 	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1305 	sljit_s32 op_flags = GET_ALL_FLAGS(op);
   1306 
   1307 	CHECK_ERROR();
   1308 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
   1309 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1310 	ADJUST_LOCAL_OFFSET(src, srcw);
   1311 
   1312 	op = GET_OPCODE(op);
   1313 	if ((src & SLJIT_IMM) && srcw == 0)
   1314 		src = TMP_ZERO;
   1315 
   1316 	if (op_flags & SLJIT_SET_O)
   1317 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1318 
   1319 	if (op_flags & SLJIT_I32_OP) {
   1320 		if (op < SLJIT_NOT) {
   1321 			if (FAST_IS_REG(src) && src == dst) {
   1322 				if (!TYPE_CAST_NEEDED(op))
   1323 					return SLJIT_SUCCESS;
   1324 			}
   1325 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1326 			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
   1327 				op = SLJIT_MOV_U32;
   1328 			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
   1329 				op = SLJIT_MOVU_U32;
   1330 			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
   1331 				op = SLJIT_MOV_S32;
   1332 			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
   1333 				op = SLJIT_MOVU_S32;
   1334 #endif
   1335 		}
   1336 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1337 		else {
   1338 			/* Most operations expect sign extended arguments. */
   1339 			flags |= INT_DATA | SIGNED_DATA;
   1340 			if (src & SLJIT_IMM)
   1341 				srcw = (sljit_s32)srcw;
   1342 		}
   1343 #endif
   1344 	}
   1345 
   1346 	switch (op) {
   1347 	case SLJIT_MOV:
   1348 	case SLJIT_MOV_P:
   1349 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1350 	case SLJIT_MOV_U32:
   1351 	case SLJIT_MOV_S32:
   1352 #endif
   1353 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
   1354 
   1355 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1356 	case SLJIT_MOV_U32:
   1357 		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));
   1358 
   1359 	case SLJIT_MOV_S32:
   1360 		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
   1361 #endif
   1362 
   1363 	case SLJIT_MOV_U8:
   1364 		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));
   1365 
   1366 	case SLJIT_MOV_S8:
   1367 		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));
   1368 
   1369 	case SLJIT_MOV_U16:
   1370 		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));
   1371 
   1372 	case SLJIT_MOV_S16:
   1373 		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));
   1374 
   1375 	case SLJIT_MOVU:
   1376 	case SLJIT_MOVU_P:
   1377 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1378 	case SLJIT_MOVU_U32:
   1379 	case SLJIT_MOVU_S32:
   1380 #endif
   1381 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
   1382 
   1383 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1384 	case SLJIT_MOVU_U32:
   1385 		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));
   1386 
   1387 	case SLJIT_MOVU_S32:
   1388 		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
   1389 #endif
   1390 
   1391 	case SLJIT_MOVU_U8:
   1392 		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));
   1393 
   1394 	case SLJIT_MOVU_S8:
   1395 		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));
   1396 
   1397 	case SLJIT_MOVU_U16:
   1398 		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));
   1399 
   1400 	case SLJIT_MOVU_S16:
   1401 		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));
   1402 
   1403 	case SLJIT_NOT:
   1404 		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1405 
   1406 	case SLJIT_NEG:
   1407 		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1408 
   1409 	case SLJIT_CLZ:
   1410 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1411 		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
   1412 #else
   1413 		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1414 #endif
   1415 	}
   1416 
   1417 	return SLJIT_SUCCESS;
   1418 }
   1419 
   1420 #undef EMIT_MOV
   1421 
   1422 #define TEST_SL_IMM(src, srcw) \
   1423 	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
   1424 
   1425 #define TEST_UL_IMM(src, srcw) \
   1426 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
   1427 
   1428 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1429 #define TEST_SH_IMM(src, srcw) \
   1430 	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
   1431 #else
   1432 #define TEST_SH_IMM(src, srcw) \
   1433 	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
   1434 #endif
   1435 
   1436 #define TEST_UH_IMM(src, srcw) \
   1437 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))
   1438 
   1439 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1440 #define TEST_ADD_IMM(src, srcw) \
   1441 	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
   1442 #else
   1443 #define TEST_ADD_IMM(src, srcw) \
   1444 	((src) & SLJIT_IMM)
   1445 #endif
   1446 
   1447 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1448 #define TEST_UI_IMM(src, srcw) \
   1449 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
   1450 #else
   1451 #define TEST_UI_IMM(src, srcw) \
   1452 	((src) & SLJIT_IMM)
   1453 #endif
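        /* The TEST_* predicates above classify immediate operands: SL and UL accept values which fit
           a signed or unsigned 16 bit field, SH and UH accept values whose low 16 bits are zero so
           only the upper halfword has to be encoded, ADD accepts values which can be split into a
           high and a low halfword adjustment, and UI accepts anything that fits in 32 bits (any
           immediate on 32 bit targets). On 64 bit targets the SH, ADD and UI forms additionally
           require the value to fit in 32 bits. */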
   1454 
   1455 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
   1456 	sljit_s32 dst, sljit_sw dstw,
   1457 	sljit_s32 src1, sljit_sw src1w,
   1458 	sljit_s32 src2, sljit_sw src2w)
   1459 {
   1460 	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1461 
   1462 	CHECK_ERROR();
   1463 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   1464 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1465 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1466 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1467 
   1468 	if ((src1 & SLJIT_IMM) && src1w == 0)
   1469 		src1 = TMP_ZERO;
   1470 	if ((src2 & SLJIT_IMM) && src2w == 0)
   1471 		src2 = TMP_ZERO;
   1472 
   1473 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1474 	if (op & SLJIT_I32_OP) {
   1475 		/* Most operations expect sign extended arguments. */
   1476 		flags |= INT_DATA | SIGNED_DATA;
   1477 		if (src1 & SLJIT_IMM)
   1478 			src1w = (sljit_s32)(src1w);
   1479 		if (src2 & SLJIT_IMM)
   1480 			src2w = (sljit_s32)(src2w);
   1481 		if (GET_FLAGS(op))
   1482 			flags |= ALT_SIGN_EXT;
   1483 	}
   1484 #endif
   1485 	if (op & SLJIT_SET_O)
   1486 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1487 	if (src2 == TMP_REG2)
   1488 		flags |= ALT_KEEP_CACHE;
   1489 
   1490 	switch (GET_OPCODE(op)) {
   1491 	case SLJIT_ADD:
   1492 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1493 			if (TEST_SL_IMM(src2, src2w)) {
   1494 				compiler->imm = src2w & 0xffff;
   1495 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1496 			}
   1497 			if (TEST_SL_IMM(src1, src1w)) {
   1498 				compiler->imm = src1w & 0xffff;
   1499 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1500 			}
   1501 			if (TEST_SH_IMM(src2, src2w)) {
   1502 				compiler->imm = (src2w >> 16) & 0xffff;
   1503 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1504 			}
   1505 			if (TEST_SH_IMM(src1, src1w)) {
   1506 				compiler->imm = (src1w >> 16) & 0xffff;
   1507 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1508 			}
   1509 			/* Range between -1 and -32768 is covered above. */
   1510 			if (TEST_ADD_IMM(src2, src2w)) {
   1511 				compiler->imm = src2w & 0xffffffff;
   1512 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1513 			}
   1514 			if (TEST_ADD_IMM(src1, src1w)) {
   1515 				compiler->imm = src1w & 0xffffffff;
   1516 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
   1517 			}
   1518 		}
   1519 		if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
   1520 			if (TEST_SL_IMM(src2, src2w)) {
   1521 				compiler->imm = src2w & 0xffff;
   1522 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1523 			}
   1524 			if (TEST_SL_IMM(src1, src1w)) {
   1525 				compiler->imm = src1w & 0xffff;
   1526 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1527 			}
   1528 		}
   1529 		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
   1530 
   1531 	case SLJIT_ADDC:
   1532 		return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
   1533 
   1534 	case SLJIT_SUB:
   1535 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1536 			if (TEST_SL_IMM(src2, -src2w)) {
   1537 				compiler->imm = (-src2w) & 0xffff;
   1538 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1539 			}
   1540 			if (TEST_SL_IMM(src1, src1w)) {
   1541 				compiler->imm = src1w & 0xffff;
   1542 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1543 			}
   1544 			if (TEST_SH_IMM(src2, -src2w)) {
   1545 				compiler->imm = ((-src2w) >> 16) & 0xffff;
   1546 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1547 			}
   1548 			/* Range between -1 and -32768 is covered above. */
   1549 			if (TEST_ADD_IMM(src2, -src2w)) {
   1550 				compiler->imm = -src2w & 0xffffffff;
   1551 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1552 			}
   1553 		}
   1554 		if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
   1555 			if (!(op & SLJIT_SET_U)) {
   1556 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1557 				if (TEST_SL_IMM(src2, src2w)) {
   1558 					compiler->imm = src2w & 0xffff;
   1559 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1560 				}
   1561 				if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
   1562 					compiler->imm = src1w & 0xffff;
   1563 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1564 				}
   1565 			}
   1566 			if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
   1567 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1568 				if (TEST_UL_IMM(src2, src2w)) {
   1569 					compiler->imm = src2w & 0xffff;
   1570 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1571 				}
   1572 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
   1573 			}
   1574 			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
   1575 				compiler->imm = src2w;
   1576 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1577 			}
   1578 			return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
   1579 		}
   1580 		if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
   1581 			if (TEST_SL_IMM(src2, -src2w)) {
   1582 				compiler->imm = (-src2w) & 0xffff;
   1583 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1584 			}
   1585 		}
   1586 		/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1587 		return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
   1588 
   1589 	case SLJIT_SUBC:
   1590 		return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
   1591 
   1592 	case SLJIT_MUL:
   1593 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1594 		if (op & SLJIT_I32_OP)
   1595 			flags |= ALT_FORM2;
   1596 #endif
   1597 		if (!GET_FLAGS(op)) {
   1598 			if (TEST_SL_IMM(src2, src2w)) {
   1599 				compiler->imm = src2w & 0xffff;
   1600 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1601 			}
   1602 			if (TEST_SL_IMM(src1, src1w)) {
   1603 				compiler->imm = src1w & 0xffff;
   1604 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1605 			}
   1606 		}
   1607 		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
   1608 
   1609 	case SLJIT_AND:
   1610 	case SLJIT_OR:
   1611 	case SLJIT_XOR:
   1612 		/* Commutative unsigned operations. */
   1613 		if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
   1614 			if (TEST_UL_IMM(src2, src2w)) {
   1615 				compiler->imm = src2w;
   1616 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1617 			}
   1618 			if (TEST_UL_IMM(src1, src1w)) {
   1619 				compiler->imm = src1w;
   1620 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1621 			}
   1622 			if (TEST_UH_IMM(src2, src2w)) {
   1623 				compiler->imm = (src2w >> 16) & 0xffff;
   1624 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1625 			}
   1626 			if (TEST_UH_IMM(src1, src1w)) {
   1627 				compiler->imm = (src1w >> 16) & 0xffff;
   1628 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1629 			}
   1630 		}
   1631 		if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
   1632 			if (TEST_UI_IMM(src2, src2w)) {
   1633 				compiler->imm = src2w;
   1634 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1635 			}
   1636 			if (TEST_UI_IMM(src1, src1w)) {
   1637 				compiler->imm = src1w;
   1638 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1639 			}
   1640 		}
   1641 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1642 
   1643 	case SLJIT_ASHR:
   1644 		if (op & SLJIT_KEEP_FLAGS)
   1645 			flags |= ALT_FORM3;
   1646 		/* Fall through. */
   1647 	case SLJIT_SHL:
   1648 	case SLJIT_LSHR:
   1649 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1650 		if (op & SLJIT_I32_OP)
   1651 			flags |= ALT_FORM2;
   1652 #endif
   1653 		if (src2 & SLJIT_IMM) {
   1654 			compiler->imm = src2w;
   1655 			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1656 		}
   1657 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1658 	}
   1659 
   1660 	return SLJIT_SUCCESS;
   1661 }
   1662 
   1663 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
   1664 {
   1665 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
   1666 	return reg_map[reg];
   1667 }
   1668 
   1669 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
   1670 {
   1671 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
   1672 	return reg;
   1673 }
   1674 
   1675 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
   1676 	void *instruction, sljit_s32 size)
   1677 {
   1678 	CHECK_ERROR();
   1679 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
   1680 
   1681 	return push_inst(compiler, *(sljit_ins*)instruction);
   1682 }
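        /* Illustrative use (an assumption, not taken from this file): this backend reads exactly one
           sljit_ins (a 4 byte instruction word) from the pointer, so a raw PowerPC nop (ori r0, r0, 0)
           could be emitted as:

        	sljit_ins nop = 0x60000000;
        	sljit_emit_op_custom(compiler, &nop, sizeof(nop));
        */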
   1683 
   1684 /* --------------------------------------------------------------------- */
   1685 /*  Floating point operators                                             */
   1686 /* --------------------------------------------------------------------- */
   1687 
   1688 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
   1689 {
   1690 #ifdef SLJIT_IS_FPU_AVAILABLE
   1691 	return SLJIT_IS_FPU_AVAILABLE;
   1692 #else
   1693 	/* Available by default. */
   1694 	return 1;
   1695 #endif
   1696 }
   1697 
   1698 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
   1699 #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
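        /* FLOAT_DATA turns the SLJIT_F32_OP bit (0x100) into an extra transfer flag bit
           (0x100 >> 6 == 0x4) on top of DOUBLE_DATA; the compile time assert in sljit_emit_fop1 below
           guarantees that DOUBLE_DATA leaves this bit free. SELECT_FOP simply picks the single or
           double precision opcode. */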
   1700 
   1701 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1702 #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
   1703 #else
   1704 #define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
   1705 
   1706 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
   1707 #define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
   1708 #define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
   1709 #else
   1710 #define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
   1711 #define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
   1712 #endif
   1713 
   1714 #endif /* SLJIT_CONFIG_PPC_64 */
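        /* These offsets address scratch slots in the stack frame which the conversion helpers below
           use to move values between integer and floating point registers through memory rather than
           moving them directly between register files. On 32 bit targets a double is assembled from
           two words, and the LOW/HI offsets select the word order according to the endianness. */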
   1715 
   1716 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
   1717 	sljit_s32 dst, sljit_sw dstw,
   1718 	sljit_s32 src, sljit_sw srcw)
   1719 {
   1720 	if (src & SLJIT_MEM) {
   1721 		/* We can ignore the temporary data store on the stack from a caching point of view. */
   1722 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
   1723 		src = TMP_FREG1;
   1724 	}
   1725 
   1726 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1727 	op = GET_OPCODE(op);
   1728 	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
   1729 
   1730 	if (dst == SLJIT_UNUSED)
   1731 		return SLJIT_SUCCESS;
   1732 
   1733 	if (op == SLJIT_CONV_SW_FROM_F64) {
   1734 		if (FAST_IS_REG(dst)) {
   1735 			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
   1736 			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
   1737 		}
   1738 		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
   1739 	}
   1740 
   1741 #else
   1742 	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
   1743 
   1744 	if (dst == SLJIT_UNUSED)
   1745 		return SLJIT_SUCCESS;
   1746 #endif
   1747 
   1748 	if (FAST_IS_REG(dst)) {
   1749 		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
   1750 		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
   1751 		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
   1752 	}
   1753 
   1754 	SLJIT_ASSERT(dst & SLJIT_MEM);
   1755 
   1756 	if (dst & OFFS_REG_MASK) {
   1757 		dstw &= 0x3;
   1758 		if (dstw) {
   1759 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1760 			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
   1761 #else
   1762 			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
   1763 #endif
   1764 			dstw = TMP_REG1;
   1765 		}
   1766 		else
   1767 			dstw = OFFS_REG(dst);
   1768 	}
   1769 	else {
   1770 		if ((dst & REG_MASK) && !dstw) {
   1771 			dstw = dst & REG_MASK;
   1772 			dst = 0;
   1773 		}
   1774 		else {
   1775 			/* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
   1776 			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
   1777 			dstw = TMP_REG1;
   1778 		}
   1779 	}
   1780 
   1781 	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
   1782 }
   1783 
   1784 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
   1785 	sljit_s32 dst, sljit_sw dstw,
   1786 	sljit_s32 src, sljit_sw srcw)
   1787 {
   1788 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1789 
   1790 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1791 
   1792 	if (src & SLJIT_IMM) {
   1793 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
   1794 			srcw = (sljit_s32)srcw;
   1795 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
   1796 		src = TMP_REG1;
   1797 	}
   1798 	else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
   1799 		if (FAST_IS_REG(src))
   1800 			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
   1801 		else
   1802 			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1803 		src = TMP_REG1;
   1804 	}
   1805 
   1806 	if (FAST_IS_REG(src)) {
   1807 		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1808 		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
   1809 	}
   1810 	else
   1811 		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
   1812 
   1813 	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
   1814 
   1815 	if (dst & SLJIT_MEM)
   1816 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
   1817 	if (op & SLJIT_F32_OP)
   1818 		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
   1819 	return SLJIT_SUCCESS;
   1820 
   1821 #else
   1822 
   1823 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1824 	sljit_s32 invert_sign = 1;
   1825 
   1826 	if (src & SLJIT_IMM) {
   1827 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
   1828 		src = TMP_REG1;
   1829 		invert_sign = 0;
   1830 	}
   1831 	else if (!FAST_IS_REG(src)) {
   1832 		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
   1833 		src = TMP_REG1;
   1834 	}
   1835 
   1836 	/* First, a special double precision floating point value is constructed: (2^52 + (input xor 2^31)).
   1837 	   The double precision format has 53 bits of precision, so every integer below 2^53 is exact and
   1838 	   the lower 32 bits of the constructed value hold the xor-ed input. The xor with 2^31 is the same
   1839 	   as adding 0x80000000 to the input, which shifts it into the 0 - 0xffffffff range. To get the
   1840 	   converted floating point value, we need to subtract 2^52 + 2^31 from the constructed value. */
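        	/* Worked example: for the input -1 (0xffffffff) the xor yields 0x7fffffff, so the
        	   constructed double is 2^52 + 0x7fffffff. Subtracting 2^52 + 2^31 leaves
        	   0x7fffffff - 0x80000000 = -1, the exact floating point value of the signed input. */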
   1841 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
   1842 	if (invert_sign)
   1843 		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
   1844 	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1845 	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
   1846 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
   1847 	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
   1848 	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1849 	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
   1850 
   1851 	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
   1852 
   1853 	if (dst & SLJIT_MEM)
   1854 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
   1855 	if (op & SLJIT_F32_OP)
   1856 		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
   1857 	return SLJIT_SUCCESS;
   1858 
   1859 #endif
   1860 }
   1861 
   1862 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
   1863 	sljit_s32 src1, sljit_sw src1w,
   1864 	sljit_s32 src2, sljit_sw src2w)
   1865 {
   1866 	if (src1 & SLJIT_MEM) {
   1867 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
   1868 		src1 = TMP_FREG1;
   1869 	}
   1870 
   1871 	if (src2 & SLJIT_MEM) {
   1872 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
   1873 		src2 = TMP_FREG2;
   1874 	}
   1875 
   1876 	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
   1877 }
   1878 
   1879 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
   1880 	sljit_s32 dst, sljit_sw dstw,
   1881 	sljit_s32 src, sljit_sw srcw)
   1882 {
   1883 	sljit_s32 dst_r;
   1884 
   1885 	CHECK_ERROR();
   1886 	compiler->cache_arg = 0;
   1887 	compiler->cache_argw = 0;
   1888 
   1889 	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
   1890 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
   1891 
   1892 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
   1893 		op ^= SLJIT_F32_OP;
   1894 
   1895 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1896 
   1897 	if (src & SLJIT_MEM) {
   1898 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
   1899 		src = dst_r;
   1900 	}
   1901 
   1902 	switch (GET_OPCODE(op)) {
   1903 	case SLJIT_CONV_F64_FROM_F32:
   1904 		op ^= SLJIT_F32_OP;
   1905 		if (op & SLJIT_F32_OP) {
   1906 			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
   1907 			break;
   1908 		}
   1909 		/* Fall through. */
   1910 	case SLJIT_MOV_F64:
   1911 		if (src != dst_r) {
   1912 			if (dst_r != TMP_FREG1)
   1913 				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
   1914 			else
   1915 				dst_r = src;
   1916 		}
   1917 		break;
   1918 	case SLJIT_NEG_F64:
   1919 		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
   1920 		break;
   1921 	case SLJIT_ABS_F64:
   1922 		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
   1923 		break;
   1924 	}
   1925 
   1926 	if (dst & SLJIT_MEM)
   1927 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
   1928 	return SLJIT_SUCCESS;
   1929 }
   1930 
   1931 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
   1932 	sljit_s32 dst, sljit_sw dstw,
   1933 	sljit_s32 src1, sljit_sw src1w,
   1934 	sljit_s32 src2, sljit_sw src2w)
   1935 {
   1936 	sljit_s32 dst_r, flags = 0;
   1937 
   1938 	CHECK_ERROR();
   1939 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   1940 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1941 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1942 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1943 
   1944 	compiler->cache_arg = 0;
   1945 	compiler->cache_argw = 0;
   1946 
   1947 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
   1948 
   1949 	if (src1 & SLJIT_MEM) {
   1950 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
   1951 			FAIL_IF(compiler->error);
   1952 			src1 = TMP_FREG1;
   1953 		} else
   1954 			flags |= ALT_FORM1;
   1955 	}
   1956 
   1957 	if (src2 & SLJIT_MEM) {
   1958 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
   1959 			FAIL_IF(compiler->error);
   1960 			src2 = TMP_FREG2;
   1961 		} else
   1962 			flags |= ALT_FORM2;
   1963 	}
   1964 
   1965 	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
   1966 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
   1967 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
   1968 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
   1969 		}
   1970 		else {
   1971 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
   1972 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
   1973 		}
   1974 	}
   1975 	else if (flags & ALT_FORM1)
   1976 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
   1977 	else if (flags & ALT_FORM2)
   1978 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
   1979 
   1980 	if (flags & ALT_FORM1)
   1981 		src1 = TMP_FREG1;
   1982 	if (flags & ALT_FORM2)
   1983 		src2 = TMP_FREG2;
   1984 
   1985 	switch (GET_OPCODE(op)) {
   1986 	case SLJIT_ADD_F64:
   1987 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
   1988 		break;
   1989 
   1990 	case SLJIT_SUB_F64:
   1991 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
   1992 		break;
   1993 
   1994 	case SLJIT_MUL_F64:
   1995 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL uses FC as src2 */));
   1996 		break;
   1997 
   1998 	case SLJIT_DIV_F64:
   1999 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
   2000 		break;
   2001 	}
   2002 
   2003 	if (dst_r == TMP_FREG2)
   2004 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
   2005 
   2006 	return SLJIT_SUCCESS;
   2007 }
   2008 
   2009 #undef FLOAT_DATA
   2010 #undef SELECT_FOP
   2011 
   2012 /* --------------------------------------------------------------------- */
   2013 /*  Other instructions                                                   */
   2014 /* --------------------------------------------------------------------- */
   2015 
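        /* Fast calls use the PowerPC link register directly: sljit_emit_fast_enter reads the return
           address with MFLR and stores it to dst, while sljit_emit_fast_return loads it back with
           MTLR and returns through BLR. */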
   2016 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
   2017 {
   2018 	CHECK_ERROR();
   2019 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
   2020 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2021 
   2022 	/* For UNUSED dst. Uncommon, but possible. */
   2023 	if (dst == SLJIT_UNUSED)
   2024 		return SLJIT_SUCCESS;
   2025 
   2026 	if (FAST_IS_REG(dst))
   2027 		return push_inst(compiler, MFLR | D(dst));
   2028 
   2029 	/* Memory. */
   2030 	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
   2031 	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
   2032 }
   2033 
   2034 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
   2035 {
   2036 	CHECK_ERROR();
   2037 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
   2038 	ADJUST_LOCAL_OFFSET(src, srcw);
   2039 
   2040 	if (FAST_IS_REG(src))
   2041 		FAIL_IF(push_inst(compiler, MTLR | S(src)));
   2042 	else {
   2043 		if (src & SLJIT_MEM)
   2044 			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
   2045 		else if (src & SLJIT_IMM)
   2046 			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
   2047 		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
   2048 	}
   2049 	return push_inst(compiler, BLR);
   2050 }
   2051 
   2052 /* --------------------------------------------------------------------- */
   2053 /*  Conditional instructions                                             */
   2054 /* --------------------------------------------------------------------- */
   2055 
   2056 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2057 {
   2058 	struct sljit_label *label;
   2059 
   2060 	CHECK_ERROR_PTR();
   2061 	CHECK_PTR(check_sljit_emit_label(compiler));
   2062 
   2063 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2064 		return compiler->last_label;
   2065 
   2066 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2067 	PTR_FAIL_IF(!label);
   2068 	set_label(label, compiler);
   2069 	return label;
   2070 }
   2071 
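        /* Packs the BO and BI fields of a conditional branch for the given sljit condition: BO
           (placed at bit 21) selects "branch if the CR bit is set" (12), "branch if the CR bit is
           clear" (4) or "branch always" (20), and BI (placed at bit 16) selects the CR bit itself.
           Integer conditions test CR bits 0-3 (LT, GT, EQ, SO); floating point conditions test CR
           bits 4-7, the field targeted by FCMPU in sljit_emit_fop1_cmp. */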
   2072 static sljit_ins get_bo_bi_flags(sljit_s32 type)
   2073 {
   2074 	switch (type) {
   2075 	case SLJIT_EQUAL:
   2076 		return (12 << 21) | (2 << 16);
   2077 
   2078 	case SLJIT_NOT_EQUAL:
   2079 		return (4 << 21) | (2 << 16);
   2080 
   2081 	case SLJIT_LESS:
   2082 	case SLJIT_LESS_F64:
   2083 		return (12 << 21) | ((4 + 0) << 16);
   2084 
   2085 	case SLJIT_GREATER_EQUAL:
   2086 	case SLJIT_GREATER_EQUAL_F64:
   2087 		return (4 << 21) | ((4 + 0) << 16);
   2088 
   2089 	case SLJIT_GREATER:
   2090 	case SLJIT_GREATER_F64:
   2091 		return (12 << 21) | ((4 + 1) << 16);
   2092 
   2093 	case SLJIT_LESS_EQUAL:
   2094 	case SLJIT_LESS_EQUAL_F64:
   2095 		return (4 << 21) | ((4 + 1) << 16);
   2096 
   2097 	case SLJIT_SIG_LESS:
   2098 		return (12 << 21) | (0 << 16);
   2099 
   2100 	case SLJIT_SIG_GREATER_EQUAL:
   2101 		return (4 << 21) | (0 << 16);
   2102 
   2103 	case SLJIT_SIG_GREATER:
   2104 		return (12 << 21) | (1 << 16);
   2105 
   2106 	case SLJIT_SIG_LESS_EQUAL:
   2107 		return (4 << 21) | (1 << 16);
   2108 
   2109 	case SLJIT_OVERFLOW:
   2110 	case SLJIT_MUL_OVERFLOW:
   2111 		return (12 << 21) | (3 << 16);
   2112 
   2113 	case SLJIT_NOT_OVERFLOW:
   2114 	case SLJIT_MUL_NOT_OVERFLOW:
   2115 		return (4 << 21) | (3 << 16);
   2116 
   2117 	case SLJIT_EQUAL_F64:
   2118 		return (12 << 21) | ((4 + 2) << 16);
   2119 
   2120 	case SLJIT_NOT_EQUAL_F64:
   2121 		return (4 << 21) | ((4 + 2) << 16);
   2122 
   2123 	case SLJIT_UNORDERED_F64:
   2124 		return (12 << 21) | ((4 + 3) << 16);
   2125 
   2126 	case SLJIT_ORDERED_F64:
   2127 		return (4 << 21) | ((4 + 3) << 16);
   2128 
   2129 	default:
   2130 		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
   2131 		return (20 << 21);
   2132 	}
   2133 }
   2134 
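        /* sljit_emit_jump always emits the worst case sequence: a patchable constant load of the
           target address into TMP_CALL_REG (emit_const), an MTCTR, and a conditional branch through
           the count register; jump->addr records where the branch was emitted so the jump can be
           resolved later. */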
   2135 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
   2136 {
   2137 	struct sljit_jump *jump;
   2138 	sljit_ins bo_bi_flags;
   2139 
   2140 	CHECK_ERROR_PTR();
   2141 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
   2142 
   2143 	bo_bi_flags = get_bo_bi_flags(type & 0xff);
   2144 	if (!bo_bi_flags)
   2145 		return NULL;
   2146 
   2147 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2148 	PTR_FAIL_IF(!jump);
   2149 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   2150 	type &= 0xff;
   2151 
   2152 	/* On PPC, we don't need to touch the arguments. */
   2153 	if (type < SLJIT_JUMP)
   2154 		jump->flags |= IS_COND;
   2155 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   2156 	if (type >= SLJIT_CALL0)
   2157 		jump->flags |= IS_CALL;
   2158 #endif
   2159 
   2160 	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
   2161 	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
   2162 	jump->addr = compiler->size;
   2163 	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
   2164 	return jump;
   2165 }
   2166 
   2167 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
   2168 {
   2169 	struct sljit_jump *jump = NULL;
   2170 	sljit_s32 src_r;
   2171 
   2172 	CHECK_ERROR();
   2173 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
   2174 	ADJUST_LOCAL_OFFSET(src, srcw);
   2175 
   2176 	if (FAST_IS_REG(src)) {
   2177 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   2178 		if (type >= SLJIT_CALL0) {
   2179 			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
   2180 			src_r = TMP_CALL_REG;
   2181 		}
   2182 		else
   2183 			src_r = src;
   2184 #else
   2185 		src_r = src;
   2186 #endif
   2187 	} else if (src & SLJIT_IMM) {
   2188 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2189 		FAIL_IF(!jump);
   2190 		set_jump(jump, compiler, JUMP_ADDR);
   2191 		jump->u.target = srcw;
   2192 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   2193 		if (type >= SLJIT_CALL0)
   2194 			jump->flags |= IS_CALL;
   2195 #endif
   2196 		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
   2197 		src_r = TMP_CALL_REG;
   2198 	}
   2199 	else {
   2200 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
   2201 		src_r = TMP_CALL_REG;
   2202 	}
   2203 
   2204 	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
   2205 	if (jump)
   2206 		jump->addr = compiler->size;
   2207 	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
   2208 }
   2209 
   2210 /* Get a bit from CR; all other bits are zeroed. */
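        /* MFCR copies the whole condition register into dst; the RLWINM then rotates left by
           (bit + 1), which moves CR bit "bit" into the least significant position, and the 31..31
           mask clears every other bit. */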
   2211 #define GET_CR_BIT(bit, dst) \
   2212 	FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
   2213 	FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
   2214 
   2215 #define INVERT_BIT(dst) \
   2216 	FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
   2217 
   2218 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
   2219 	sljit_s32 dst, sljit_sw dstw,
   2220 	sljit_s32 src, sljit_sw srcw,
   2221 	sljit_s32 type)
   2222 {
   2223 	sljit_s32 reg, input_flags;
   2224 	sljit_s32 flags = GET_ALL_FLAGS(op);
   2225 	sljit_sw original_dstw = dstw;
   2226 
   2227 	CHECK_ERROR();
   2228 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
   2229 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2230 
   2231 	if (dst == SLJIT_UNUSED)
   2232 		return SLJIT_SUCCESS;
   2233 
   2234 	op = GET_OPCODE(op);
   2235 	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
   2236 
   2237 	compiler->cache_arg = 0;
   2238 	compiler->cache_argw = 0;
   2239 	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
   2240 		ADJUST_LOCAL_OFFSET(src, srcw);
   2241 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   2242 		input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
   2243 #else
   2244 		input_flags = WORD_DATA;
   2245 #endif
   2246 		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
   2247 		src = TMP_REG1;
   2248 		srcw = 0;
   2249 	}
   2250 
   2251 	switch (type & 0xff) {
   2252 	case SLJIT_EQUAL:
   2253 		GET_CR_BIT(2, reg);
   2254 		break;
   2255 
   2256 	case SLJIT_NOT_EQUAL:
   2257 		GET_CR_BIT(2, reg);
   2258 		INVERT_BIT(reg);
   2259 		break;
   2260 
   2261 	case SLJIT_LESS:
   2262 	case SLJIT_LESS_F64:
   2263 		GET_CR_BIT(4 + 0, reg);
   2264 		break;
   2265 
   2266 	case SLJIT_GREATER_EQUAL:
   2267 	case SLJIT_GREATER_EQUAL_F64:
   2268 		GET_CR_BIT(4 + 0, reg);
   2269 		INVERT_BIT(reg);
   2270 		break;
   2271 
   2272 	case SLJIT_GREATER:
   2273 	case SLJIT_GREATER_F64:
   2274 		GET_CR_BIT(4 + 1, reg);
   2275 		break;
   2276 
   2277 	case SLJIT_LESS_EQUAL:
   2278 	case SLJIT_LESS_EQUAL_F64:
   2279 		GET_CR_BIT(4 + 1, reg);
   2280 		INVERT_BIT(reg);
   2281 		break;
   2282 
   2283 	case SLJIT_SIG_LESS:
   2284 		GET_CR_BIT(0, reg);
   2285 		break;
   2286 
   2287 	case SLJIT_SIG_GREATER_EQUAL:
   2288 		GET_CR_BIT(0, reg);
   2289 		INVERT_BIT(reg);
   2290 		break;
   2291 
   2292 	case SLJIT_SIG_GREATER:
   2293 		GET_CR_BIT(1, reg);
   2294 		break;
   2295 
   2296 	case SLJIT_SIG_LESS_EQUAL:
   2297 		GET_CR_BIT(1, reg);
   2298 		INVERT_BIT(reg);
   2299 		break;
   2300 
   2301 	case SLJIT_OVERFLOW:
   2302 	case SLJIT_MUL_OVERFLOW:
   2303 		GET_CR_BIT(3, reg);
   2304 		break;
   2305 
   2306 	case SLJIT_NOT_OVERFLOW:
   2307 	case SLJIT_MUL_NOT_OVERFLOW:
   2308 		GET_CR_BIT(3, reg);
   2309 		INVERT_BIT(reg);
   2310 		break;
   2311 
   2312 	case SLJIT_EQUAL_F64:
   2313 		GET_CR_BIT(4 + 2, reg);
   2314 		break;
   2315 
   2316 	case SLJIT_NOT_EQUAL_F64:
   2317 		GET_CR_BIT(4 + 2, reg);
   2318 		INVERT_BIT(reg);
   2319 		break;
   2320 
   2321 	case SLJIT_UNORDERED_F64:
   2322 		GET_CR_BIT(4 + 3, reg);
   2323 		break;
   2324 
   2325 	case SLJIT_ORDERED_F64:
   2326 		GET_CR_BIT(4 + 3, reg);
   2327 		INVERT_BIT(reg);
   2328 		break;
   2329 
   2330 	default:
   2331 		SLJIT_ASSERT_STOP();
   2332 		break;
   2333 	}
   2334 
   2335 	if (op < SLJIT_ADD) {
   2336 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   2337 		if (op == SLJIT_MOV)
   2338 			input_flags = WORD_DATA;
   2339 		else {
   2340 			op = SLJIT_MOV_U32;
   2341 			input_flags = INT_DATA;
   2342 		}
   2343 #else
   2344 		op = SLJIT_MOV;
   2345 		input_flags = WORD_DATA;
   2346 #endif
   2347 		if (reg != TMP_REG2)
   2348 			return SLJIT_SUCCESS;
   2349 		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
   2350 	}
   2351 
   2352 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
   2353 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
   2354 	compiler->skip_checks = 1;
   2355 #endif
   2356 	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
   2357 }
   2358 
   2359 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
   2360 {
   2361 	struct sljit_const *const_;
   2362 	sljit_s32 reg;
   2363 
   2364 	CHECK_ERROR_PTR();
   2365 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
   2366 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2367 
   2368 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   2369 	PTR_FAIL_IF(!const_);
   2370 	set_const(const_, compiler);
   2371 
   2372 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
   2373 
   2374 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
   2375 
   2376 	if (dst & SLJIT_MEM)
   2377 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
   2378 	return const_;
   2379 }
   2380