Home | History | Annotate | Download | only in sljit
      1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
     28 {
     29 	return "PowerPC" SLJIT_CPUINFO;
     30 }
     31 
/* Length of an instruction word.
   Both for ppc-32 and ppc-64. */
typedef sljit_ui sljit_ins;

/* Selects the stack frame variant in which sljit_emit_enter and
   sljit_emit_return keep the saved link register at offset
   2 * sizeof(sljit_sw) from the stack pointer instead of sizeof(sljit_sw).
   Enabled for ppc-32 on AIX and for every ppc-64 configuration. */
#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define SLJIT_PPC_STACK_FRAME_V2 1
#endif

/* Needed for _sync_cache_range, which ppc_cache_flush calls on AIX. */
#ifdef _AIX
#include <sys/cache.h>
#endif

/* On little endian configurations a dedicated TMP_CALL_REG is used (see its
   definition below) and detect_jump_type treats IS_CALL jumps specially. */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
#endif
     48 
/* Makes the freshly written instruction words in [from, to) visible to
   instruction fetch.

   from: first instruction word of the range.
   to:   one past the last instruction word of the range.

   The implementation is selected at compile time:
   - AIX: delegates to _sync_cache_range with the byte length of the range.
   - GCC-style inline assembly, POWER (_ARCH_PWR / _ARCH_PWR2): clf + dcs
     for every word, then a single ics.
   - GCC-style inline assembly, PowerPC: dcbf + sync + icbi for every word,
     then a single isync.
   - Anything else is rejected with #error. */
static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#ifdef _AIX
	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
	/* Cache flush for POWER architecture. */
	/* The loop advances by one instruction word, so every word address
	   in the range is flushed individually. */
	while (from < to) {
		__asm__ volatile (
			"clf 0, %0\n"
			"dcs\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "ics" );
#	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
#	error "Cache flush is not implemented for PowerPC/POWER common mode."
#	else
	/* Cache flush for PowerPC architecture. */
	/* Same per-word iteration as above: flush the data cache block,
	   synchronize, then invalidate the instruction cache block. */
	while (from < to) {
		__asm__ volatile (
			"dcbf 0, %0\n"
			"sync\n"
			"icbi 0, %0\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "isync" );
#	endif
#	ifdef __xlc__
#	warning "This file may fail to compile if -qfuncsect is used"
#	endif
#elif defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
#else
#error "This platform requires a cache flush implementation."
#endif /* _AIX */
}
     89 
/* Indices (into reg_map) of the temporary registers used internally by the
   code generator. They follow the user visible registers. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)

/* A separate register is reserved for call targets only when
   SLJIT_PASS_ENTRY_ADDR_TO_CALL is enabled; otherwise TMP_REG2 is reused. */
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
#define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
#else
#define TMP_CALL_REG	TMP_REG2
#endif

/* Floating point temporaries. These are used directly as register numbers
   by the FD/FS/FA/FB/FC macros (no mapping table). */
#define TMP_FREG1	(0)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Maps an sljit register index to the machine register number that the
   D/S/A/B/C macros place into an instruction word.
   NOTE(review): the 29 initializers match the declared size only if
   SLJIT_NUMBER_OF_REGISTERS is 22 (defined elsewhere) - confirm. Under that
   assumption the last five entries (8, 9, 10, 31, 12) belong to TMP_REG1,
   TMP_REG2, TMP_REG3, TMP_ZERO and the dedicated TMP_CALL_REG. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
};
    107 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* General purpose register fields: the sljit register index is translated
   through reg_map and shifted to bit 21 (D, S), 16 (A), 11 (B) or 6 (C). */
#define D(d)		(reg_map[d] << 21)
#define S(s)		(reg_map[s] << 21)
#define A(a)		(reg_map[a] << 16)
#define B(b)		(reg_map[b] << 11)
#define C(c)		(reg_map[c] << 6)
/* Floating point register fields: same bit positions, but the argument is
   used as the register number directly. */
#define FD(fd)		((fd) << 21)
#define FS(fs)		((fs) << 21)
#define FA(fa)		((fa) << 16)
#define FB(fb)		((fb) << 11)
#define FC(fc)		((fc) << 6)
/* Keeps the low 16 bits of an immediate value. */
#define IMM(imm)	((imm) & 0xffff)
/* Places a raw value (no reg_map lookup) at bit 21. */
#define CRD(d)		((d) << 21)
    123 
    124 /* Instruction bit sections.
    125    OE and Rc flag (see ALT_SET_FLAGS). */
    126 #define OERC(flags)	(((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
    127 /* Rc flag (see ALT_SET_FLAGS). */
    128 #define RC(flags)	((flags & ALT_SET_FLAGS) >> 10)
    129 #define HI(opcode)	((opcode) << 26)
    130 #define LO(opcode)	((opcode) << 1)
    131 
/* Opcode templates. HI() supplies the primary opcode and LO() the extended
   opcode; a few entries also OR in fixed operand bits (for example the
   0x80000 / 0x90000 / 0x10000 constants of the MFxx / MTxx entries, or the
   low bit that distinguishes STDU from STD). Register and immediate fields
   are added by the callers with the D/S/A/B/C/IMM macros.
   NOP and ORI share the same template (HI(24)). */
#define ADD		(HI(31) | LO(266))
#define ADDC		(HI(31) | LO(10))
#define ADDE		(HI(31) | LO(138))
#define ADDI		(HI(14))
#define ADDIC		(HI(13))
#define ADDIS		(HI(15))
#define ADDME		(HI(31) | LO(234))
#define AND		(HI(31) | LO(28))
#define ANDI		(HI(28))
#define ANDIS		(HI(29))
#define Bx		(HI(18))
#define BCx		(HI(16))
#define BCCTR		(HI(19) | LO(528) | (3 << 11))
#define BLR		(HI(19) | LO(16) | (0x14 << 21))
#define CNTLZD		(HI(31) | LO(58))
#define CNTLZW		(HI(31) | LO(26))
#define CMP		(HI(31) | LO(0))
#define CMPI		(HI(11))
#define CMPL		(HI(31) | LO(32))
#define CMPLI		(HI(10))
#define CROR		(HI(19) | LO(449))
#define DIVD		(HI(31) | LO(489))
#define DIVDU		(HI(31) | LO(457))
#define DIVW		(HI(31) | LO(491))
#define DIVWU		(HI(31) | LO(459))
#define EXTSB		(HI(31) | LO(954))
#define EXTSH		(HI(31) | LO(922))
#define EXTSW		(HI(31) | LO(986))
#define FABS		(HI(63) | LO(264))
#define FADD		(HI(63) | LO(21))
#define FADDS		(HI(59) | LO(21))
#define FCFID		(HI(63) | LO(846))
#define FCMPU		(HI(63) | LO(0))
#define FCTIDZ		(HI(63) | LO(815))
#define FCTIWZ		(HI(63) | LO(15))
#define FDIV		(HI(63) | LO(18))
#define FDIVS		(HI(59) | LO(18))
#define FMR		(HI(63) | LO(72))
#define FMUL		(HI(63) | LO(25))
#define FMULS		(HI(59) | LO(25))
#define FNEG		(HI(63) | LO(40))
#define FRSP		(HI(63) | LO(12))
#define FSUB		(HI(63) | LO(20))
#define FSUBS		(HI(59) | LO(20))
#define LD		(HI(58) | 0)
#define LWZ		(HI(32))
#define MFCR		(HI(31) | LO(19))
#define MFLR		(HI(31) | LO(339) | 0x80000)
#define MFXER		(HI(31) | LO(339) | 0x10000)
#define MTCTR		(HI(31) | LO(467) | 0x90000)
#define MTLR		(HI(31) | LO(467) | 0x80000)
#define MTXER		(HI(31) | LO(467) | 0x10000)
#define MULHD		(HI(31) | LO(73))
#define MULHDU		(HI(31) | LO(9))
#define MULHW		(HI(31) | LO(75))
#define MULHWU		(HI(31) | LO(11))
#define MULLD		(HI(31) | LO(233))
#define MULLI		(HI(7))
#define MULLW		(HI(31) | LO(235))
#define NEG		(HI(31) | LO(104))
#define NOP		(HI(24))
#define NOR		(HI(31) | LO(124))
#define OR		(HI(31) | LO(444))
#define ORI		(HI(24))
#define ORIS		(HI(25))
#define RLDICL		(HI(30))
#define RLWINM		(HI(21))
#define SLD		(HI(31) | LO(27))
#define SLW		(HI(31) | LO(24))
#define SRAD		(HI(31) | LO(794))
/* The extended opcode of SRADI is shifted by one extra bit compared to the
   other entries (LO(413 << 1)). */
#define SRADI		(HI(31) | LO(413 << 1))
#define SRAW		(HI(31) | LO(792))
#define SRAWI		(HI(31) | LO(824))
#define SRD		(HI(31) | LO(539))
#define SRW		(HI(31) | LO(536))
#define STD		(HI(62) | 0)
#define STDU		(HI(62) | 1)
#define STDUX		(HI(31) | LO(181))
#define STFIWX		(HI(31) | LO(983))
#define STW		(HI(36))
#define STWU		(HI(37))
#define STWUX		(HI(31) | LO(183))
#define SUBF		(HI(31) | LO(40))
#define SUBFC		(HI(31) | LO(8))
#define SUBFE		(HI(31) | LO(136))
#define SUBFIC		(HI(8))
#define XOR		(HI(31) | LO(316))
#define XORI		(HI(26))
#define XORIS		(HI(27))

/* Range of 16 bit signed and unsigned immediate operands. */
#define SIMM_MAX	(0x7fff)
#define SIMM_MIN	(-0x8000)
#define UIMM_MAX	(0xffff)
    225 
    226 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    227 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
    228 {
    229 	sljit_sw* ptrs;
    230 	if (func_ptr)
    231 		*func_ptr = (void*)context;
    232 	ptrs = (sljit_sw*)func;
    233 	context->addr = addr ? addr : ptrs[0];
    234 	context->r2 = ptrs[1];
    235 	context->r11 = ptrs[2];
    236 }
    237 #endif
    238 
    239 static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
    240 {
    241 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
    242 	FAIL_IF(!ptr);
    243 	*ptr = ins;
    244 	compiler->size++;
    245 	return SLJIT_SUCCESS;
    246 }
    247 
/* Decides whether a jump can use a shorter encoding than the generic
   sequence, and records the decision in jump->flags.

   jump:     the jump being finalized; PATCH_B, PATCH_ABS_B, REMOVE_COND,
             PATCH_ABS32 or PATCH_ABS48 may be added to its flags.
   code_ptr: position of the jump's branch instruction in the output.
   code:     start of the output; label targets are computed as
             code + label->size (in instruction words).

   Returns 1 when a shorter form was selected (the caller then compacts the
   emitted sequence), 0 when the jump must be left as it is. */
static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_sw extra_jump_flags;

	/* Rewritable jumps are never shortened. On ppc-32 with
	   SLJIT_PASS_ENTRY_ADDR_TO_CALL the same applies to calls. */
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
		return 0;
#else
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;
#endif

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}

	/* On ppc-64 with SLJIT_PASS_ENTRY_ADDR_TO_CALL, calls skip the branch
	   forms below and only the shorter absolute address loads are tried. */
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (jump->flags & IS_CALL)
		goto keep_address;
#endif

	/* Displacement from the branch position, with the two low bits cleared. */
	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;

	extra_jump_flags = 0;
	if (jump->flags & IS_COND) {
		/* Conditional branch: 16 bit signed displacement ... */
		if (diff <= 0x7fff && diff >= -0x8000) {
			jump->flags |= PATCH_B;
			return 1;
		}
		/* ... or a 16 bit absolute target. */
		if (target_addr <= 0xffff) {
			jump->flags |= PATCH_B | PATCH_ABS_B;
			return 1;
		}
		/* Otherwise try an unconditional branch with REMOVE_COND. The
		   caller places that branch one instruction later, hence the
		   adjusted displacement. */
		extra_jump_flags = REMOVE_COND;

		diff -= sizeof(sljit_ins);
	}

	/* Unconditional branch: 26 bit signed displacement or 26 bit absolute
	   target. */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		jump->flags |= PATCH_B | extra_jump_flags;
		return 1;
	}
	if (target_addr <= 0x03ffffff) {
		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
		return 1;
	}

	/* ppc-64 only: targets that fit into 31 or 47 bits allow a shorter
	   address load sequence. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
keep_address:
#endif
	if (target_addr <= 0x7fffffff) {
		jump->flags |= PATCH_ABS32;
		return 1;
	}
	if (target_addr <= 0x7fffffffffffl) {
		jump->flags |= PATCH_ABS48;
		return 1;
	}
#endif

	return 0;
}
    316 
/* Produces the final machine code from the instruction words collected in
   the compiler's buffers.

   The work is done in these steps:
   1. Executable memory is allocated for compiler->size instruction words
      (plus room for a struct sljit_function_context when
      SLJIT_INDIRECT_CALL is enabled).
   2. The words are copied one by one. While copying, the final addresses
      of labels, jumps and constants are recorded, and jump sequences are
      compacted whenever detect_jump_type() selects a shorter form.
   3. Every jump is patched with its branch displacement / absolute target
      or with the fields of its immediate load sequence.
   4. The instruction cache is synchronized with SLJIT_CACHE_FLUSH.

   Returns the start of the generated code, or, with SLJIT_INDIRECT_CALL,
   the function context placed after the code. Error returns are performed
   by the CHECK_ERROR_PTR / PTR_FAIL_WITH_EXEC_IF macros (defined
   elsewhere). */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Reserve space for the function context after the code. On ppc-64 one
	   more word is reserved when the instruction count is odd. */
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#else
	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#endif
#endif
	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	/* word_count is the index of the current word in the original
	   (uncompacted) stream; code_ptr is its position in the output. */
	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw)code_ptr;
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == word_count) {
				/* Point jump->addr at the start of the sequence that ends
				   with the current word: 3 words earlier on ppc-32,
				   6 words earlier on ppc-64. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
				jump->addr = (sljit_uw)(code_ptr - 3);
#else
				jump->addr = (sljit_uw)(code_ptr - 6);
#endif
				if (detect_jump_type(jump, code_ptr, code)) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
					/* The current word replaces the whole sequence. */
					code_ptr[-3] = code_ptr[0];
					code_ptr -= 3;
#else
					if (jump->flags & PATCH_ABS32) {
						/* Drop three words: the last two words of the
						   sequence move three positions earlier. */
						code_ptr -= 3;
						code_ptr[-1] = code_ptr[2];
						code_ptr[0] = code_ptr[3];
					}
					else if (jump->flags & PATCH_ABS48) {
						/* Drop one word: the last two words of the
						   sequence move one position earlier. */
						code_ptr--;
						code_ptr[-1] = code_ptr[0];
						code_ptr[0] = code_ptr[1];
						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
						code_ptr[-3] ^= 0x8422;
						/* oris -> ori */
						code_ptr[-2] ^= 0x4000000;
					}
					else {
						/* The current word replaces the whole sequence. */
						code_ptr[-6] = code_ptr[0];
						code_ptr -= 6;
					}
#endif
					if (jump->flags & REMOVE_COND) {
						/* Emit a conditional branch with a displacement of
						   two instructions and one BO-field bit toggled
						   (presumably inverting the condition), followed
						   by the unconditional branch that is patched
						   later. */
						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
						code_ptr++;
						jump->addr += sizeof(sljit_ins);
						code_ptr[0] = Bx;
						jump->flags -= IS_COND;
					}
				}
				jump = jump->next;
			}
			if (const_ && const_->addr == word_count) {
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			word_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may refer to the position right after the last word. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw)code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
#else
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
#endif

	/* Second pass: all label addresses are known now, patch the jumps. */
	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins*)jump->addr;
			if (jump->flags & PATCH_B) {
				if (jump->flags & IS_COND) {
					if (!(jump->flags & PATCH_ABS_B)) {
						/* Relative conditional branch. */
						addr = addr - jump->addr;
						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
					}
					else {
						/* Absolute conditional branch (0x2 is set). */
						SLJIT_ASSERT(addr <= 0xffff);
						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
					}
				}
				else {
					if (!(jump->flags & PATCH_ABS_B)) {
						/* Relative unconditional branch. */
						addr = addr - jump->addr;
						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
					}
					else {
						/* Absolute unconditional branch (0x2 is set). */
						SLJIT_ASSERT(addr <= 0x03ffffff);
						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
					}
				}
				break;
			}
			/* Set the fields of immediate loads. */
			/* The target address is split into 16 bit pieces that are
			   written into the low halves of the load sequence words. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
#else
			if (jump->flags & PATCH_ABS32) {
				SLJIT_ASSERT(addr <= 0x7fffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			if (jump->flags & PATCH_ABS48) {
				SLJIT_ASSERT(addr <= 0x7fffffffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			/* Full 64 bit address: words 0, 1, 3 and 4 carry the pieces
			   (word 2 has no immediate to patch). */
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
#endif
		} while (0);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
	SLJIT_CACHE_FLUSH(code, code_ptr);

	/* With SLJIT_INDIRECT_CALL the function context is stored right after
	   the code (on ppc-64 after skipping one word when code_ptr has bit 2
	   set) and its address is returned. The r2 / r11 values are copied
	   from the descriptor of sljit_generate_code itself. */
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (((sljit_sw)code_ptr) & 0x4)
		code_ptr++;
	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
	return code_ptr;
#else
	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
	return code_ptr;
#endif
#else
	return code;
#endif
}
    508 
    509 /* --------------------------------------------------------------------- */
    510 /*  Entry, exit                                                          */
    511 /* --------------------------------------------------------------------- */
    512 
/* inp_flags: */

/* Creates an index in data_transfer_insts array. */
#define LOAD_DATA	0x01
#define INDEXED		0x02
#define WRITE_BACK	0x04
#define WORD_DATA	0x00
#define BYTE_DATA	0x08
#define HALF_DATA	0x10
#define INT_DATA	0x18
#define SIGNED_DATA	0x20
/* Separates integer and floating point registers */
/* INST_CODE_AND_DST treats (flags & MEM_MASK) values up to GPR_REG as
   general purpose register transfers and larger values as floating point. */
#define GPR_REG		0x3f
#define DOUBLE_DATA	0x40

/* Mask of all the bits above that select a data_transfer_insts entry. */
#define MEM_MASK	0x7f

/* Other inp_flags. */

#define ARG_TEST	0x000100
/* Integer operation and set flags -> requires exts on 64 bit systems. */
#define ALT_SIGN_EXT	0x000200
/* This flag affects the RC() and OERC() macros. */
/* Its value is the bit OERC() copies into the instruction unchanged, and
   shifted right by 10 it becomes bit 0, which RC() and OERC() both set. */
#define ALT_SET_FLAGS	0x000400
#define ALT_KEEP_CACHE	0x000800
#define ALT_FORM1	0x010000
#define ALT_FORM2	0x020000
#define ALT_FORM3	0x040000
#define ALT_FORM4	0x080000
#define ALT_FORM5	0x100000
#define ALT_FORM6	0x200000

/* Source and destination is register. */
#define REG_DEST	0x000001
#define REG1_SOURCE	0x000002
#define REG2_SOURCE	0x000004
/* getput_arg_fast returned true. */
#define FAST_DEST	0x000008
/* Multiple instructions are required. */
#define SLOW_DEST	0x000010
/* These values share the same flag word with the following flags, so they
   must not overlap with them:
ALT_SIGN_EXT		0x000200
ALT_SET_FLAGS		0x000400
ALT_FORM1		0x010000
...
ALT_FORM6		0x200000 */
    559 
/* Word size specific part of the code generator. It is included here, after
   the opcode and flag definitions above. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#include "sljitNativePPC_32.c"
#else
#include "sljitNativePPC_64.c"
#endif

/* Word sized store / load opcodes used by sljit_emit_enter and
   sljit_emit_return to save and restore registers on the stack. Both are
   undefined again after sljit_emit_return. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define STACK_STORE	STW
#define STACK_LOAD	LWZ
#else
#define STACK_STORE	STD
#define STACK_LOAD	LD
#endif
    573 
/* Emits the function prologue and records the frame layout in the compiler.

   options, scratches, saveds, fscratches, fsaveds: stored in the compiler
       unchanged.
   args:       number of incoming arguments (up to three are handled); each
               is copied from SLJIT_R0..R2 to SLJIT_S0..S2.
   local_size: requested local area size in bytes; the size of the saved
               register area and FIXED_LOCALS_OFFSET are added and the sum
               is rounded up to a multiple of 16 before it is stored in
               compiler->local_size.

   Returns SLJIT_SUCCESS, or the error produced through FAIL_IF /
   CHECK_ERROR when an instruction cannot be emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
{
	sljit_si i, tmp, offs;

	CHECK_ERROR();
	check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->options = options;
	compiler->scratches = scratches;
	compiler->saveds = saveds;
	compiler->fscratches = fscratches;
	compiler->fsaveds = fsaveds;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->logical_local_size = local_size;
#endif

	/* Copy the link register into register index 0 (reg_map[0] is machine
	   register 0); it is stored into the frame further below. */
	FAIL_IF(push_inst(compiler, MFLR | D(0)));
	/* Registers are saved below the incoming stack pointer, at decreasing
	   negative offsets: TMP_ZERO first, then the saved registers, then the
	   scratch registers that have to be preserved. */
	offs = -(sljit_si)(sizeof(sljit_sw));
	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		offs -= (sljit_si)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offs -= (sljit_si)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));

	/* Store the link register value (held in register index 0) into the
	   frame; the slot depends on the stack frame variant. */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	/* addi with a zero base field and zero immediate: TMP_ZERO = 0. */
	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
	/* or Sx, Rx, Rx: copy the incoming arguments into saved registers. */
	if (args >= 1)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));

	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
	local_size = (local_size + 15) & ~0xf;
	compiler->local_size = local_size;

	/* Allocate the frame with a store-with-update of the stack pointer.
	   The immediate form is used when the size fits into a 16 bit signed
	   displacement; otherwise the negated size is loaded into register
	   index 0 first and the indexed form is used. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#else
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#endif

	return SLJIT_SUCCESS;
}
    645 
    646 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
    647 	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
    648 	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
    649 {
    650 	CHECK_ERROR_VOID();
    651 	check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    652 
    653 	compiler->options = options;
    654 	compiler->scratches = scratches;
    655 	compiler->saveds = saveds;
    656 	compiler->fscratches = fscratches;
    657 	compiler->fsaveds = fsaveds;
    658 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
    659 	compiler->logical_local_size = local_size;
    660 #endif
    661 
    662 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
    663 	compiler->local_size = (local_size + 15) & ~0xf;
    664 }
    665 
/* Emits the function epilogue: moves the return value into place, releases
   the stack frame, restores the registers saved by sljit_emit_enter and
   returns through the link register.

   op, src, srcw: describe the return value; they are passed on to
                  emit_mov_before_return.

   Returns SLJIT_SUCCESS, or the error produced through FAIL_IF /
   CHECK_ERROR when an instruction cannot be emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
{
	sljit_si i, tmp, offs;

	CHECK_ERROR();
	check_sljit_emit_return(compiler, op, src, srcw);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	/* Release the frame: add local_size back to the stack pointer, using
	   register index 0 as a temporary when the size does not fit into a
	   16 bit signed immediate. */
	if (compiler->local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}

	/* Reload the saved link register value into register index 0 from the
	   slot that sljit_emit_enter used. */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	/* Restore the registers in the reverse order of sljit_emit_enter:
	   start at the lowest offset and walk up towards the stack pointer. */
	offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_si)(sizeof(sljit_sw));
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_si)(sizeof(sljit_sw));
	}

	/* TMP_ZERO occupies the last slot, right below the stack pointer. */
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));

	/* Move the return address back into the link register and return. */
	FAIL_IF(push_inst(compiler, MTLR | S(0)));
	FAIL_IF(push_inst(compiler, BLR));

	return SLJIT_SUCCESS;
}
    710 
/* The stack access helpers are only needed by the entry / exit code above. */
#undef STACK_STORE
#undef STACK_LOAD

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* i/x - immediate/indexed form
   n/w - no write-back / write-back (1 bit)
   s/l - store/load (1 bit)
   u/s - unsigned/signed (1 bit)
   w/b/h/i - word/byte/half/int allowed (2 bit)
   It contains 32 items, but not all are different.
   NOTE(review): the table that follows is declared with 64 + 8 entries, so
   the item count above looks out of date - confirm. */

/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
#define INT_ALIGNED	0x10000
/* 64-bit only: there is no lwau instruction. */
#define UPDATE_REQ	0x20000

/* ARCH_32_64(a, b) selects a on ppc-32 and b on ppc-64.
   INST_CODE_AND_DST(inst, flags, reg) combines a data transfer opcode with
   its register operand: a general purpose register field (D) when
   (flags & MEM_MASK) <= GPR_REG, a floating point register field (FD)
   otherwise. On ppc-64 the INT_ALIGNED / UPDATE_REQ marker bits are removed
   from inst first, since they are not part of the instruction encoding. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define ARCH_32_64(a, b)	a
#define INST_CODE_AND_DST(inst, flags, reg) \
	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
#else
#define ARCH_32_64(a, b)	b
#define INST_CODE_AND_DST(inst, flags, reg) \
	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
#endif
    739 
    740 static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = {
    741 
    742 /* -------- Unsigned -------- */
    743 
    744 /* Word. */
    745 
    746 /* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    747 /* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    748 /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    749 /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    750 
    751 /* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    752 /* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    753 /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    754 /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    755 
    756 /* Byte. */
    757 
    758 /* u b n i s */ HI(38) /* stb */,
    759 /* u b n i l */ HI(34) /* lbz */,
    760 /* u b n x s */ HI(31) | LO(215) /* stbx */,
    761 /* u b n x l */ HI(31) | LO(87) /* lbzx */,
    762 
    763 /* u b w i s */ HI(39) /* stbu */,
    764 /* u b w i l */ HI(35) /* lbzu */,
    765 /* u b w x s */ HI(31) | LO(247) /* stbux */,
    766 /* u b w x l */ HI(31) | LO(119) /* lbzux */,
    767 
    768 /* Half. */
    769 
    770 /* u h n i s */ HI(44) /* sth */,
    771 /* u h n i l */ HI(40) /* lhz */,
    772 /* u h n x s */ HI(31) | LO(407) /* sthx */,
    773 /* u h n x l */ HI(31) | LO(279) /* lhzx */,
    774 
    775 /* u h w i s */ HI(45) /* sthu */,
    776 /* u h w i l */ HI(41) /* lhzu */,
    777 /* u h w x s */ HI(31) | LO(439) /* sthux */,
    778 /* u h w x l */ HI(31) | LO(311) /* lhzux */,
    779 
    780 /* Int. */
    781 
    782 /* u i n i s */ HI(36) /* stw */,
    783 /* u i n i l */ HI(32) /* lwz */,
    784 /* u i n x s */ HI(31) | LO(151) /* stwx */,
    785 /* u i n x l */ HI(31) | LO(23) /* lwzx */,
    786 
    787 /* u i w i s */ HI(37) /* stwu */,
    788 /* u i w i l */ HI(33) /* lwzu */,
    789 /* u i w x s */ HI(31) | LO(183) /* stwux */,
    790 /* u i w x l */ HI(31) | LO(55) /* lwzux */,
    791 
    792 /* -------- Signed -------- */
    793 
    794 /* Word. */
    795 
    796 /* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    797 /* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    798 /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    799 /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    800 
    801 /* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    802 /* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    803 /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    804 /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    805 
    806 /* Byte. */
    807 
    808 /* s b n i s */ HI(38) /* stb */,
    809 /* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
    810 /* s b n x s */ HI(31) | LO(215) /* stbx */,
    811 /* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
    812 
    813 /* s b w i s */ HI(39) /* stbu */,
    814 /* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
    815 /* s b w x s */ HI(31) | LO(247) /* stbux */,
    816 /* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
    817 
    818 /* Half. */
    819 
    820 /* s h n i s */ HI(44) /* sth */,
    821 /* s h n i l */ HI(42) /* lha */,
    822 /* s h n x s */ HI(31) | LO(407) /* sthx */,
    823 /* s h n x l */ HI(31) | LO(343) /* lhax */,
    824 
    825 /* s h w i s */ HI(45) /* sthu */,
    826 /* s h w i l */ HI(43) /* lhau */,
    827 /* s h w x s */ HI(31) | LO(439) /* sthux */,
    828 /* s h w x l */ HI(31) | LO(375) /* lhaux */,
    829 
    830 /* Int. */
    831 
    832 /* s i n i s */ HI(36) /* stw */,
    833 /* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
    834 /* s i n x s */ HI(31) | LO(151) /* stwx */,
    835 /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
    836 
    837 /* s i w i s */ HI(37) /* stwu */,
    838 /* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
    839 /* s i w x s */ HI(31) | LO(183) /* stwux */,
    840 /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
    841 
    842 /* -------- Floating point (double, then single precision) -------- */
    843 
    844 /* d   n i s */ HI(54) /* stfd */,
    845 /* d   n i l */ HI(50) /* lfd */,
    846 /* d   n x s */ HI(31) | LO(727) /* stfdx */,
    847 /* d   n x l */ HI(31) | LO(599) /* lfdx */,
    848 
    849 /* s   n i s */ HI(52) /* stfs */,
    850 /* s   n i l */ HI(48) /* lfs */,
    851 /* s   n x s */ HI(31) | LO(663) /* stfsx */,
    852 /* s   n x l */ HI(31) | LO(535) /* lfsx */,
    853 
    854 };
    855 
    856 #undef ARCH_32_64
    857 
    858 /* Simple cases, (no caching is required). */
    859 static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
    860 {
    861 	sljit_ins inst;
    862 
    863 	/* Should work when (arg & REG_MASK) == 0. */
    864 	SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
    865 	SLJIT_ASSERT(arg & SLJIT_MEM);
    866 
    867 	if (arg & OFFS_REG_MASK) {
    868 		if (argw & 0x3)
    869 			return 0;
    870 		if (inp_flags & ARG_TEST)
    871 			return 1;
    872 
    873 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
    874 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
    875 		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
    876 		return -1;
    877 	}
    878 
    879 	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
    880 		inp_flags &= ~WRITE_BACK;
    881 
    882 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    883 	inst = data_transfer_insts[inp_flags & MEM_MASK];
    884 	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
    885 
    886 	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
    887 		return 0;
    888 	if (inp_flags & ARG_TEST)
    889 		return 1;
    890 #endif
    891 
    892 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    893 	if (argw > SIMM_MAX || argw < SIMM_MIN)
    894 		return 0;
    895 	if (inp_flags & ARG_TEST)
    896 		return 1;
    897 
    898 	inst = data_transfer_insts[inp_flags & MEM_MASK];
    899 	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
    900 #endif
    901 
    902 	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
    903 	return -1;
    904 }
    905 
    906 /* See getput_arg below.
    907    Note: can_cache is called only for binary operators. Those operator always
    908    uses word arguments without write back. */
    909 static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
    910 {
    911 	sljit_sw high_short, next_high_short;
    912 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    913 	sljit_sw diff;
    914 #endif
    915 
    916 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
    917 
    918 	if (arg & OFFS_REG_MASK)
    919 		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
    920 
    921 	if (next_arg & OFFS_REG_MASK)
    922 		return 0;
    923 
    924 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    925 	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    926 	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    927 	return high_short == next_high_short;
    928 #else
    929 	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
    930 		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    931 		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    932 		if (high_short == next_high_short)
    933 			return 1;
    934 	}
    935 
    936 	diff = argw - next_argw;
    937 	if (!(arg & REG_MASK))
    938 		return diff <= SIMM_MAX && diff >= SIMM_MIN;
    939 
    940 	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
    941 		return 1;
    942 
    943 	return 0;
    944 #endif
    945 }
    946 
    947 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    948 #define ADJUST_CACHED_IMM(imm) \
    949 	if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
    950 		/* Adjust cached value. Fortunately this is really a rare case */ \
    951 		compiler->cache_argw += imm & 0x3; \
    952 		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
    953 		imm &= ~0x3; \
    954 	}
    955 #endif
    956 
    957 /* Emit the necessary instructions. See can_cache above.
           General (slow) path of memory operand handling: the part of the address
           which does not fit into a single load / store instruction is computed
           into a temporary register first, then the data transfer instruction
           is pushed.

           reg                 - register which is loaded or stored
           arg, argw           - the memory operand which is accessed
           next_arg, next_argw - another operand supplied by the caller; when the
                                 computed address part can serve it as well, the
                                 part is kept in TMP_REG3 and described by
                                 compiler->cache_arg / compiler->cache_argw

           Returns the result of the final push_inst call; FAIL_IF leaves the
           function earlier when emitting an instruction fails. */
    958 static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
    959 {
    960 	sljit_si tmp_r;
    961 	sljit_ins inst;
    962 	sljit_sw high_short, next_high_short;
    963 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    964 	sljit_sw diff;
    965 #endif
    966 
    967 	SLJIT_ASSERT(arg & SLJIT_MEM);
    968 
        	/* Scratch register of the address computation: a load whose MEM_MASK
        	   bits are at most GPR_REG may use its own destination register,
        	   every other access uses TMP_REG1. */
    969 	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
    970 	/* Special case for "mov reg, [reg, ... ]". */
        	/* The scratch register must differ from the base register. */
    971 	if ((arg & REG_MASK) == tmp_r)
    972 		tmp_r = TMP_REG1;
    973 
        	/* base + (index << shift) operands: argw carries the shift amount. */
    974 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
    975 		argw &= 0x3;
    976 		/* Otherwise getput_arg_fast would capture it. */
    977 		SLJIT_ASSERT(argw);
    978 
        		/* Reuse TMP_REG3 when the cache describes the same index register
        		   with the same shift. */
    979 		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
    980 			tmp_r = TMP_REG3;
    981 		else {
        			/* The next operand uses the same index and shift: compute the
        			   shifted index into TMP_REG3 and record it in the cache. */
    982 			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
    983 				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
    984 				compiler->cache_argw = argw;
    985 				tmp_r = TMP_REG3;
    986 			}
        			/* tmp_r = index register shifted left by argw. */
    987 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    988 			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
    989 #else
    990 			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
    991 #endif
    992 		}
        		/* Indexed form of the instruction: base + tmp_r. */
    993 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
    994 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
    995 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
    996 	}
    997 
        	/* Without a base register the write back request is dropped. */
    998 	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
    999 		inp_flags &= ~WRITE_BACK;
   1000 
   1001 	inst = data_transfer_insts[inp_flags & MEM_MASK];
   1002 	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
   1003 
        	/* On ppc-64 the "high part + 16 bit displacement" sequence below is
        	   used only when the offset is within the checked range (the upper
        	   limit leaves room for the rounding applied to high_short), the
        	   displacement is acceptable for INT_ALIGNED instructions, and the
        	   instruction is not flagged with UPDATE_REQ. On ppc-32 it is always
        	   used. */
   1004 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1005 	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
   1006 			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
   1007 #endif
   1008 
   1009 		arg &= REG_MASK;
        		/* Upper part of the offset. When bit 15 of argw is set, the low
        		   half acts as a negative displacement, so 0x10000 is added to
        		   the upper part to compensate. */
   1010 		high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
   1011 		/* The getput_arg_fast should handle this otherwise. */
   1012 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1013 		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
   1014 #else
   1015 		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
   1016 #endif
   1017 
        		/* Write back: the base register itself is advanced by the upper
        		   part, and the access then goes through the base register. */
   1018 		if (inp_flags & WRITE_BACK) {
        			/* reg is also the base register: copy it to tmp_r (OR with two
        			   identical sources) before the base is modified, and transfer
        			   the copy instead. */
   1019 			if (arg == reg) {
   1020 				FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
   1021 				reg = tmp_r;
   1022 			}
   1023 			tmp_r = arg;
   1024 			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
   1025 		}
        		/* base + high_short is not available in the cache. */
   1026 		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
        			/* The next operand has the same upper part: compute the sum
        			   into TMP_REG3 and record it in the cache. */
   1027 			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
   1028 				next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
   1029 				if (high_short == next_high_short) {
   1030 					compiler->cache_arg = SLJIT_MEM | arg;
   1031 					compiler->cache_argw = high_short;
   1032 					tmp_r = TMP_REG3;
   1033 				}
   1034 			}
   1035 			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
   1036 		}
        		/* Cache hit: TMP_REG3 already holds base + high_short. */
   1037 		else
   1038 			tmp_r = TMP_REG3;
   1039 
        		/* The lower part of the offset goes into the displacement field. */
   1040 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
   1041 
   1042 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1043 	}
   1044 
   1045 	/* Everything else is PPC-64 only. */
        	/* No base register: argw is the whole address. */
   1046 	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
        		/* The cache holds an immediate (SLJIT_IMM) close enough to argw:
        		   access relative to TMP_REG3. ADJUST_CACHED_IMM moves the low
        		   two bits of diff into TMP_REG3 for INT_ALIGNED instructions. */
   1047 		diff = argw - compiler->cache_argw;
   1048 		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1049 			ADJUST_CACHED_IMM(diff);
   1050 			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
   1051 		}
   1052 
        		/* The next operand is close to this address: load the address
        		   into TMP_REG3 and record it as a cached immediate. */
   1053 		diff = argw - next_argw;
   1054 		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1055 			SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1056 
   1057 			compiler->cache_arg = SLJIT_IMM;
   1058 			compiler->cache_argw = argw;
   1059 			tmp_r = TMP_REG3;
   1060 		}
   1061 
        		/* Load the address and access it with a zero displacement. */
   1062 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
   1063 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
   1064 	}
   1065 
        	/* The cache describes the same operand with a nearby offset (it is
        	   filled with base + offset further below): access relative to
        	   TMP_REG3. */
   1066 	diff = argw - compiler->cache_argw;
   1067 	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1068 		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
   1069 		ADJUST_CACHED_IMM(diff);
   1070 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
   1071 	}
   1072 
        	/* The cache holds a nearby immediate: adjust TMP_REG3 to argw when
        	   they differ, then use the indexed form with base + TMP_REG3. */
   1073 	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1074 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
   1075 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
   1076 		if (compiler->cache_argw != argw) {
   1077 			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
   1078 			compiler->cache_argw = argw;
   1079 		}
   1080 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
   1081 	}
   1082 
        	/* The next operand has the same offset: load it into TMP_REG3, record
        	   it as a cached immediate and use the indexed form. */
   1083 	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
   1084 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1085 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
   1086 
   1087 		compiler->cache_arg = SLJIT_IMM;
   1088 		compiler->cache_argw = argw;
   1089 
   1090 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
   1091 		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
   1092 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
   1093 	}
   1094 
        	/* The next operand has the same arg value and a nearby offset:
        	   TMP_REG3 = offset + base register, recorded in the cache under
        	   arg, then accessed with a zero displacement. */
   1095 	diff = argw - next_argw;
   1096 	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1097 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1098 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
   1099 		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
   1100 
   1101 		compiler->cache_arg = arg;
   1102 		compiler->cache_argw = argw;
   1103 
   1104 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
   1105 	}
   1106 
        	/* The next operand is a non-indexed memory operand with a nearby
        	   offset: keep the offset in TMP_REG3 as a cached immediate.
        	   Otherwise the offset goes into the scratch register only. */
   1107 	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
   1108 		SLJIT_ASSERT(inp_flags & LOAD_DATA);
   1109 		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
   1110 
   1111 		compiler->cache_arg = SLJIT_IMM;
   1112 		compiler->cache_argw = argw;
   1113 		tmp_r = TMP_REG3;
   1114 	}
   1115 	else
   1116 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
   1117 
   1118 	/* Get the indexed version instead of the normal one. */
   1119 	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
   1120 	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
   1121 	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
   1122 #endif
   1123 }
   1124 
   1125 static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
   1126 {
   1127 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
   1128 		return compiler->error;
   1129 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
   1130 }
   1131 
   1132 static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags,
   1133 	sljit_si dst, sljit_sw dstw,
   1134 	sljit_si src1, sljit_sw src1w,
   1135 	sljit_si src2, sljit_sw src2w)
   1136 {
        	/* Common driver of the operations: brings the source operands into
        	   registers, emits the operation through emit_single_op and stores
        	   the result when the destination is a memory operand. Returns
        	   SLJIT_SUCCESS, or leaves early through FAIL_IF on failure. */
   1137 	/* arg1 goes to TMP_REG1 or src reg
   1138 	   arg2 goes to TMP_REG2, imm or src reg
   1139 	   TMP_REG3 can be used for caching
   1140 	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
   1141 	sljit_si dst_r;
   1142 	sljit_si src1_r;
   1143 	sljit_si src2_r;
   1144 	sljit_si sugg_src2_r = TMP_REG2;
        	/* Only the ALT_* bits are forwarded to emit_single_op (together with
        	   the REG_DEST / REG1_SOURCE / ... bits collected below); the memory
        	   access helpers receive the original input_flags. */
   1145 	sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
   1146 
        	/* The address cache is reset unless the caller asks to keep it. */
   1147 	if (!(input_flags & ALT_KEEP_CACHE)) {
   1148 		compiler->cache_arg = 0;
   1149 		compiler->cache_argw = 0;
   1150 	}
   1151 
   1152 	/* Destination check. */
   1153 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
        		/* A move without destination emits nothing unless its source is
        		   a memory operand. */
   1154 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
   1155 			return SLJIT_SUCCESS;
   1156 		dst_r = TMP_REG2;
   1157 	}
   1158 	else if (FAST_IS_REG(dst)) {
   1159 		dst_r = dst;
   1160 		flags |= REG_DEST;
        		/* For moves the second source is brought directly into dst. */
   1161 		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
   1162 			sugg_src2_r = dst_r;
   1163 	}
   1164 	else {
   1165 		SLJIT_ASSERT(dst & SLJIT_MEM);
        		/* ARG_TEST only probes whether the fast path can handle the
        		   store. dst_r == 0 marks a destination which needs the slow
        		   path and is not processed yet. */
   1166 		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
   1167 			flags |= FAST_DEST;
   1168 			dst_r = TMP_REG2;
   1169 		}
   1170 		else {
   1171 			flags |= SLOW_DEST;
   1172 			dst_r = 0;
   1173 		}
   1174 	}
   1175 
   1176 	/* Source 1. */
   1177 	if (FAST_IS_REG(src1)) {
   1178 		src1_r = src1;
   1179 		flags |= REG1_SOURCE;
   1180 	}
   1181 	else if (src1 & SLJIT_IMM) {
   1182 		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
   1183 		src1_r = TMP_REG1;
   1184 	}
        	/* A non-zero result of getput_arg_fast means the load was handled,
        	   but it may also come from a failed push_inst, hence the check of
        	   compiler->error. */
   1185 	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
   1186 		FAIL_IF(compiler->error);
   1187 		src1_r = TMP_REG1;
   1188 	}
        	/* Complex memory operand: loaded later by getput_arg. */
   1189 	else
   1190 		src1_r = 0;
   1191 
   1192 	/* Source 2. */
   1193 	if (FAST_IS_REG(src2)) {
   1194 		src2_r = src2;
   1195 		flags |= REG2_SOURCE;
        		/* A move from a register to a non-register destination uses the
        		   source register as the result register. */
   1196 		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
   1197 			dst_r = src2_r;
   1198 	}
   1199 	else if (src2 & SLJIT_IMM) {
   1200 		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
   1201 		src2_r = sugg_src2_r;
   1202 	}
   1203 	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
   1204 		FAIL_IF(compiler->error);
   1205 		src2_r = sugg_src2_r;
   1206 	}
   1207 	else
   1208 		src2_r = 0;
   1209 
   1210 	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
   1211 	   All arguments are complex addressing modes, and it is a binary operator. */
   1212 	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
        		/* Choose the load order so that the operand handled next is the
        		   one which can share the cached address part: when src1 cannot
        		   share with src2 but can share with dst, src2 is loaded first. */
   1213 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
   1214 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
   1215 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
   1216 		}
   1217 		else {
   1218 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
   1219 			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
   1220 		}
   1221 		src1_r = TMP_REG1;
   1222 		src2_r = TMP_REG2;
   1223 	}
        	/* Two unprocessed operands: the first one is loaded here with the
        	   other one given as the caching candidate; the remaining source (if
        	   any) is loaded below without a candidate. */
   1224 	else if (src1_r == 0 && src2_r == 0) {
   1225 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
   1226 		src1_r = TMP_REG1;
   1227 	}
   1228 	else if (src1_r == 0 && dst_r == 0) {
   1229 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
   1230 		src1_r = TMP_REG1;
   1231 	}
   1232 	else if (src2_r == 0 && dst_r == 0) {
   1233 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
   1234 		src2_r = sugg_src2_r;
   1235 	}
   1236 
        	/* A slow memory destination receives the result from TMP_REG2. */
   1237 	if (dst_r == 0)
   1238 		dst_r = TMP_REG2;
   1239 
        	/* Load the sources which are still unprocessed (no caching candidate). */
   1240 	if (src1_r == 0) {
   1241 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
   1242 		src1_r = TMP_REG1;
   1243 	}
   1244 
   1245 	if (src2_r == 0) {
   1246 		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
   1247 		src2_r = sugg_src2_r;
   1248 	}
   1249 
        	/* Every operand is in a register now: emit the operation itself. */
   1250 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
   1251 
        	/* Store the result when the destination is a memory operand. */
   1252 	if (flags & (FAST_DEST | SLOW_DEST)) {
   1253 		if (flags & FAST_DEST)
   1254 			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
   1255 		else
   1256 			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
   1257 	}
   1258 	return SLJIT_SUCCESS;
   1259 }
   1260 
   1261 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
   1262 {
   1263 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1264 	sljit_si int_op = op & SLJIT_INT_OP;
   1265 #endif
   1266 
   1267 	CHECK_ERROR();
   1268 	check_sljit_emit_op0(compiler, op);
   1269 
   1270 	op = GET_OPCODE(op);
   1271 	switch (op) {
   1272 	case SLJIT_BREAKPOINT:
   1273 	case SLJIT_NOP:
   1274 		return push_inst(compiler, NOP);
   1275 	case SLJIT_UMUL:
   1276 	case SLJIT_SMUL:
   1277 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
   1278 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1279 		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1280 		return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
   1281 #else
   1282 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1283 		return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
   1284 #endif
   1285 	case SLJIT_UDIV:
   1286 	case SLJIT_SDIV:
   1287 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
   1288 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1289 		if (int_op) {
   1290 			FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1291 			FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
   1292 		} else {
   1293 			FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1294 			FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
   1295 		}
   1296 		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
   1297 #else
   1298 		FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1299 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
   1300 		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
   1301 #endif
   1302 	}
   1303 
   1304 	return SLJIT_SUCCESS;
   1305 }
   1306 
   1307 #define EMIT_MOV(type, type_flags, type_cast) \
   1308 	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
   1309 
   1310 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
   1311 	sljit_si dst, sljit_sw dstw,
   1312 	sljit_si src, sljit_sw srcw)
   1313 {
   1314 	sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1315 	sljit_si op_flags = GET_ALL_FLAGS(op);
   1316 
   1317 	CHECK_ERROR();
   1318 	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
   1319 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1320 	ADJUST_LOCAL_OFFSET(src, srcw);
   1321 
   1322 	op = GET_OPCODE(op);
   1323 	if ((src & SLJIT_IMM) && srcw == 0)
   1324 		src = TMP_ZERO;
   1325 
   1326 	if (op_flags & SLJIT_SET_O)
   1327 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1328 
   1329 	if (op_flags & SLJIT_INT_OP) {
   1330 		if (op < SLJIT_NOT) {
   1331 			if (FAST_IS_REG(src) && src == dst) {
   1332 				if (!TYPE_CAST_NEEDED(op))
   1333 					return SLJIT_SUCCESS;
   1334 			}
   1335 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1336 			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
   1337 				op = SLJIT_MOV_UI;
   1338 			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
   1339 				op = SLJIT_MOVU_UI;
   1340 			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
   1341 				op = SLJIT_MOV_SI;
   1342 			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
   1343 				op = SLJIT_MOVU_SI;
   1344 #endif
   1345 		}
   1346 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1347 		else {
   1348 			/* Most operations expect sign extended arguments. */
   1349 			flags |= INT_DATA | SIGNED_DATA;
   1350 			if (src & SLJIT_IMM)
   1351 				srcw = (sljit_si)srcw;
   1352 		}
   1353 #endif
   1354 	}
   1355 
   1356 	switch (op) {
   1357 	case SLJIT_MOV:
   1358 	case SLJIT_MOV_P:
   1359 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1360 	case SLJIT_MOV_UI:
   1361 	case SLJIT_MOV_SI:
   1362 #endif
   1363 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
   1364 
   1365 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1366 	case SLJIT_MOV_UI:
   1367 		return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui));
   1368 
   1369 	case SLJIT_MOV_SI:
   1370 		return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si));
   1371 #endif
   1372 
   1373 	case SLJIT_MOV_UB:
   1374 		return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub));
   1375 
   1376 	case SLJIT_MOV_SB:
   1377 		return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb));
   1378 
   1379 	case SLJIT_MOV_UH:
   1380 		return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh));
   1381 
   1382 	case SLJIT_MOV_SH:
   1383 		return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh));
   1384 
   1385 	case SLJIT_MOVU:
   1386 	case SLJIT_MOVU_P:
   1387 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1388 	case SLJIT_MOVU_UI:
   1389 	case SLJIT_MOVU_SI:
   1390 #endif
   1391 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
   1392 
   1393 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1394 	case SLJIT_MOVU_UI:
   1395 		return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui));
   1396 
   1397 	case SLJIT_MOVU_SI:
   1398 		return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si));
   1399 #endif
   1400 
   1401 	case SLJIT_MOVU_UB:
   1402 		return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub));
   1403 
   1404 	case SLJIT_MOVU_SB:
   1405 		return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb));
   1406 
   1407 	case SLJIT_MOVU_UH:
   1408 		return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh));
   1409 
   1410 	case SLJIT_MOVU_SH:
   1411 		return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh));
   1412 
   1413 	case SLJIT_NOT:
   1414 		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1415 
   1416 	case SLJIT_NEG:
   1417 		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1418 
   1419 	case SLJIT_CLZ:
   1420 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1421 		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
   1422 #else
   1423 		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1424 #endif
   1425 	}
   1426 
   1427 	return SLJIT_SUCCESS;
   1428 }
   1429 
   1430 #undef EMIT_MOV
   1431 
   1432 #define TEST_SL_IMM(src, srcw) \
   1433 	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
   1434 
   1435 #define TEST_UL_IMM(src, srcw) \
   1436 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
   1437 
   1438 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1439 #define TEST_SH_IMM(src, srcw) \
   1440 	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
   1441 #else
   1442 #define TEST_SH_IMM(src, srcw) \
   1443 	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
   1444 #endif
   1445 
   1446 #define TEST_UH_IMM(src, srcw) \
   1447 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))
   1448 
   1449 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1450 #define TEST_ADD_IMM(src, srcw) \
   1451 	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
   1452 #else
   1453 #define TEST_ADD_IMM(src, srcw) \
   1454 	((src) & SLJIT_IMM)
   1455 #endif
   1456 
   1457 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1458 #define TEST_UI_IMM(src, srcw) \
   1459 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
   1460 #else
   1461 #define TEST_UI_IMM(src, srcw) \
   1462 	((src) & SLJIT_IMM)
   1463 #endif
   1464 
   1465 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
   1466 	sljit_si dst, sljit_sw dstw,
   1467 	sljit_si src1, sljit_sw src1w,
   1468 	sljit_si src2, sljit_sw src2w)
   1469 {
   1470 	sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1471 
   1472 	CHECK_ERROR();
   1473 	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   1474 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1475 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1476 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1477 
   1478 	if ((src1 & SLJIT_IMM) && src1w == 0)
   1479 		src1 = TMP_ZERO;
   1480 	if ((src2 & SLJIT_IMM) && src2w == 0)
   1481 		src2 = TMP_ZERO;
   1482 
   1483 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1484 	if (op & SLJIT_INT_OP) {
   1485 		/* Most operations expect sign extended arguments. */
   1486 		flags |= INT_DATA | SIGNED_DATA;
   1487 		if (src1 & SLJIT_IMM)
   1488 			src1w = (sljit_si)(src1w);
   1489 		if (src2 & SLJIT_IMM)
   1490 			src2w = (sljit_si)(src2w);
   1491 		if (GET_FLAGS(op))
   1492 			flags |= ALT_SIGN_EXT;
   1493 	}
   1494 #endif
   1495 	if (op & SLJIT_SET_O)
   1496 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1497 	if (src2 == TMP_REG2)
   1498 		flags |= ALT_KEEP_CACHE;
   1499 
   1500 	switch (GET_OPCODE(op)) {
   1501 	case SLJIT_ADD:
   1502 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1503 			if (TEST_SL_IMM(src2, src2w)) {
   1504 				compiler->imm = src2w & 0xffff;
   1505 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1506 			}
   1507 			if (TEST_SL_IMM(src1, src1w)) {
   1508 				compiler->imm = src1w & 0xffff;
   1509 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1510 			}
   1511 			if (TEST_SH_IMM(src2, src2w)) {
   1512 				compiler->imm = (src2w >> 16) & 0xffff;
   1513 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1514 			}
   1515 			if (TEST_SH_IMM(src1, src1w)) {
   1516 				compiler->imm = (src1w >> 16) & 0xffff;
   1517 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1518 			}
   1519 			/* Range between -1 and -32768 is covered above. */
   1520 			if (TEST_ADD_IMM(src2, src2w)) {
   1521 				compiler->imm = src2w & 0xffffffff;
   1522 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1523 			}
   1524 			if (TEST_ADD_IMM(src1, src1w)) {
   1525 				compiler->imm = src1w & 0xffffffff;
   1526 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
   1527 			}
   1528 		}
   1529 		if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
   1530 			if (TEST_SL_IMM(src2, src2w)) {
   1531 				compiler->imm = src2w & 0xffff;
   1532 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1533 			}
   1534 			if (TEST_SL_IMM(src1, src1w)) {
   1535 				compiler->imm = src1w & 0xffff;
   1536 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1537 			}
   1538 		}
   1539 		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
   1540 
   1541 	case SLJIT_ADDC:
   1542 		return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
   1543 
   1544 	case SLJIT_SUB:
   1545 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1546 			if (TEST_SL_IMM(src2, -src2w)) {
   1547 				compiler->imm = (-src2w) & 0xffff;
   1548 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1549 			}
   1550 			if (TEST_SL_IMM(src1, src1w)) {
   1551 				compiler->imm = src1w & 0xffff;
   1552 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1553 			}
   1554 			if (TEST_SH_IMM(src2, -src2w)) {
   1555 				compiler->imm = ((-src2w) >> 16) & 0xffff;
   1556 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1557 			}
   1558 			/* Range between -1 and -32768 is covered above. */
   1559 			if (TEST_ADD_IMM(src2, -src2w)) {
   1560 				compiler->imm = -src2w & 0xffffffff;
   1561 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1562 			}
   1563 		}
   1564 		if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
   1565 			if (!(op & SLJIT_SET_U)) {
   1566 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
   1567 				if (TEST_SL_IMM(src2, src2w)) {
   1568 					compiler->imm = src2w & 0xffff;
   1569 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1570 				}
   1571 				if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
   1572 					compiler->imm = src1w & 0xffff;
   1573 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1574 				}
   1575 			}
   1576 			if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
   1577 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
   1578 				if (TEST_UL_IMM(src2, src2w)) {
   1579 					compiler->imm = src2w & 0xffff;
   1580 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1581 				}
   1582 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
   1583 			}
   1584 			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
   1585 				compiler->imm = src2w;
   1586 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1587 			}
   1588 			return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
   1589 		}
   1590 		if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
   1591 			if (TEST_SL_IMM(src2, -src2w)) {
   1592 				compiler->imm = (-src2w) & 0xffff;
   1593 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1594 			}
   1595 		}
   1596 		/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
   1597 		return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
   1598 
   1599 	case SLJIT_SUBC:
   1600 		return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
   1601 
   1602 	case SLJIT_MUL:
   1603 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1604 		if (op & SLJIT_INT_OP)
   1605 			flags |= ALT_FORM2;
   1606 #endif
   1607 		if (!GET_FLAGS(op)) {
   1608 			if (TEST_SL_IMM(src2, src2w)) {
   1609 				compiler->imm = src2w & 0xffff;
   1610 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1611 			}
   1612 			if (TEST_SL_IMM(src1, src1w)) {
   1613 				compiler->imm = src1w & 0xffff;
   1614 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1615 			}
   1616 		}
   1617 		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
   1618 
   1619 	case SLJIT_AND:
   1620 	case SLJIT_OR:
   1621 	case SLJIT_XOR:
   1622 		/* Commutative unsigned operations. */
   1623 		if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
   1624 			if (TEST_UL_IMM(src2, src2w)) {
   1625 				compiler->imm = src2w;
   1626 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1627 			}
   1628 			if (TEST_UL_IMM(src1, src1w)) {
   1629 				compiler->imm = src1w;
   1630 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1631 			}
   1632 			if (TEST_UH_IMM(src2, src2w)) {
   1633 				compiler->imm = (src2w >> 16) & 0xffff;
   1634 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1635 			}
   1636 			if (TEST_UH_IMM(src1, src1w)) {
   1637 				compiler->imm = (src1w >> 16) & 0xffff;
   1638 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1639 			}
   1640 		}
   1641 		if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
   1642 			if (TEST_UI_IMM(src2, src2w)) {
   1643 				compiler->imm = src2w;
   1644 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1645 			}
   1646 			if (TEST_UI_IMM(src1, src1w)) {
   1647 				compiler->imm = src1w;
   1648 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1649 			}
   1650 		}
   1651 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1652 
   1653 	case SLJIT_ASHR:
   1654 		if (op & SLJIT_KEEP_FLAGS)
   1655 			flags |= ALT_FORM3;
   1656 		/* Fall through. */
   1657 	case SLJIT_SHL:
   1658 	case SLJIT_LSHR:
   1659 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1660 		if (op & SLJIT_INT_OP)
   1661 			flags |= ALT_FORM2;
   1662 #endif
   1663 		if (src2 & SLJIT_IMM) {
   1664 			compiler->imm = src2w;
   1665 			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1666 		}
   1667 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1668 	}
   1669 
   1670 	return SLJIT_SUCCESS;
   1671 }
   1672 
   1673 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
   1674 {
   1675 	check_sljit_get_register_index(reg);
   1676 	return reg_map[reg];
   1677 }
   1678 
   1679 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
   1680 {
   1681 	check_sljit_get_float_register_index(reg);
   1682 	return reg;
   1683 }
   1684 
   1685 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
   1686 	void *instruction, sljit_si size)
   1687 {
   1688 	CHECK_ERROR();
   1689 	check_sljit_emit_op_custom(compiler, instruction, size);
   1690 	SLJIT_ASSERT(size == 4);
   1691 
   1692 	return push_inst(compiler, *(sljit_ins*)instruction);
   1693 }
   1694 
   1695 /* --------------------------------------------------------------------- */
   1696 /*  Floating point operators                                             */
   1697 /* --------------------------------------------------------------------- */
   1698 
   1699 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
   1700 {
   1701 #ifdef SLJIT_IS_FPU_AVAILABLE
   1702 	return SLJIT_IS_FPU_AVAILABLE;
   1703 #else
   1704 	/* Available by default. */
   1705 	return 1;
   1706 #endif
   1707 }
   1708 
   1709 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6))
   1710 #define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
   1711 
   1712 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1713 #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
   1714 #else
   1715 #define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
   1716 
   1717 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
   1718 #define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
   1719 #define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
   1720 #else
   1721 #define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
   1722 #define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
   1723 #endif
   1724 
   1725 #endif /* SLJIT_CONFIG_PPC_64 */
   1726 
   1727 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
   1728 	sljit_si dst, sljit_sw dstw,
   1729 	sljit_si src, sljit_sw srcw)
   1730 {
   1731 	if (src & SLJIT_MEM) {
   1732 		/* We can ignore the temporary data store on the stack from caching point of view. */
   1733 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
   1734 		src = TMP_FREG1;
   1735 	}
   1736 
   1737 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1738 	op = GET_OPCODE(op);
   1739 	FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
   1740 
   1741 	if (dst == SLJIT_UNUSED)
   1742 		return SLJIT_SUCCESS;
   1743 
   1744 	if (op == SLJIT_CONVW_FROMD) {
   1745 		if (FAST_IS_REG(dst)) {
   1746 			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
   1747 			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
   1748 		}
   1749 		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
   1750 	}
   1751 
   1752 #else
   1753 	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
   1754 
   1755 	if (dst == SLJIT_UNUSED)
   1756 		return SLJIT_SUCCESS;
   1757 #endif
   1758 
   1759 	if (FAST_IS_REG(dst)) {
   1760 		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
   1761 		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
   1762 		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
   1763 	}
   1764 
   1765 	SLJIT_ASSERT(dst & SLJIT_MEM);
   1766 
   1767 	if (dst & OFFS_REG_MASK) {
   1768 		dstw &= 0x3;
   1769 		if (dstw) {
   1770 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1771 			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
   1772 #else
   1773 			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
   1774 #endif
   1775 			dstw = TMP_REG1;
   1776 		}
   1777 		else
   1778 			dstw = OFFS_REG(dst);
   1779 	}
   1780 	else {
   1781 		if ((dst & REG_MASK) && !dstw) {
   1782 			dstw = dst & REG_MASK;
   1783 			dst = 0;
   1784 		}
   1785 		else {
   1786 			/* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
   1787 			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
   1788 			dstw = TMP_REG1;
   1789 		}
   1790 	}
   1791 
   1792 	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
   1793 }
   1794 
   1795 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
   1796 	sljit_si dst, sljit_sw dstw,
   1797 	sljit_si src, sljit_sw srcw)
   1798 {
   1799 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1800 
   1801 	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1802 
   1803 	if (src & SLJIT_IMM) {
   1804 		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
   1805 			srcw = (sljit_si)srcw;
   1806 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
   1807 		src = TMP_REG1;
   1808 	}
   1809 	else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) {
   1810 		if (FAST_IS_REG(src))
   1811 			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
   1812 		else
   1813 			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1814 		src = TMP_REG1;
   1815 	}
   1816 
   1817 	if (FAST_IS_REG(src)) {
   1818 		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1819 		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
   1820 	}
   1821 	else
   1822 		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
   1823 
   1824 	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
   1825 
   1826 	if (dst & SLJIT_MEM)
   1827 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
   1828 	if (op & SLJIT_SINGLE_OP)
   1829 		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
   1830 	return SLJIT_SUCCESS;
   1831 
   1832 #else
   1833 
   1834 	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1835 	sljit_si invert_sign = 1;
   1836 
   1837 	if (src & SLJIT_IMM) {
   1838 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
   1839 		src = TMP_REG1;
   1840 		invert_sign = 0;
   1841 	}
   1842 	else if (!FAST_IS_REG(src)) {
   1843 		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
   1844 		src = TMP_REG1;
   1845 	}
   1846 
   1847 	/* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
   1848 	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
   1849 	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
   1850 	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
   1851 	   point value, we need to substract 2^53 + 2^31 from the constructed value. */
   1852 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
   1853 	if (invert_sign)
   1854 		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
   1855 	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1856 	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
   1857 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
   1858 	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
   1859 	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
   1860 	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
   1861 
   1862 	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
   1863 
   1864 	if (dst & SLJIT_MEM)
   1865 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
   1866 	if (op & SLJIT_SINGLE_OP)
   1867 		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
   1868 	return SLJIT_SUCCESS;
   1869 
   1870 #endif
   1871 }
   1872 
   1873 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
   1874 	sljit_si src1, sljit_sw src1w,
   1875 	sljit_si src2, sljit_sw src2w)
   1876 {
   1877 	if (src1 & SLJIT_MEM) {
   1878 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
   1879 		src1 = TMP_FREG1;
   1880 	}
   1881 
   1882 	if (src2 & SLJIT_MEM) {
   1883 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
   1884 		src2 = TMP_FREG2;
   1885 	}
   1886 
   1887 	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
   1888 }
   1889 
   1890 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
   1891 	sljit_si dst, sljit_sw dstw,
   1892 	sljit_si src, sljit_sw srcw)
   1893 {
   1894 	sljit_si dst_r;
   1895 
   1896 	CHECK_ERROR();
   1897 	compiler->cache_arg = 0;
   1898 	compiler->cache_argw = 0;
   1899 
   1900 	SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
   1901 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
   1902 
   1903 	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
   1904 		op ^= SLJIT_SINGLE_OP;
   1905 
   1906 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1907 
   1908 	if (src & SLJIT_MEM) {
   1909 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
   1910 		src = dst_r;
   1911 	}
   1912 
   1913 	switch (GET_OPCODE(op)) {
   1914 	case SLJIT_CONVD_FROMS:
   1915 		op ^= SLJIT_SINGLE_OP;
   1916 		if (op & SLJIT_SINGLE_OP) {
   1917 			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
   1918 			break;
   1919 		}
   1920 		/* Fall through. */
   1921 	case SLJIT_MOVD:
   1922 		if (src != dst_r) {
   1923 			if (dst_r != TMP_FREG1)
   1924 				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
   1925 			else
   1926 				dst_r = src;
   1927 		}
   1928 		break;
   1929 	case SLJIT_NEGD:
   1930 		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
   1931 		break;
   1932 	case SLJIT_ABSD:
   1933 		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
   1934 		break;
   1935 	}
   1936 
   1937 	if (dst & SLJIT_MEM)
   1938 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
   1939 	return SLJIT_SUCCESS;
   1940 }
   1941 
   1942 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
   1943 	sljit_si dst, sljit_sw dstw,
   1944 	sljit_si src1, sljit_sw src1w,
   1945 	sljit_si src2, sljit_sw src2w)
   1946 {
   1947 	sljit_si dst_r, flags = 0;
   1948 
   1949 	CHECK_ERROR();
   1950 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   1951 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1952 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1953 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1954 
   1955 	compiler->cache_arg = 0;
   1956 	compiler->cache_argw = 0;
   1957 
   1958 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
   1959 
   1960 	if (src1 & SLJIT_MEM) {
   1961 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
   1962 			FAIL_IF(compiler->error);
   1963 			src1 = TMP_FREG1;
   1964 		} else
   1965 			flags |= ALT_FORM1;
   1966 	}
   1967 
   1968 	if (src2 & SLJIT_MEM) {
   1969 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
   1970 			FAIL_IF(compiler->error);
   1971 			src2 = TMP_FREG2;
   1972 		} else
   1973 			flags |= ALT_FORM2;
   1974 	}
   1975 
   1976 	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
   1977 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
   1978 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
   1979 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
   1980 		}
   1981 		else {
   1982 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
   1983 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
   1984 		}
   1985 	}
   1986 	else if (flags & ALT_FORM1)
   1987 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
   1988 	else if (flags & ALT_FORM2)
   1989 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
   1990 
   1991 	if (flags & ALT_FORM1)
   1992 		src1 = TMP_FREG1;
   1993 	if (flags & ALT_FORM2)
   1994 		src2 = TMP_FREG2;
   1995 
   1996 	switch (GET_OPCODE(op)) {
   1997 	case SLJIT_ADDD:
   1998 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
   1999 		break;
   2000 
   2001 	case SLJIT_SUBD:
   2002 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
   2003 		break;
   2004 
   2005 	case SLJIT_MULD:
   2006 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
   2007 		break;
   2008 
   2009 	case SLJIT_DIVD:
   2010 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
   2011 		break;
   2012 	}
   2013 
   2014 	if (dst_r == TMP_FREG2)
   2015 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
   2016 
   2017 	return SLJIT_SUCCESS;
   2018 }
   2019 
   2020 #undef FLOAT_DATA
   2021 #undef SELECT_FOP
   2022 
   2023 /* --------------------------------------------------------------------- */
   2024 /*  Other instructions                                                   */
   2025 /* --------------------------------------------------------------------- */
   2026 
   2027 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
   2028 {
   2029 	CHECK_ERROR();
   2030 	check_sljit_emit_fast_enter(compiler, dst, dstw);
   2031 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2032 
   2033 	/* For UNUSED dst. Uncommon, but possible. */
   2034 	if (dst == SLJIT_UNUSED)
   2035 		return SLJIT_SUCCESS;
   2036 
   2037 	if (FAST_IS_REG(dst))
   2038 		return push_inst(compiler, MFLR | D(dst));
   2039 
   2040 	/* Memory. */
   2041 	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
   2042 	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
   2043 }
   2044 
   2045 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
   2046 {
   2047 	CHECK_ERROR();
   2048 	check_sljit_emit_fast_return(compiler, src, srcw);
   2049 	ADJUST_LOCAL_OFFSET(src, srcw);
   2050 
   2051 	if (FAST_IS_REG(src))
   2052 		FAIL_IF(push_inst(compiler, MTLR | S(src)));
   2053 	else {
   2054 		if (src & SLJIT_MEM)
   2055 			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
   2056 		else if (src & SLJIT_IMM)
   2057 			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
   2058 		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
   2059 	}
   2060 	return push_inst(compiler, BLR);
   2061 }
   2062 
   2063 /* --------------------------------------------------------------------- */
   2064 /*  Conditional instructions                                             */
   2065 /* --------------------------------------------------------------------- */
   2066 
   2067 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2068 {
   2069 	struct sljit_label *label;
   2070 
   2071 	CHECK_ERROR_PTR();
   2072 	check_sljit_emit_label(compiler);
   2073 
   2074 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2075 		return compiler->last_label;
   2076 
   2077 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2078 	PTR_FAIL_IF(!label);
   2079 	set_label(label, compiler);
   2080 	return label;
   2081 }
   2082 
   2083 static sljit_ins get_bo_bi_flags(sljit_si type)
   2084 {
   2085 	switch (type) {
   2086 	case SLJIT_C_EQUAL:
   2087 		return (12 << 21) | (2 << 16);
   2088 
   2089 	case SLJIT_C_NOT_EQUAL:
   2090 		return (4 << 21) | (2 << 16);
   2091 
   2092 	case SLJIT_C_LESS:
   2093 	case SLJIT_C_FLOAT_LESS:
   2094 		return (12 << 21) | ((4 + 0) << 16);
   2095 
   2096 	case SLJIT_C_GREATER_EQUAL:
   2097 	case SLJIT_C_FLOAT_GREATER_EQUAL:
   2098 		return (4 << 21) | ((4 + 0) << 16);
   2099 
   2100 	case SLJIT_C_GREATER:
   2101 	case SLJIT_C_FLOAT_GREATER:
   2102 		return (12 << 21) | ((4 + 1) << 16);
   2103 
   2104 	case SLJIT_C_LESS_EQUAL:
   2105 	case SLJIT_C_FLOAT_LESS_EQUAL:
   2106 		return (4 << 21) | ((4 + 1) << 16);
   2107 
   2108 	case SLJIT_C_SIG_LESS:
   2109 		return (12 << 21) | (0 << 16);
   2110 
   2111 	case SLJIT_C_SIG_GREATER_EQUAL:
   2112 		return (4 << 21) | (0 << 16);
   2113 
   2114 	case SLJIT_C_SIG_GREATER:
   2115 		return (12 << 21) | (1 << 16);
   2116 
   2117 	case SLJIT_C_SIG_LESS_EQUAL:
   2118 		return (4 << 21) | (1 << 16);
   2119 
   2120 	case SLJIT_C_OVERFLOW:
   2121 	case SLJIT_C_MUL_OVERFLOW:
   2122 		return (12 << 21) | (3 << 16);
   2123 
   2124 	case SLJIT_C_NOT_OVERFLOW:
   2125 	case SLJIT_C_MUL_NOT_OVERFLOW:
   2126 		return (4 << 21) | (3 << 16);
   2127 
   2128 	case SLJIT_C_FLOAT_EQUAL:
   2129 		return (12 << 21) | ((4 + 2) << 16);
   2130 
   2131 	case SLJIT_C_FLOAT_NOT_EQUAL:
   2132 		return (4 << 21) | ((4 + 2) << 16);
   2133 
   2134 	case SLJIT_C_FLOAT_UNORDERED:
   2135 		return (12 << 21) | ((4 + 3) << 16);
   2136 
   2137 	case SLJIT_C_FLOAT_ORDERED:
   2138 		return (4 << 21) | ((4 + 3) << 16);
   2139 
   2140 	default:
   2141 		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
   2142 		return (20 << 21);
   2143 	}
   2144 }
   2145 
   2146 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
   2147 {
   2148 	struct sljit_jump *jump;
   2149 	sljit_ins bo_bi_flags;
   2150 
   2151 	CHECK_ERROR_PTR();
   2152 	check_sljit_emit_jump(compiler, type);
   2153 
   2154 	bo_bi_flags = get_bo_bi_flags(type & 0xff);
   2155 	if (!bo_bi_flags)
   2156 		return NULL;
   2157 
   2158 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2159 	PTR_FAIL_IF(!jump);
   2160 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   2161 	type &= 0xff;
   2162 
   2163 	/* In PPC, we don't need to touch the arguments. */
   2164 	if (type < SLJIT_JUMP)
   2165 		jump->flags |= IS_COND;
   2166 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   2167 	if (type >= SLJIT_CALL0)
   2168 		jump->flags |= IS_CALL;
   2169 #endif
   2170 
   2171 	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
   2172 	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
   2173 	jump->addr = compiler->size;
   2174 	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
   2175 	return jump;
   2176 }
   2177 
   2178 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
   2179 {
   2180 	struct sljit_jump *jump = NULL;
   2181 	sljit_si src_r;
   2182 
   2183 	CHECK_ERROR();
   2184 	check_sljit_emit_ijump(compiler, type, src, srcw);
   2185 	ADJUST_LOCAL_OFFSET(src, srcw);
   2186 
   2187 	if (FAST_IS_REG(src)) {
   2188 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   2189 		if (type >= SLJIT_CALL0) {
   2190 			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
   2191 			src_r = TMP_CALL_REG;
   2192 		}
   2193 		else
   2194 			src_r = src;
   2195 #else
   2196 		src_r = src;
   2197 #endif
   2198 	} else if (src & SLJIT_IMM) {
   2199 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2200 		FAIL_IF(!jump);
   2201 		set_jump(jump, compiler, JUMP_ADDR);
   2202 		jump->u.target = srcw;
   2203 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   2204 		if (type >= SLJIT_CALL0)
   2205 			jump->flags |= IS_CALL;
   2206 #endif
   2207 		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
   2208 		src_r = TMP_CALL_REG;
   2209 	}
   2210 	else {
   2211 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
   2212 		src_r = TMP_CALL_REG;
   2213 	}
   2214 
   2215 	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
   2216 	if (jump)
   2217 		jump->addr = compiler->size;
   2218 	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
   2219 }
   2220 
   2221 /* Get a bit from CR, all other bits are zeroed. */
   2222 #define GET_CR_BIT(bit, dst) \
   2223 	FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
   2224 	FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
   2225 
   2226 #define INVERT_BIT(dst) \
   2227 	FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
   2228 
   2229 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
   2230 	sljit_si dst, sljit_sw dstw,
   2231 	sljit_si src, sljit_sw srcw,
   2232 	sljit_si type)
   2233 {
   2234 	sljit_si reg, input_flags;
   2235 	sljit_si flags = GET_ALL_FLAGS(op);
   2236 	sljit_sw original_dstw = dstw;
   2237 
   2238 	CHECK_ERROR();
   2239 	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
   2240 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2241 
   2242 	if (dst == SLJIT_UNUSED)
   2243 		return SLJIT_SUCCESS;
   2244 
   2245 	op = GET_OPCODE(op);
   2246 	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
   2247 
   2248 	compiler->cache_arg = 0;
   2249 	compiler->cache_argw = 0;
   2250 	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
   2251 		ADJUST_LOCAL_OFFSET(src, srcw);
   2252 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   2253 		input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA;
   2254 #else
   2255 		input_flags = WORD_DATA;
   2256 #endif
   2257 		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
   2258 		src = TMP_REG1;
   2259 		srcw = 0;
   2260 	}
   2261 
   2262 	switch (type) {
   2263 	case SLJIT_C_EQUAL:
   2264 		GET_CR_BIT(2, reg);
   2265 		break;
   2266 
   2267 	case SLJIT_C_NOT_EQUAL:
   2268 		GET_CR_BIT(2, reg);
   2269 		INVERT_BIT(reg);
   2270 		break;
   2271 
   2272 	case SLJIT_C_LESS:
   2273 	case SLJIT_C_FLOAT_LESS:
   2274 		GET_CR_BIT(4 + 0, reg);
   2275 		break;
   2276 
   2277 	case SLJIT_C_GREATER_EQUAL:
   2278 	case SLJIT_C_FLOAT_GREATER_EQUAL:
   2279 		GET_CR_BIT(4 + 0, reg);
   2280 		INVERT_BIT(reg);
   2281 		break;
   2282 
   2283 	case SLJIT_C_GREATER:
   2284 	case SLJIT_C_FLOAT_GREATER:
   2285 		GET_CR_BIT(4 + 1, reg);
   2286 		break;
   2287 
   2288 	case SLJIT_C_LESS_EQUAL:
   2289 	case SLJIT_C_FLOAT_LESS_EQUAL:
   2290 		GET_CR_BIT(4 + 1, reg);
   2291 		INVERT_BIT(reg);
   2292 		break;
   2293 
   2294 	case SLJIT_C_SIG_LESS:
   2295 		GET_CR_BIT(0, reg);
   2296 		break;
   2297 
   2298 	case SLJIT_C_SIG_GREATER_EQUAL:
   2299 		GET_CR_BIT(0, reg);
   2300 		INVERT_BIT(reg);
   2301 		break;
   2302 
   2303 	case SLJIT_C_SIG_GREATER:
   2304 		GET_CR_BIT(1, reg);
   2305 		break;
   2306 
   2307 	case SLJIT_C_SIG_LESS_EQUAL:
   2308 		GET_CR_BIT(1, reg);
   2309 		INVERT_BIT(reg);
   2310 		break;
   2311 
   2312 	case SLJIT_C_OVERFLOW:
   2313 	case SLJIT_C_MUL_OVERFLOW:
   2314 		GET_CR_BIT(3, reg);
   2315 		break;
   2316 
   2317 	case SLJIT_C_NOT_OVERFLOW:
   2318 	case SLJIT_C_MUL_NOT_OVERFLOW:
   2319 		GET_CR_BIT(3, reg);
   2320 		INVERT_BIT(reg);
   2321 		break;
   2322 
   2323 	case SLJIT_C_FLOAT_EQUAL:
   2324 		GET_CR_BIT(4 + 2, reg);
   2325 		break;
   2326 
   2327 	case SLJIT_C_FLOAT_NOT_EQUAL:
   2328 		GET_CR_BIT(4 + 2, reg);
   2329 		INVERT_BIT(reg);
   2330 		break;
   2331 
   2332 	case SLJIT_C_FLOAT_UNORDERED:
   2333 		GET_CR_BIT(4 + 3, reg);
   2334 		break;
   2335 
   2336 	case SLJIT_C_FLOAT_ORDERED:
   2337 		GET_CR_BIT(4 + 3, reg);
   2338 		INVERT_BIT(reg);
   2339 		break;
   2340 
   2341 	default:
   2342 		SLJIT_ASSERT_STOP();
   2343 		break;
   2344 	}
   2345 
   2346 	if (op < SLJIT_ADD) {
   2347 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   2348 		if (op == SLJIT_MOV)
   2349 			input_flags = WORD_DATA;
   2350 		else {
   2351 			op = SLJIT_MOV_UI;
   2352 			input_flags = INT_DATA;
   2353 		}
   2354 #else
   2355 		op = SLJIT_MOV;
   2356 		input_flags = WORD_DATA;
   2357 #endif
   2358 		if (reg != TMP_REG2)
   2359 			return SLJIT_SUCCESS;
   2360 		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
   2361 	}
   2362 
   2363 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2364 	compiler->skip_checks = 1;
   2365 #endif
   2366 	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
   2367 }
   2368 
   2369 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
   2370 {
   2371 	struct sljit_const *const_;
   2372 	sljit_si reg;
   2373 
   2374 	CHECK_ERROR_PTR();
   2375 	check_sljit_emit_const(compiler, dst, dstw, init_value);
   2376 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2377 
   2378 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   2379 	PTR_FAIL_IF(!const_);
   2380 	set_const(const_, compiler);
   2381 
   2382 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
   2383 
   2384 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
   2385 
   2386 	if (dst & SLJIT_MEM)
   2387 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
   2388 	return const_;
   2389 }
   2390