      1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
     28 {
     29 	return "PowerPC" SLJIT_CPUINFO;
     30 }
     31 
     32 /* Length of an instruction word.
     33    Both for ppc-32 and ppc-64. */
     34 typedef sljit_u32 sljit_ins;
     35 
     36 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
     37 	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
     38 #define SLJIT_PPC_STACK_FRAME_V2 1
     39 #endif
     40 
     41 #ifdef _AIX
     42 #include <sys/cache.h>
     43 #endif
     44 
     45 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
     46 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
     47 #endif
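/* Note (assumption based on the ELFv2 convention): little-endian PowerPC64
   uses the ELFv2 ABI, which has no function descriptors; callees receive
   their entry address directly (conventionally in r12), which is why
   SLJIT_PASS_ENTRY_ADDR_TO_CALL is enabled above for little-endian targets. */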
     48 
     49 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
     50 
     51 static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
     52 {
     53 #ifdef _AIX
     54 	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
     55 #elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
     56 #	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
     57 	/* Cache flush for POWER architecture. */
     58 	while (from < to) {
     59 		__asm__ volatile (
     60 			"clf 0, %0\n"
     61 			"dcs\n"
     62 			: : "r"(from)
     63 		);
     64 		from++;
     65 	}
     66 	__asm__ volatile ( "ics" );
     67 #	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
     68 #	error "Cache flush is not implemented for PowerPC/POWER common mode."
     69 #	else
     70 	/* Cache flush for PowerPC architecture. */
     71 	while (from < to) {
     72 		__asm__ volatile (
     73 			"dcbf 0, %0\n"
     74 			"sync\n"
     75 			"icbi 0, %0\n"
     76 			: : "r"(from)
     77 		);
     78 		from++;
     79 	}
     80 	__asm__ volatile ( "isync" );
     81 #	endif
     82 #	ifdef __xlc__
     83 #	warning "This file may fail to compile if -qfuncsect is used"
     84 #	endif
     85 #elif defined(__xlc__)
     86 #error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
     87 #else
     88 #error "This platform requires a cache flush implementation."
     89 #endif /* _AIX */
     90 }
     91 
     92 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
     93 
     94 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
     95 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
     96 #define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 4)
     97 
     98 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
     99 #define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 5)
    100 #else
    101 #define TMP_CALL_REG	TMP_REG2
    102 #endif
    103 
    104 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
    105 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
    106 
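/* Register mapping (derived from the table below): r3-r8 back the first
   scratch/argument registers, r14-r30 the remaining and saved registers,
   r1 is the stack pointer (SLJIT_SP), and r9, r10, r31 and r12 back
   TMP_REG1, TMP_REG2, TMP_ZERO and TMP_CALL_REG respectively. TMP_ZERO
   (r31) is kept at zero by the prologue. Index 0 is unused. */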
    107 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
    108 	0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12
    109 };
    110 
    111 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
    112 	0, 1, 2, 3, 4, 5, 6, 0, 7
    113 };
    114 
    115 /* --------------------------------------------------------------------- */
     116 /*  Instruction forms                                                    */
    117 /* --------------------------------------------------------------------- */
    118 #define D(d)		(reg_map[d] << 21)
    119 #define S(s)		(reg_map[s] << 21)
    120 #define A(a)		(reg_map[a] << 16)
    121 #define B(b)		(reg_map[b] << 11)
    122 #define C(c)		(reg_map[c] << 6)
    123 #define FD(fd)		(freg_map[fd] << 21)
    124 #define FS(fs)		(freg_map[fs] << 21)
    125 #define FA(fa)		(freg_map[fa] << 16)
    126 #define FB(fb)		(freg_map[fb] << 11)
    127 #define FC(fc)		(freg_map[fc] << 6)
    128 #define IMM(imm)	((imm) & 0xffff)
    129 #define CRD(d)		((d) << 21)
    130 
    131 /* Instruction bit sections.
    132    OE and Rc flag (see ALT_SET_FLAGS). */
    133 #define OE(flags)	((flags) & ALT_SET_FLAGS)
    134 /* Rc flag (see ALT_SET_FLAGS). */
    135 #define RC(flags)	(((flags) & ALT_SET_FLAGS) >> 10)
    136 #define HI(opcode)	((opcode) << 26)
    137 #define LO(opcode)	((opcode) << 1)
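/* HI() places the 6-bit primary opcode in bits 26-31 of the instruction word
   and LO() places the 10-bit extended opcode in bits 1-10 (counted from the
   least significant bit). ALT_SET_FLAGS (0x400) coincides with the XO-form OE
   bit, and shifted right by 10 it becomes the Rc bit, which is what the OE()
   and RC() macros above rely on. */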
    138 
    139 #define ADD		(HI(31) | LO(266))
    140 #define ADDC		(HI(31) | LO(10))
    141 #define ADDE		(HI(31) | LO(138))
    142 #define ADDI		(HI(14))
    143 #define ADDIC		(HI(13))
    144 #define ADDIS		(HI(15))
    145 #define ADDME		(HI(31) | LO(234))
    146 #define AND		(HI(31) | LO(28))
    147 #define ANDI		(HI(28))
    148 #define ANDIS		(HI(29))
    149 #define Bx		(HI(18))
    150 #define BCx		(HI(16))
    151 #define BCCTR		(HI(19) | LO(528) | (3 << 11))
    152 #define BLR		(HI(19) | LO(16) | (0x14 << 21))
    153 #define CNTLZD		(HI(31) | LO(58))
    154 #define CNTLZW		(HI(31) | LO(26))
    155 #define CMP		(HI(31) | LO(0))
    156 #define CMPI		(HI(11))
    157 #define CMPL		(HI(31) | LO(32))
    158 #define CMPLI		(HI(10))
    159 #define CROR		(HI(19) | LO(449))
    160 #define DCBT		(HI(31) | LO(278))
    161 #define DIVD		(HI(31) | LO(489))
    162 #define DIVDU		(HI(31) | LO(457))
    163 #define DIVW		(HI(31) | LO(491))
    164 #define DIVWU		(HI(31) | LO(459))
    165 #define EXTSB		(HI(31) | LO(954))
    166 #define EXTSH		(HI(31) | LO(922))
    167 #define EXTSW		(HI(31) | LO(986))
    168 #define FABS		(HI(63) | LO(264))
    169 #define FADD		(HI(63) | LO(21))
    170 #define FADDS		(HI(59) | LO(21))
    171 #define FCFID		(HI(63) | LO(846))
    172 #define FCMPU		(HI(63) | LO(0))
    173 #define FCTIDZ		(HI(63) | LO(815))
    174 #define FCTIWZ		(HI(63) | LO(15))
    175 #define FDIV		(HI(63) | LO(18))
    176 #define FDIVS		(HI(59) | LO(18))
    177 #define FMR		(HI(63) | LO(72))
    178 #define FMUL		(HI(63) | LO(25))
    179 #define FMULS		(HI(59) | LO(25))
    180 #define FNEG		(HI(63) | LO(40))
    181 #define FRSP		(HI(63) | LO(12))
    182 #define FSUB		(HI(63) | LO(20))
    183 #define FSUBS		(HI(59) | LO(20))
    184 #define LD		(HI(58) | 0)
    185 #define LWZ		(HI(32))
    186 #define MFCR		(HI(31) | LO(19))
    187 #define MFLR		(HI(31) | LO(339) | 0x80000)
    188 #define MFXER		(HI(31) | LO(339) | 0x10000)
    189 #define MTCTR		(HI(31) | LO(467) | 0x90000)
    190 #define MTLR		(HI(31) | LO(467) | 0x80000)
    191 #define MTXER		(HI(31) | LO(467) | 0x10000)
    192 #define MULHD		(HI(31) | LO(73))
    193 #define MULHDU		(HI(31) | LO(9))
    194 #define MULHW		(HI(31) | LO(75))
    195 #define MULHWU		(HI(31) | LO(11))
    196 #define MULLD		(HI(31) | LO(233))
    197 #define MULLI		(HI(7))
    198 #define MULLW		(HI(31) | LO(235))
    199 #define NEG		(HI(31) | LO(104))
    200 #define NOP		(HI(24))
    201 #define NOR		(HI(31) | LO(124))
    202 #define OR		(HI(31) | LO(444))
    203 #define ORI		(HI(24))
    204 #define ORIS		(HI(25))
    205 #define RLDICL		(HI(30))
    206 #define RLWINM		(HI(21))
    207 #define SLD		(HI(31) | LO(27))
    208 #define SLW		(HI(31) | LO(24))
    209 #define SRAD		(HI(31) | LO(794))
    210 #define SRADI		(HI(31) | LO(413 << 1))
    211 #define SRAW		(HI(31) | LO(792))
    212 #define SRAWI		(HI(31) | LO(824))
    213 #define SRD		(HI(31) | LO(539))
    214 #define SRW		(HI(31) | LO(536))
    215 #define STD		(HI(62) | 0)
    216 #define STDU		(HI(62) | 1)
    217 #define STDUX		(HI(31) | LO(181))
    218 #define STFIWX		(HI(31) | LO(983))
    219 #define STW		(HI(36))
    220 #define STWU		(HI(37))
    221 #define STWUX		(HI(31) | LO(183))
    222 #define SUBF		(HI(31) | LO(40))
    223 #define SUBFC		(HI(31) | LO(8))
    224 #define SUBFE		(HI(31) | LO(136))
    225 #define SUBFIC		(HI(8))
    226 #define XOR		(HI(31) | LO(316))
    227 #define XORI		(HI(26))
    228 #define XORIS		(HI(27))
    229 
    230 #define SIMM_MAX	(0x7fff)
    231 #define SIMM_MIN	(-0x8000)
    232 #define UIMM_MAX	(0xffff)
    233 
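/* On ABIs that use function descriptors (AIX and big-endian ELFv1 PowerPC64),
   a function pointer points to a three-word descriptor holding the entry
   address, the TOC pointer (r2) and the environment pointer (r11). The helper
   below builds such a descriptor for the generated code, reusing the r2/r11
   values of an existing host function. */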
    234 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    235 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
    236 {
    237 	sljit_sw* ptrs;
    238 	if (func_ptr)
    239 		*func_ptr = (void*)context;
    240 	ptrs = (sljit_sw*)func;
    241 	context->addr = addr ? addr : ptrs[0];
    242 	context->r2 = ptrs[1];
    243 	context->r11 = ptrs[2];
    244 }
    245 #endif
    246 
    247 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
    248 {
    249 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
    250 	FAIL_IF(!ptr);
    251 	*ptr = ins;
    252 	compiler->size++;
    253 	return SLJIT_SUCCESS;
    254 }
    255 
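/* Decides how a jump can be shortened. PATCH_B means the target fits into the
   branch instruction itself: conditional branches (bc) take a signed 16-bit
   byte displacement, unconditional ones (b) a signed 26-bit one, and
   PATCH_ABS_B marks the absolute (AA=1) variants. On 64-bit targets
   PATCH_ABS32/PATCH_ABS48 shorten the absolute address load to 32 or 48 bits,
   and REMOVE_COND rewrites an out-of-range conditional branch as an inverted
   bc over an unconditional b. */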
    256 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
    257 {
    258 	sljit_sw diff;
    259 	sljit_uw target_addr;
    260 	sljit_sw extra_jump_flags;
    261 
    262 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    263 	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
    264 		return 0;
    265 #else
    266 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
    267 		return 0;
    268 #endif
    269 
    270 	if (jump->flags & JUMP_ADDR)
    271 		target_addr = jump->u.target;
    272 	else {
    273 		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
    274 		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
    275 	}
    276 
    277 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    278 	if (jump->flags & IS_CALL)
    279 		goto keep_address;
    280 #endif
    281 
    282 	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l;
    283 
    284 	extra_jump_flags = 0;
    285 	if (jump->flags & IS_COND) {
    286 		if (diff <= 0x7fff && diff >= -0x8000) {
    287 			jump->flags |= PATCH_B;
    288 			return 1;
    289 		}
    290 		if (target_addr <= 0xffff) {
    291 			jump->flags |= PATCH_B | PATCH_ABS_B;
    292 			return 1;
    293 		}
    294 		extra_jump_flags = REMOVE_COND;
    295 
    296 		diff -= sizeof(sljit_ins);
    297 	}
    298 
    299 	if (diff <= 0x01ffffff && diff >= -0x02000000) {
    300 		jump->flags |= PATCH_B | extra_jump_flags;
    301 		return 1;
    302 	}
    303 
    304 	if (target_addr <= 0x03ffffff) {
    305 		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
    306 		return 1;
    307 	}
    308 
    309 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    310 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
    311 keep_address:
    312 #endif
    313 	if (target_addr <= 0x7fffffff) {
    314 		jump->flags |= PATCH_ABS32;
    315 		return 1;
    316 	}
    317 
    318 	if (target_addr <= 0x7fffffffffffl) {
    319 		jump->flags |= PATCH_ABS48;
    320 		return 1;
    321 	}
    322 #endif
    323 
    324 	return 0;
    325 }
    326 
    327 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
    328 {
    329 	struct sljit_memory_fragment *buf;
    330 	sljit_ins *code;
    331 	sljit_ins *code_ptr;
    332 	sljit_ins *buf_ptr;
    333 	sljit_ins *buf_end;
    334 	sljit_uw word_count;
    335 	sljit_sw executable_offset;
    336 	sljit_uw addr;
    337 
    338 	struct sljit_label *label;
    339 	struct sljit_jump *jump;
    340 	struct sljit_const *const_;
    341 
    342 	CHECK_ERROR_PTR();
    343 	CHECK_PTR(check_sljit_generate_code(compiler));
    344 	reverse_buf(compiler);
    345 
    346 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    347 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    348 	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
    349 #else
    350 	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
    351 #endif
    352 #endif
    353 	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
    354 	PTR_FAIL_WITH_EXEC_IF(code);
    355 	buf = compiler->buf;
    356 
    357 	code_ptr = code;
    358 	word_count = 0;
    359 	executable_offset = SLJIT_EXEC_OFFSET(code);
    360 
    361 	label = compiler->labels;
    362 	jump = compiler->jumps;
    363 	const_ = compiler->consts;
    364 
    365 	do {
    366 		buf_ptr = (sljit_ins*)buf->memory;
    367 		buf_end = buf_ptr + (buf->used_size >> 2);
    368 		do {
    369 			*code_ptr = *buf_ptr++;
    370 			SLJIT_ASSERT(!label || label->size >= word_count);
    371 			SLJIT_ASSERT(!jump || jump->addr >= word_count);
    372 			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
    373 			/* These structures are ordered by their address. */
    374 			if (label && label->size == word_count) {
    375 				/* Just recording the address. */
    376 				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    377 				label->size = code_ptr - code;
    378 				label = label->next;
    379 			}
    380 			if (jump && jump->addr == word_count) {
    381 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    382 				jump->addr = (sljit_uw)(code_ptr - 3);
    383 #else
    384 				jump->addr = (sljit_uw)(code_ptr - 6);
    385 #endif
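				/* A jump is emitted as a full absolute address load (2 words on
				   32-bit, 5 words on 64-bit) followed by mtctr/bcctr; when
				   detect_jump_type() finds a shorter form, the unused words of
				   that sequence are dropped and the code is compacted below. */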
    386 				if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
    387 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    388 					code_ptr[-3] = code_ptr[0];
    389 					code_ptr -= 3;
    390 #else
    391 					if (jump->flags & PATCH_ABS32) {
    392 						code_ptr -= 3;
    393 						code_ptr[-1] = code_ptr[2];
    394 						code_ptr[0] = code_ptr[3];
    395 					}
    396 					else if (jump->flags & PATCH_ABS48) {
    397 						code_ptr--;
    398 						code_ptr[-1] = code_ptr[0];
    399 						code_ptr[0] = code_ptr[1];
    400 						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
    401 						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
    402 						code_ptr[-3] ^= 0x8422;
    403 						/* oris -> ori */
    404 						code_ptr[-2] ^= 0x4000000;
    405 					}
    406 					else {
    407 						code_ptr[-6] = code_ptr[0];
    408 						code_ptr -= 6;
    409 					}
    410 #endif
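					/* Out-of-range conditional jump: invert the condition of the
					   bc (flip the "branch if true" bit of the BO field), make it
					   skip the next word (displacement 8), and emit the real
					   target as an unconditional b right after it. */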
    411 					if (jump->flags & REMOVE_COND) {
    412 						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
    413 						code_ptr++;
    414 						jump->addr += sizeof(sljit_ins);
    415 						code_ptr[0] = Bx;
    416 						jump->flags -= IS_COND;
    417 					}
    418 				}
    419 				jump = jump->next;
    420 			}
    421 			if (const_ && const_->addr == word_count) {
    422 				const_->addr = (sljit_uw)code_ptr;
    423 				const_ = const_->next;
    424 			}
    425 			code_ptr ++;
    426 			word_count ++;
    427 		} while (buf_ptr < buf_end);
    428 
    429 		buf = buf->next;
    430 	} while (buf);
    431 
    432 	if (label && label->size == word_count) {
    433 		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    434 		label->size = code_ptr - code;
    435 		label = label->next;
    436 	}
    437 
    438 	SLJIT_ASSERT(!label);
    439 	SLJIT_ASSERT(!jump);
    440 	SLJIT_ASSERT(!const_);
    441 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    442 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
    443 #else
    444 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
    445 #endif
    446 
    447 	jump = compiler->jumps;
    448 	while (jump) {
    449 		do {
    450 			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
    451 			buf_ptr = (sljit_ins *)jump->addr;
    452 
    453 			if (jump->flags & PATCH_B) {
    454 				if (jump->flags & IS_COND) {
    455 					if (!(jump->flags & PATCH_ABS_B)) {
    456 						addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
    457 						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
    458 						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
    459 					}
    460 					else {
    461 						SLJIT_ASSERT(addr <= 0xffff);
    462 						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
    463 					}
    464 				}
    465 				else {
    466 					if (!(jump->flags & PATCH_ABS_B)) {
    467 						addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
    468 						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
    469 						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
    470 					}
    471 					else {
    472 						SLJIT_ASSERT(addr <= 0x03ffffff);
    473 						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
    474 					}
    475 				}
    476 				break;
    477 			}
    478 
    479 			/* Set the fields of immediate loads. */
    480 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    481 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
    482 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
    483 #else
    484 			if (jump->flags & PATCH_ABS32) {
    485 				SLJIT_ASSERT(addr <= 0x7fffffff);
    486 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
    487 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
    488 				break;
    489 			}
    490 			if (jump->flags & PATCH_ABS48) {
    491 				SLJIT_ASSERT(addr <= 0x7fffffffffff);
    492 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
    493 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
    494 				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
    495 				break;
    496 			}
    497 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
    498 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
    499 			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
    500 			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
    501 #endif
    502 		} while (0);
    503 		jump = jump->next;
    504 	}
    505 
    506 	compiler->error = SLJIT_ERR_COMPILED;
    507 	compiler->executable_offset = executable_offset;
    508 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
    509 
    510 	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
    511 
    512 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    513 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    514 	if (((sljit_sw)code_ptr) & 0x4)
    515 		code_ptr++;
    516 #endif
    517 	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
    518 #endif
    519 
    520 	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    521 
    522 	SLJIT_CACHE_FLUSH(code, code_ptr);
    523 
    524 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    525 	return code_ptr;
    526 #else
    527 	return code;
    528 #endif
    529 }
    530 
    531 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
    532 {
    533 	switch (feature_type) {
    534 	case SLJIT_HAS_FPU:
    535 #ifdef SLJIT_IS_FPU_AVAILABLE
    536 		return SLJIT_IS_FPU_AVAILABLE;
    537 #else
    538 		/* Available by default. */
    539 		return 1;
    540 #endif
    541 
    542 	case SLJIT_HAS_CLZ:
    543 		return 1;
    544 
    545 	default:
    546 		return 0;
    547 	}
    548 }
    549 
    550 /* --------------------------------------------------------------------- */
    551 /*  Entry, exit                                                          */
    552 /* --------------------------------------------------------------------- */
    553 
    554 /* inp_flags: */
    555 
    556 /* Creates an index in data_transfer_insts array. */
    557 #define LOAD_DATA	0x01
    558 #define INDEXED		0x02
    559 #define SIGNED_DATA	0x04
    560 
    561 #define WORD_DATA	0x00
    562 #define BYTE_DATA	0x08
    563 #define HALF_DATA	0x10
    564 #define INT_DATA	0x18
    565 /* Separates integer and floating point registers */
    566 #define GPR_REG		0x1f
    567 #define DOUBLE_DATA	0x20
    568 
    569 #define MEM_MASK	0x7f
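/* The low bits of inp_flags form the index into data_transfer_insts[]:
   bit 0 selects load vs. store, bit 1 indexed (register+register) vs.
   immediate addressing, bit 2 signed vs. unsigned, bits 3-4 the access size,
   and bit 5 (DOUBLE_DATA) the floating point entries. */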
    570 
    571 /* Other inp_flags. */
    572 
     573 /* Integer operation and set flags -> requires exts on 64 bit systems. */
    574 #define ALT_SIGN_EXT	0x000100
     575 /* This flag affects the OE() and RC() macros. */
    576 #define ALT_SET_FLAGS	0x000400
    577 #define ALT_FORM1	0x001000
    578 #define ALT_FORM2	0x002000
    579 #define ALT_FORM3	0x004000
    580 #define ALT_FORM4	0x008000
    581 #define ALT_FORM5	0x010000
    582 
    583 /* Source and destination is register. */
    584 #define REG_DEST	0x000001
    585 #define REG1_SOURCE	0x000002
    586 #define REG2_SOURCE	0x000004
    587 /*
    588 ALT_SIGN_EXT		0x000100
     589 ALT_SET_FLAGS		0x000400
    590 ALT_FORM1		0x001000
    591 ...
    592 ALT_FORM5		0x010000 */
    593 
    594 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    595 #include "sljitNativePPC_32.c"
    596 #else
    597 #include "sljitNativePPC_64.c"
    598 #endif
    599 
    600 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    601 #define STACK_STORE	STW
    602 #define STACK_LOAD	LWZ
    603 #else
    604 #define STACK_STORE	STD
    605 #define STACK_LOAD	LD
    606 #endif
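/* Prologue sketch: the link register is copied to r0 and spilled to the LR
   save slot of the caller's frame header (one word above SP, or two on the
   V2/AIX-style layout), the old TMP_ZERO and the saved registers are stored
   below SP, TMP_ZERO is loaded with zero, up to three incoming arguments are
   moved to S0-S2, and finally stwu/stdu allocates the new frame while storing
   the back chain pointer. */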
    607 
    608 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    609 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    610 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    611 {
    612 	sljit_s32 args, i, tmp, offs;
    613 
    614 	CHECK_ERROR();
    615 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    616 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
    617 
    618 	FAIL_IF(push_inst(compiler, MFLR | D(0)));
    619 	offs = -(sljit_s32)(sizeof(sljit_sw));
    620 	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
    621 
    622 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    623 	for (i = SLJIT_S0; i >= tmp; i--) {
    624 		offs -= (sljit_s32)(sizeof(sljit_sw));
    625 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
    626 	}
    627 
    628 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
    629 		offs -= (sljit_s32)(sizeof(sljit_sw));
    630 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
    631 	}
    632 
    633 	SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
    634 
    635 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
    636 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
    637 #else
    638 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
    639 #endif
    640 
    641 	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
    642 
    643 	args = get_arg_count(arg_types);
    644 
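	/* OR with identical S and B operands is the canonical PowerPC register
	   move (mr): the incoming argument registers are copied into the
	   callee-saved S0-S2 registers here. */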
    645 	if (args >= 1)
    646 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
    647 	if (args >= 2)
    648 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
    649 	if (args >= 3)
    650 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
    651 
    652 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
    653 	local_size = (local_size + 15) & ~0xf;
    654 	compiler->local_size = local_size;
    655 
    656 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    657 	if (local_size <= SIMM_MAX)
    658 		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
    659 	else {
    660 		FAIL_IF(load_immediate(compiler, 0, -local_size));
    661 		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    662 	}
    663 #else
    664 	if (local_size <= SIMM_MAX)
    665 		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
    666 	else {
    667 		FAIL_IF(load_immediate(compiler, 0, -local_size));
    668 		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    669 	}
    670 #endif
    671 
    672 	return SLJIT_SUCCESS;
    673 }
    674 
    675 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    676 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
    677 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    678 {
    679 	CHECK_ERROR();
    680 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
    681 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
    682 
    683 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
    684 	compiler->local_size = (local_size + 15) & ~0xf;
    685 	return SLJIT_SUCCESS;
    686 }
    687 
    688 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
    689 {
    690 	sljit_s32 i, tmp, offs;
    691 
    692 	CHECK_ERROR();
    693 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
    694 
    695 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
    696 
    697 	if (compiler->local_size <= SIMM_MAX)
    698 		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
    699 	else {
    700 		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
    701 		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    702 	}
    703 
    704 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
    705 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
    706 #else
    707 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
    708 #endif
    709 
    710 	offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
    711 
    712 	tmp = compiler->scratches;
    713 	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
    714 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
    715 		offs += (sljit_s32)(sizeof(sljit_sw));
    716 	}
    717 
    718 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    719 	for (i = tmp; i <= SLJIT_S0; i++) {
    720 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
    721 		offs += (sljit_s32)(sizeof(sljit_sw));
    722 	}
    723 
    724 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
    725 	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
    726 
    727 	FAIL_IF(push_inst(compiler, MTLR | S(0)));
    728 	FAIL_IF(push_inst(compiler, BLR));
    729 
    730 	return SLJIT_SUCCESS;
    731 }
    732 
    733 #undef STACK_STORE
    734 #undef STACK_LOAD
    735 
    736 /* --------------------------------------------------------------------- */
    737 /*  Operators                                                            */
    738 /* --------------------------------------------------------------------- */
    739 
    740 /* s/l - store/load (1 bit)
    741    i/x - immediate/indexed form
     742    u/s - unsigned/signed (1 bit)
    743    w/b/h/i - word/byte/half/int allowed (2 bit)
    744 
    745    Some opcodes are repeated (e.g. store signed / unsigned byte is the same instruction). */
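/* Example: a signed half-word load with register+register addressing uses
   HALF_DATA | SIGNED_DATA | INDEXED | LOAD_DATA = 0x17, which selects the
   "h s x l" entry (lhax) below. */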
    746 
    747 /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
    748 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    749 #define INT_ALIGNED	0x10000
    750 #endif
    751 
    752 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    753 #define ARCH_32_64(a, b)	a
    754 #define INST_CODE_AND_DST(inst, flags, reg) \
    755 	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
    756 #else
    757 #define ARCH_32_64(a, b)	b
    758 #define INST_CODE_AND_DST(inst, flags, reg) \
    759 	(((inst) & ~INT_ALIGNED) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
    760 #endif
    761 
    762 static const sljit_ins data_transfer_insts[64 + 16] = {
    763 
    764 /* -------- Integer -------- */
    765 
    766 /* Word. */
    767 
    768 /* w u i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    769 /* w u i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    770 /* w u x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    771 /* w u x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    772 
    773 /* w s i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    774 /* w s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    775 /* w s x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    776 /* w s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    777 
    778 /* Byte. */
    779 
    780 /* b u i s */ HI(38) /* stb */,
    781 /* b u i l */ HI(34) /* lbz */,
    782 /* b u x s */ HI(31) | LO(215) /* stbx */,
    783 /* b u x l */ HI(31) | LO(87) /* lbzx */,
    784 
    785 /* b s i s */ HI(38) /* stb */,
    786 /* b s i l */ HI(34) /* lbz */ /* EXTS_REQ */,
    787 /* b s x s */ HI(31) | LO(215) /* stbx */,
    788 /* b s x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
    789 
    790 /* Half. */
    791 
    792 /* h u i s */ HI(44) /* sth */,
    793 /* h u i l */ HI(40) /* lhz */,
    794 /* h u x s */ HI(31) | LO(407) /* sthx */,
    795 /* h u x l */ HI(31) | LO(279) /* lhzx */,
    796 
    797 /* h s i s */ HI(44) /* sth */,
    798 /* h s i l */ HI(42) /* lha */,
    799 /* h s x s */ HI(31) | LO(407) /* sthx */,
    800 /* h s x l */ HI(31) | LO(343) /* lhax */,
    801 
    802 /* Int. */
    803 
    804 /* i u i s */ HI(36) /* stw */,
    805 /* i u i l */ HI(32) /* lwz */,
    806 /* i u x s */ HI(31) | LO(151) /* stwx */,
    807 /* i u x l */ HI(31) | LO(23) /* lwzx */,
    808 
    809 /* i s i s */ HI(36) /* stw */,
    810 /* i s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
    811 /* i s x s */ HI(31) | LO(151) /* stwx */,
    812 /* i s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
    813 
    814 /* -------- Floating point -------- */
    815 
    816 /* d   i s */ HI(54) /* stfd */,
    817 /* d   i l */ HI(50) /* lfd */,
    818 /* d   x s */ HI(31) | LO(727) /* stfdx */,
    819 /* d   x l */ HI(31) | LO(599) /* lfdx */,
    820 
    821 /* s   i s */ HI(52) /* stfs */,
    822 /* s   i l */ HI(48) /* lfs */,
    823 /* s   x s */ HI(31) | LO(663) /* stfsx */,
    824 /* s   x l */ HI(31) | LO(535) /* lfsx */,
    825 };
    826 
    827 static const sljit_ins updated_data_transfer_insts[64] = {
    828 
    829 /* -------- Integer -------- */
    830 
    831 /* Word. */
    832 
    833 /* w u i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    834 /* w u i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    835 /* w u x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    836 /* w u x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    837 
    838 /* w s i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    839 /* w s i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    840 /* w s x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    841 /* w s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    842 
    843 /* Byte. */
    844 
    845 /* b u i s */ HI(39) /* stbu */,
    846 /* b u i l */ HI(35) /* lbzu */,
    847 /* b u x s */ HI(31) | LO(247) /* stbux */,
    848 /* b u x l */ HI(31) | LO(119) /* lbzux */,
    849 
    850 /* b s i s */ HI(39) /* stbu */,
    851 /* b s i l */ 0 /* no such instruction */,
    852 /* b s x s */ HI(31) | LO(247) /* stbux */,
    853 /* b s x l */ 0 /* no such instruction */,
    854 
    855 /* Half. */
    856 
    857 /* h u i s */ HI(45) /* sthu */,
    858 /* h u i l */ HI(41) /* lhzu */,
    859 /* h u x s */ HI(31) | LO(439) /* sthux */,
    860 /* h u x l */ HI(31) | LO(311) /* lhzux */,
    861 
    862 /* h s i s */ HI(45) /* sthu */,
    863 /* h s i l */ HI(43) /* lhau */,
    864 /* h s x s */ HI(31) | LO(439) /* sthux */,
    865 /* h s x l */ HI(31) | LO(375) /* lhaux */,
    866 
    867 /* Int. */
    868 
    869 /* i u i s */ HI(37) /* stwu */,
    870 /* i u i l */ HI(33) /* lwzu */,
    871 /* i u x s */ HI(31) | LO(183) /* stwux */,
    872 /* i u x l */ HI(31) | LO(55) /* lwzux */,
    873 
    874 /* i s i s */ HI(37) /* stwu */,
    875 /* i s i l */ ARCH_32_64(HI(33) /* lwzu */, 0 /* no such instruction */),
    876 /* i s x s */ HI(31) | LO(183) /* stwux */,
    877 /* i s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
    878 
    879 /* -------- Floating point -------- */
    880 
    881 /* d   i s */ HI(55) /* stfdu */,
    882 /* d   i l */ HI(51) /* lfdu */,
    883 /* d   x s */ HI(31) | LO(759) /* stfdux */,
    884 /* d   x l */ HI(31) | LO(631) /* lfdux */,
    885 
    886 /* s   i s */ HI(53) /* stfsu */,
    887 /* s   i l */ HI(49) /* lfsu */,
    888 /* s   x s */ HI(31) | LO(695) /* stfsux */,
    889 /* s   x l */ HI(31) | LO(567) /* lfsux */,
    890 };
    891 
    892 #undef ARCH_32_64
    893 
     894 /* Simple cases (no caching is required). */
    895 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
    896 	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
    897 {
    898 	sljit_ins inst;
    899 	sljit_s32 offs_reg;
    900 	sljit_sw high_short;
    901 
    902 	/* Should work when (arg & REG_MASK) == 0. */
    903 	SLJIT_ASSERT(A(0) == 0);
    904 	SLJIT_ASSERT(arg & SLJIT_MEM);
    905 
    906 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
    907 		argw &= 0x3;
    908 		offs_reg = OFFS_REG(arg);
    909 
    910 		if (argw != 0) {
    911 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    912 			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | (argw << 11) | ((31 - argw) << 1)));
    913 #else
    914 			FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1)));
    915 #endif
    916 			offs_reg = tmp_reg;
    917 		}
    918 
    919 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
    920 
    921 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    922 		SLJIT_ASSERT(!(inst & INT_ALIGNED));
    923 #endif
    924 
    925 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg));
    926 	}
    927 
    928 	inst = data_transfer_insts[inp_flags & MEM_MASK];
    929 	arg &= REG_MASK;
    930 
    931 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    932 	if ((inst & INT_ALIGNED) && (argw & 0x3) != 0) {
    933 		FAIL_IF(load_immediate(compiler, tmp_reg, argw));
    934 
    935 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
    936 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
    937 	}
    938 #endif
    939 
    940 	if (argw <= SIMM_MAX && argw >= SIMM_MIN)
    941 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw));
    942 
    943 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    944 	if (argw <= 0x7fff7fffl && argw >= -0x80000000l) {
    945 #endif
    946 
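		/* The 16-bit displacement of the final memory instruction is sign
		   extended, so when bit 15 of argw is set the upper part loaded by
		   addis must be rounded up by 0x10000 to compensate. */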
    947 		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
    948 
    949 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    950 		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
    951 #else
    952 		SLJIT_ASSERT(high_short);
    953 #endif
    954 
    955 		FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM(high_short >> 16)));
    956 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw));
    957 
    958 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    959 	}
    960 
    961 	/* The rest is PPC-64 only. */
    962 
    963 	FAIL_IF(load_immediate(compiler, tmp_reg, argw));
    964 
    965 	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
    966 	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
    967 #endif
    968 }
    969 
    970 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
    971 	sljit_s32 dst, sljit_sw dstw,
    972 	sljit_s32 src1, sljit_sw src1w,
    973 	sljit_s32 src2, sljit_sw src2w)
    974 {
    975 	/* arg1 goes to TMP_REG1 or src reg
    976 	   arg2 goes to TMP_REG2, imm or src reg
     977 	   result goes to TMP_REG2, so storing the result ("put") can use TMP_REG1. */
    978 	sljit_s32 dst_r = TMP_REG2;
    979 	sljit_s32 src1_r;
    980 	sljit_s32 src2_r;
    981 	sljit_s32 sugg_src2_r = TMP_REG2;
    982 	sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS);
    983 
    984 	/* Destination check. */
    985 	if (SLOW_IS_REG(dst)) {
    986 		dst_r = dst;
    987 		flags |= REG_DEST;
    988 
    989 		if (op >= SLJIT_MOV && op <= SLJIT_MOV_P)
    990 			sugg_src2_r = dst_r;
    991 	}
    992 
    993 	/* Source 1. */
    994 	if (FAST_IS_REG(src1)) {
    995 		src1_r = src1;
    996 		flags |= REG1_SOURCE;
    997 	}
    998 	else if (src1 & SLJIT_IMM) {
    999 		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
   1000 		src1_r = TMP_REG1;
   1001 	}
   1002 	else {
   1003 		FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
   1004 		src1_r = TMP_REG1;
   1005 	}
   1006 
   1007 	/* Source 2. */
   1008 	if (FAST_IS_REG(src2)) {
   1009 		src2_r = src2;
   1010 		flags |= REG2_SOURCE;
   1011 
   1012 		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
   1013 			dst_r = src2_r;
   1014 	}
   1015 	else if (src2 & SLJIT_IMM) {
   1016 		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
   1017 		src2_r = sugg_src2_r;
   1018 	}
   1019 	else {
   1020 		FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2));
   1021 		src2_r = sugg_src2_r;
   1022 	}
   1023 
   1024 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
   1025 
   1026 	if (!(dst & SLJIT_MEM))
   1027 		return SLJIT_SUCCESS;
   1028 
   1029 	return emit_op_mem(compiler, input_flags, dst_r, dst, dstw, TMP_REG1);
   1030 }
   1031 
   1032 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
   1033 {
   1034 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1035 	sljit_s32 int_op = op & SLJIT_I32_OP;
   1036 #endif
   1037 
   1038 	CHECK_ERROR();
   1039 	CHECK(check_sljit_emit_op0(compiler, op));
   1040 
   1041 	op = GET_OPCODE(op);
   1042 	switch (op) {
   1043 	case SLJIT_BREAKPOINT:
   1044 	case SLJIT_NOP:
   1045 		return push_inst(compiler, NOP);
   1046 	case SLJIT_LMUL_UW:
   1047 	case SLJIT_LMUL_SW:
   1048 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
   1049 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1050 		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1051 		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
   1052 #else
   1053 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
   1054 		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
   1055 #endif
   1056 	case SLJIT_DIVMOD_UW:
   1057 	case SLJIT_DIVMOD_SW:
   1058 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
   1059 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1060 		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
   1061 		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
   1062 #else
   1063 		FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
   1064 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
   1065 #endif
   1066 		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
   1067 	case SLJIT_DIV_UW:
   1068 	case SLJIT_DIV_SW:
   1069 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1070 		return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
   1071 #else
   1072 		return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
   1073 #endif
   1074 	}
   1075 
   1076 	return SLJIT_SUCCESS;
   1077 }
   1078 
   1079 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
   1080         sljit_s32 src, sljit_sw srcw)
   1081 {
   1082 	if (!(src & OFFS_REG_MASK)) {
   1083 		if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED)
   1084 			return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));
   1085 
   1086 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
   1087 		/* Works with SLJIT_MEM0() case as well. */
   1088 		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
   1089 	}
   1090 
   1091 	srcw &= 0x3;
   1092 
   1093 	if (srcw == 0)
   1094 		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
   1095 
   1096 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1097 	FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1)));
   1098 #else
   1099 	FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1)));
   1100 #endif
   1101 	return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
   1102 }
   1103 
   1104 #define EMIT_MOV(type, type_flags, type_cast) \
   1105 	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
   1106 
   1107 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
   1108 	sljit_s32 dst, sljit_sw dstw,
   1109 	sljit_s32 src, sljit_sw srcw)
   1110 {
   1111 	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1112 	sljit_s32 op_flags = GET_ALL_FLAGS(op);
   1113 
   1114 	CHECK_ERROR();
   1115 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
   1116 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1117 	ADJUST_LOCAL_OFFSET(src, srcw);
   1118 
   1119 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
   1120 		if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
   1121 			return emit_prefetch(compiler, src, srcw);
   1122 
   1123 		return SLJIT_SUCCESS;
   1124 	}
   1125 
   1126 	op = GET_OPCODE(op);
   1127 	if ((src & SLJIT_IMM) && srcw == 0)
   1128 		src = TMP_ZERO;
   1129 
   1130 	if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
   1131 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1132 
   1133 	if (op < SLJIT_NOT && FAST_IS_REG(src) && src == dst) {
   1134 		if (!TYPE_CAST_NEEDED(op))
   1135 			return SLJIT_SUCCESS;
   1136 	}
   1137 
   1138 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1139 	if (op_flags & SLJIT_I32_OP) {
   1140 		if (op < SLJIT_NOT) {
   1141 			if (src & SLJIT_MEM) {
   1142 				if (op == SLJIT_MOV_S32)
   1143 					op = SLJIT_MOV_U32;
   1144 			}
   1145 			else if (src & SLJIT_IMM) {
   1146 				if (op == SLJIT_MOV_U32)
   1147 					op = SLJIT_MOV_S32;
   1148 			}
   1149 		}
   1150 		else {
   1151 			/* Most operations expect sign extended arguments. */
   1152 			flags |= INT_DATA | SIGNED_DATA;
   1153 			if (HAS_FLAGS(op_flags))
   1154 				flags |= ALT_SIGN_EXT;
   1155 		}
   1156 	}
   1157 #endif
   1158 
   1159 	switch (op) {
   1160 	case SLJIT_MOV:
   1161 	case SLJIT_MOV_P:
   1162 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1163 	case SLJIT_MOV_U32:
   1164 	case SLJIT_MOV_S32:
   1165 #endif
   1166 		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
   1167 
   1168 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1169 	case SLJIT_MOV_U32:
   1170 		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));
   1171 
   1172 	case SLJIT_MOV_S32:
   1173 		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
   1174 #endif
   1175 
   1176 	case SLJIT_MOV_U8:
   1177 		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));
   1178 
   1179 	case SLJIT_MOV_S8:
   1180 		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));
   1181 
   1182 	case SLJIT_MOV_U16:
   1183 		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));
   1184 
   1185 	case SLJIT_MOV_S16:
   1186 		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));
   1187 
   1188 	case SLJIT_NOT:
   1189 		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1190 
   1191 	case SLJIT_NEG:
   1192 		return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw);
   1193 
   1194 	case SLJIT_CLZ:
   1195 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1196 		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
   1197 #else
   1198 		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
   1199 #endif
   1200 	}
   1201 
   1202 	return SLJIT_SUCCESS;
   1203 }
   1204 
   1205 #undef EMIT_MOV
   1206 
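/* Immediate classification helpers: SL - fits the signed 16-bit field of
   addi and friends; UL - fits an unsigned 16-bit field (ori, andi., etc.);
   SH - zero low 16 bits, fits the shifted field of addis; UH - only the
   upper 16 bits are set (oris/andis.); ADD - reachable with an addi + addis
   pair; UI - fits in 32 bits (unsigned). */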
   1207 #define TEST_SL_IMM(src, srcw) \
   1208 	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
   1209 
   1210 #define TEST_UL_IMM(src, srcw) \
   1211 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
   1212 
   1213 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1214 #define TEST_SH_IMM(src, srcw) \
   1215 	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
   1216 #else
   1217 #define TEST_SH_IMM(src, srcw) \
   1218 	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
   1219 #endif
   1220 
   1221 #define TEST_UH_IMM(src, srcw) \
   1222 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))
   1223 
   1224 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1225 #define TEST_ADD_IMM(src, srcw) \
   1226 	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
   1227 #else
   1228 #define TEST_ADD_IMM(src, srcw) \
   1229 	((src) & SLJIT_IMM)
   1230 #endif
   1231 
   1232 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1233 #define TEST_UI_IMM(src, srcw) \
   1234 	(((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
   1235 #else
   1236 #define TEST_UI_IMM(src, srcw) \
   1237 	((src) & SLJIT_IMM)
   1238 #endif
   1239 
   1240 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
   1241 	sljit_s32 dst, sljit_sw dstw,
   1242 	sljit_s32 src1, sljit_sw src1w,
   1243 	sljit_s32 src2, sljit_sw src2w)
   1244 {
   1245 	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1246 
   1247 	CHECK_ERROR();
   1248 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   1249 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1250 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1251 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1252 
   1253 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
   1254 		return SLJIT_SUCCESS;
   1255 
   1256 	if ((src1 & SLJIT_IMM) && src1w == 0)
   1257 		src1 = TMP_ZERO;
   1258 	if ((src2 & SLJIT_IMM) && src2w == 0)
   1259 		src2 = TMP_ZERO;
   1260 
   1261 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1262 	if (op & SLJIT_I32_OP) {
   1263 		/* Most operations expect sign extended arguments. */
   1264 		flags |= INT_DATA | SIGNED_DATA;
   1265 		if (src1 & SLJIT_IMM)
   1266 			src1w = (sljit_s32)(src1w);
   1267 		if (src2 & SLJIT_IMM)
   1268 			src2w = (sljit_s32)(src2w);
   1269 		if (HAS_FLAGS(op))
   1270 			flags |= ALT_SIGN_EXT;
   1271 	}
   1272 #endif
   1273 	if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
   1274 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1275 
   1276 	switch (GET_OPCODE(op)) {
   1277 	case SLJIT_ADD:
   1278 		if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
   1279 			return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
   1280 
   1281 		if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1282 			if (TEST_SL_IMM(src2, src2w)) {
   1283 				compiler->imm = src2w & 0xffff;
   1284 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1285 			}
   1286 			if (TEST_SL_IMM(src1, src1w)) {
   1287 				compiler->imm = src1w & 0xffff;
   1288 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1289 			}
   1290 			if (TEST_SH_IMM(src2, src2w)) {
   1291 				compiler->imm = (src2w >> 16) & 0xffff;
   1292 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1293 			}
   1294 			if (TEST_SH_IMM(src1, src1w)) {
   1295 				compiler->imm = (src1w >> 16) & 0xffff;
   1296 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1297 			}
   1298 			/* Range between -1 and -32768 is covered above. */
   1299 			if (TEST_ADD_IMM(src2, src2w)) {
   1300 				compiler->imm = src2w & 0xffffffff;
   1301 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1302 			}
   1303 			if (TEST_ADD_IMM(src1, src1w)) {
   1304 				compiler->imm = src1w & 0xffffffff;
   1305 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
   1306 			}
   1307 		}
   1308 		if (HAS_FLAGS(op)) {
   1309 			if (TEST_SL_IMM(src2, src2w)) {
   1310 				compiler->imm = src2w & 0xffff;
   1311 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1312 			}
   1313 			if (TEST_SL_IMM(src1, src1w)) {
   1314 				compiler->imm = src1w & 0xffff;
   1315 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1316 			}
   1317 		}
   1318 		return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w);
   1319 
   1320 	case SLJIT_ADDC:
   1321 		return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);
   1322 
   1323 	case SLJIT_SUB:
   1324 		if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) {
   1325 			if (dst == SLJIT_UNUSED) {
   1326 				if (TEST_UL_IMM(src2, src2w)) {
   1327 					compiler->imm = src2w & 0xffff;
   1328 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1329 				}
   1330 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
   1331 			}
   1332 
   1333 			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
   1334 				compiler->imm = src2w;
   1335 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1336 			}
   1337 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);
   1338 		}
   1339 
   1340 		if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
   1341 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w);
   1342 
   1343 		if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1344 			if (TEST_SL_IMM(src2, -src2w)) {
   1345 				compiler->imm = (-src2w) & 0xffff;
   1346 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1347 			}
   1348 			if (TEST_SL_IMM(src1, src1w)) {
   1349 				compiler->imm = src1w & 0xffff;
   1350 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1351 			}
   1352 			if (TEST_SH_IMM(src2, -src2w)) {
   1353 				compiler->imm = ((-src2w) >> 16) & 0xffff;
   1354 				return emit_op(compiler, SLJIT_ADD, flags |  ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1355 			}
   1356 			/* Range between -1 and -32768 is covered above. */
   1357 			if (TEST_ADD_IMM(src2, -src2w)) {
   1358 				compiler->imm = -src2w & 0xffffffff;
   1359 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1360 			}
   1361 		}
   1362 
   1363 		if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) {
   1364 			if (TEST_SL_IMM(src2, src2w)) {
   1365 				compiler->imm = src2w & 0xffff;
   1366 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0);
   1367 			}
   1368 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
   1369 		}
   1370 
   1371 		if (TEST_SL_IMM(src2, -src2w)) {
   1372 			compiler->imm = (-src2w) & 0xffff;
   1373 			return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1374 		}
   1375 		/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1376 		return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
   1377 
   1378 	case SLJIT_SUBC:
   1379 		return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);
   1380 
   1381 	case SLJIT_MUL:
   1382 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1383 		if (op & SLJIT_I32_OP)
   1384 			flags |= ALT_FORM2;
   1385 #endif
   1386 		if (!HAS_FLAGS(op)) {
   1387 			if (TEST_SL_IMM(src2, src2w)) {
   1388 				compiler->imm = src2w & 0xffff;
   1389 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1390 			}
   1391 			if (TEST_SL_IMM(src1, src1w)) {
   1392 				compiler->imm = src1w & 0xffff;
   1393 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1394 			}
   1395 		}
   1396 		else
   1397 			FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
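         		/* Note: mtxer with TMP_ZERO clears XER, including the sticky summary overflow (SO) bit,
         		   so the overflow check of the flag-setting multiply emitted below is not affected by a stale SO value. */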
   1398 		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
   1399 
   1400 	case SLJIT_AND:
   1401 	case SLJIT_OR:
   1402 	case SLJIT_XOR:
   1403 		/* Commutative unsigned operations. */
   1404 		if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
   1405 			if (TEST_UL_IMM(src2, src2w)) {
   1406 				compiler->imm = src2w;
   1407 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1408 			}
   1409 			if (TEST_UL_IMM(src1, src1w)) {
   1410 				compiler->imm = src1w;
   1411 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1412 			}
   1413 			if (TEST_UH_IMM(src2, src2w)) {
   1414 				compiler->imm = (src2w >> 16) & 0xffff;
   1415 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1416 			}
   1417 			if (TEST_UH_IMM(src1, src1w)) {
   1418 				compiler->imm = (src1w >> 16) & 0xffff;
   1419 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1420 			}
   1421 		}
    1422 		if (GET_OPCODE(op) != SLJIT_AND) {
    1423 			/* Unlike or and xor, an and immediate also clears the bits outside the immediate, so this two-instruction form cannot be used for and. */
   1424 			if (TEST_UI_IMM(src2, src2w)) {
   1425 				compiler->imm = src2w;
   1426 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1427 			}
   1428 			if (TEST_UI_IMM(src1, src1w)) {
   1429 				compiler->imm = src1w;
   1430 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1431 			}
   1432 		}
   1433 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1434 
   1435 	case SLJIT_SHL:
   1436 	case SLJIT_LSHR:
   1437 	case SLJIT_ASHR:
   1438 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1439 		if (op & SLJIT_I32_OP)
   1440 			flags |= ALT_FORM2;
   1441 #endif
   1442 		if (src2 & SLJIT_IMM) {
   1443 			compiler->imm = src2w;
   1444 			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1445 		}
   1446 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1447 	}
   1448 
   1449 	return SLJIT_SUCCESS;
   1450 }
   1451 
   1452 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
   1453 {
   1454 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
   1455 	return reg_map[reg];
   1456 }
   1457 
   1458 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
   1459 {
   1460 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
   1461 	return freg_map[reg];
   1462 }
   1463 
   1464 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
   1465 	void *instruction, sljit_s32 size)
   1466 {
   1467 	CHECK_ERROR();
   1468 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
   1469 
   1470 	return push_inst(compiler, *(sljit_ins*)instruction);
   1471 }
   1472 
   1473 /* --------------------------------------------------------------------- */
   1474 /*  Floating point operators                                             */
   1475 /* --------------------------------------------------------------------- */
   1476 
   1477 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
   1478 #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
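         /* Note: SLJIT_F32_OP (0x100, see the float_transfer_bit_error compile assert in sljit_emit_fop1)
            is shifted down to a low data transfer flag, so single precision accesses are selected
            without a branch. */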
   1479 
   1480 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1481 #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
   1482 #else
   1483 #define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
   1484 
   1485 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
   1486 #define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
   1487 #define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
   1488 #else
   1489 #define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
   1490 #define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
   1491 #endif
   1492 
   1493 #endif /* SLJIT_CONFIG_PPC_64 */
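         /* These offsets address scratch slots in the stack frame. Classic PowerPC has no direct
            GPR <-> FPR move instruction, so the integer/float conversions below go through memory
            using these slots. */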
   1494 
   1495 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
   1496 	sljit_s32 dst, sljit_sw dstw,
   1497 	sljit_s32 src, sljit_sw srcw)
   1498 {
   1499 	if (src & SLJIT_MEM) {
    1500 		/* We can ignore the temporary data store on the stack from a caching point of view. */
   1501 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
   1502 		src = TMP_FREG1;
   1503 	}
   1504 
   1505 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1506 	op = GET_OPCODE(op);
   1507 	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
   1508 
   1509 	if (op == SLJIT_CONV_SW_FROM_F64) {
   1510 		if (FAST_IS_REG(dst)) {
   1511 			FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
   1512 			return emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
   1513 		}
   1514 		return emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, TMP_REG1);
   1515 	}
   1516 #else
   1517 	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
   1518 #endif
   1519 
   1520 	if (FAST_IS_REG(dst)) {
   1521 		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
   1522 		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
   1523 		return emit_op_mem(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1);
   1524 	}
   1525 
   1526 	SLJIT_ASSERT(dst & SLJIT_MEM);
   1527 
   1528 	if (dst & OFFS_REG_MASK) {
   1529 		dstw &= 0x3;
   1530 		if (dstw) {
   1531 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   1532 			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
   1533 #else
   1534 			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
   1535 #endif
   1536 			dstw = TMP_REG1;
   1537 		}
   1538 		else
   1539 			dstw = OFFS_REG(dst);
   1540 	}
   1541 	else {
   1542 		if ((dst & REG_MASK) && !dstw) {
   1543 			dstw = dst & REG_MASK;
   1544 			dst = 0;
   1545 		}
   1546 		else {
    1547 			/* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
   1548 			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
   1549 			dstw = TMP_REG1;
   1550 		}
   1551 	}
   1552 
   1553 	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
   1554 }
   1555 
   1556 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
   1557 	sljit_s32 dst, sljit_sw dstw,
   1558 	sljit_s32 src, sljit_sw srcw)
   1559 {
   1560 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1561 
   1562 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1563 
   1564 	if (src & SLJIT_IMM) {
   1565 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
   1566 			srcw = (sljit_s32)srcw;
   1567 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
   1568 		src = TMP_REG1;
   1569 	}
   1570 	else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
   1571 		if (FAST_IS_REG(src))
   1572 			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
   1573 		else
   1574 			FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
   1575 		src = TMP_REG1;
   1576 	}
   1577 
   1578 	if (FAST_IS_REG(src)) {
   1579 		FAIL_IF(emit_op_mem(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
   1580 		FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
   1581 	}
   1582 	else
   1583 		FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
   1584 
   1585 	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
   1586 
   1587 	if (dst & SLJIT_MEM)
   1588 		return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
   1589 	if (op & SLJIT_F32_OP)
   1590 		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
   1591 	return SLJIT_SUCCESS;
   1592 
   1593 #else
   1594 
   1595 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1596 	sljit_s32 invert_sign = 1;
   1597 
   1598 	if (src & SLJIT_IMM) {
   1599 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
   1600 		src = TMP_REG1;
   1601 		invert_sign = 0;
   1602 	}
   1603 	else if (!FAST_IS_REG(src)) {
   1604 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
   1605 		src = TMP_REG1;
   1606 	}
   1607 
    1608 	/* First, a special double precision floating point value is constructed: (2^52 + (input xor 2^31)).
    1609 	   The double precision format has exactly 53 bits of precision, so the lower 32 bits of this
    1610 	   value hold the biased input exactly. The xor with 2^31 is the same as adding 0x80000000
    1611 	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
    1612 	   point value, we need to subtract 2^52 + 2^31 from the constructed value. */
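         	/* For example, an input of -1 (0xffffffff) is stored as low word 0x7fffffff under the
         	   0x43300000 high word, i.e. 2^52 + 0x7fffffff; subtracting 2^52 + 2^31 yields -1.0. */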
   1613 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
   1614 	if (invert_sign)
   1615 		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
   1616 	FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, TMP_REG1));
   1617 	FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
   1618 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
   1619 	FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
   1620 	FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2));
   1621 	FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
   1622 
   1623 	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
   1624 
   1625 	if (dst & SLJIT_MEM)
   1626 		return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
   1627 	if (op & SLJIT_F32_OP)
   1628 		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
   1629 	return SLJIT_SUCCESS;
   1630 
   1631 #endif
   1632 }
   1633 
   1634 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
   1635 	sljit_s32 src1, sljit_sw src1w,
   1636 	sljit_s32 src2, sljit_sw src2w)
   1637 {
   1638 	if (src1 & SLJIT_MEM) {
   1639 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
   1640 		src1 = TMP_FREG1;
   1641 	}
   1642 
   1643 	if (src2 & SLJIT_MEM) {
   1644 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2));
   1645 		src2 = TMP_FREG2;
   1646 	}
   1647 
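         	/* The compare result is placed into the condition register bits starting at offset 4 (CR1),
         	   which is what the 4 + n bit indexes in get_bo_bi_flags and sljit_emit_op_flags refer to. */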
   1648 	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
   1649 }
   1650 
   1651 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
   1652 	sljit_s32 dst, sljit_sw dstw,
   1653 	sljit_s32 src, sljit_sw srcw)
   1654 {
   1655 	sljit_s32 dst_r;
   1656 
   1657 	CHECK_ERROR();
   1658 
   1659 	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
   1660 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
   1661 
   1662 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
   1663 		op ^= SLJIT_F32_OP;
   1664 
   1665 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
   1666 
   1667 	if (src & SLJIT_MEM) {
   1668 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, TMP_REG1));
   1669 		src = dst_r;
   1670 	}
   1671 
   1672 	switch (GET_OPCODE(op)) {
   1673 	case SLJIT_CONV_F64_FROM_F32:
   1674 		op ^= SLJIT_F32_OP;
   1675 		if (op & SLJIT_F32_OP) {
   1676 			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
   1677 			break;
   1678 		}
   1679 		/* Fall through. */
   1680 	case SLJIT_MOV_F64:
   1681 		if (src != dst_r) {
   1682 			if (dst_r != TMP_FREG1)
   1683 				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
   1684 			else
   1685 				dst_r = src;
   1686 		}
   1687 		break;
   1688 	case SLJIT_NEG_F64:
   1689 		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
   1690 		break;
   1691 	case SLJIT_ABS_F64:
   1692 		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
   1693 		break;
   1694 	}
   1695 
   1696 	if (dst & SLJIT_MEM)
   1697 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), dst_r, dst, dstw, TMP_REG1));
   1698 	return SLJIT_SUCCESS;
   1699 }
   1700 
   1701 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
   1702 	sljit_s32 dst, sljit_sw dstw,
   1703 	sljit_s32 src1, sljit_sw src1w,
   1704 	sljit_s32 src2, sljit_sw src2w)
   1705 {
   1706 	sljit_s32 dst_r;
   1707 
   1708 	CHECK_ERROR();
   1709 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   1710 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1711 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1712 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1713 
   1714 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
   1715 
   1716 	if (src1 & SLJIT_MEM) {
   1717 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1));
   1718 		src1 = TMP_FREG1;
   1719 	}
   1720 
   1721 	if (src2 & SLJIT_MEM) {
   1722 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2));
   1723 		src2 = TMP_FREG2;
   1724 	}
   1725 
   1726 	switch (GET_OPCODE(op)) {
   1727 	case SLJIT_ADD_F64:
   1728 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
   1729 		break;
   1730 
   1731 	case SLJIT_SUB_F64:
   1732 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
   1733 		break;
   1734 
   1735 	case SLJIT_MUL_F64:
    1736 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL uses FC as src2 */));
   1737 		break;
   1738 
   1739 	case SLJIT_DIV_F64:
   1740 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
   1741 		break;
   1742 	}
   1743 
   1744 	if (dst & SLJIT_MEM)
   1745 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, TMP_REG1));
   1746 
   1747 	return SLJIT_SUCCESS;
   1748 }
   1749 
   1750 #undef SELECT_FOP
   1751 
   1752 /* --------------------------------------------------------------------- */
   1753 /*  Other instructions                                                   */
   1754 /* --------------------------------------------------------------------- */
   1755 
   1756 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
   1757 {
   1758 	CHECK_ERROR();
   1759 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
   1760 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1761 
   1762 	if (FAST_IS_REG(dst))
   1763 		return push_inst(compiler, MFLR | D(dst));
   1764 
   1765 	/* Memory. */
   1766 	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
   1767 	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
   1768 }
   1769 
   1770 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
   1771 {
   1772 	CHECK_ERROR();
   1773 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
   1774 	ADJUST_LOCAL_OFFSET(src, srcw);
   1775 
   1776 	if (FAST_IS_REG(src))
   1777 		FAIL_IF(push_inst(compiler, MTLR | S(src)));
   1778 	else {
   1779 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
   1780 		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
   1781 	}
   1782 
   1783 	return push_inst(compiler, BLR);
   1784 }
   1785 
   1786 /* --------------------------------------------------------------------- */
   1787 /*  Conditional instructions                                             */
   1788 /* --------------------------------------------------------------------- */
   1789 
   1790 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   1791 {
   1792 	struct sljit_label *label;
   1793 
   1794 	CHECK_ERROR_PTR();
   1795 	CHECK_PTR(check_sljit_emit_label(compiler));
   1796 
   1797 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   1798 		return compiler->last_label;
   1799 
   1800 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   1801 	PTR_FAIL_IF(!label);
   1802 	set_label(label, compiler);
   1803 	return label;
   1804 }
   1805 
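         /* The returned value encodes the BO and BI fields of the PowerPC conditional branch
            instructions (the << 21 and << 16 shifts below): BO 12 branches when the selected CR
            bit is set, BO 4 when it is clear, and BO 20 branches unconditionally. BI selects the
            CR bit; integer comparisons use CR0 (bits 0-3), float comparisons use CR1 (bits 4-7). */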
   1806 static sljit_ins get_bo_bi_flags(sljit_s32 type)
   1807 {
   1808 	switch (type) {
   1809 	case SLJIT_EQUAL:
   1810 		return (12 << 21) | (2 << 16);
   1811 
   1812 	case SLJIT_NOT_EQUAL:
   1813 		return (4 << 21) | (2 << 16);
   1814 
   1815 	case SLJIT_LESS:
   1816 	case SLJIT_SIG_LESS:
   1817 		return (12 << 21) | (0 << 16);
   1818 
   1819 	case SLJIT_GREATER_EQUAL:
   1820 	case SLJIT_SIG_GREATER_EQUAL:
   1821 		return (4 << 21) | (0 << 16);
   1822 
   1823 	case SLJIT_GREATER:
   1824 	case SLJIT_SIG_GREATER:
   1825 		return (12 << 21) | (1 << 16);
   1826 
   1827 	case SLJIT_LESS_EQUAL:
   1828 	case SLJIT_SIG_LESS_EQUAL:
   1829 		return (4 << 21) | (1 << 16);
   1830 
   1831 	case SLJIT_LESS_F64:
   1832 		return (12 << 21) | ((4 + 0) << 16);
   1833 
   1834 	case SLJIT_GREATER_EQUAL_F64:
   1835 		return (4 << 21) | ((4 + 0) << 16);
   1836 
   1837 	case SLJIT_GREATER_F64:
   1838 		return (12 << 21) | ((4 + 1) << 16);
   1839 
   1840 	case SLJIT_LESS_EQUAL_F64:
   1841 		return (4 << 21) | ((4 + 1) << 16);
   1842 
   1843 	case SLJIT_OVERFLOW:
   1844 	case SLJIT_MUL_OVERFLOW:
   1845 		return (12 << 21) | (3 << 16);
   1846 
   1847 	case SLJIT_NOT_OVERFLOW:
   1848 	case SLJIT_MUL_NOT_OVERFLOW:
   1849 		return (4 << 21) | (3 << 16);
   1850 
   1851 	case SLJIT_EQUAL_F64:
   1852 		return (12 << 21) | ((4 + 2) << 16);
   1853 
   1854 	case SLJIT_NOT_EQUAL_F64:
   1855 		return (4 << 21) | ((4 + 2) << 16);
   1856 
   1857 	case SLJIT_UNORDERED_F64:
   1858 		return (12 << 21) | ((4 + 3) << 16);
   1859 
   1860 	case SLJIT_ORDERED_F64:
   1861 		return (4 << 21) | ((4 + 3) << 16);
   1862 
   1863 	default:
   1864 		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
   1865 		return (20 << 21);
   1866 	}
   1867 }
   1868 
   1869 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
   1870 {
   1871 	struct sljit_jump *jump;
   1872 	sljit_ins bo_bi_flags;
   1873 
   1874 	CHECK_ERROR_PTR();
   1875 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
   1876 
   1877 	bo_bi_flags = get_bo_bi_flags(type & 0xff);
   1878 	if (!bo_bi_flags)
   1879 		return NULL;
   1880 
   1881 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   1882 	PTR_FAIL_IF(!jump);
   1883 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   1884 	type &= 0xff;
   1885 
    1886 	/* On PPC, we do not need to touch the arguments. */
   1887 	if (type < SLJIT_JUMP)
   1888 		jump->flags |= IS_COND;
   1889 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   1890 	if (type >= SLJIT_CALL)
   1891 		jump->flags |= IS_CALL;
   1892 #endif
   1893 
   1894 	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
   1895 	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
   1896 	jump->addr = compiler->size;
   1897 	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
   1898 	return jump;
   1899 }
   1900 
   1901 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
   1902 	sljit_s32 arg_types)
   1903 {
   1904 	CHECK_ERROR_PTR();
   1905 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
   1906 
   1907 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1908 	PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
   1909 #endif
   1910 
   1911 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
   1912 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
   1913 	compiler->skip_checks = 1;
   1914 #endif
   1915 
   1916 	return sljit_emit_jump(compiler, type);
   1917 }
   1918 
   1919 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
   1920 {
   1921 	struct sljit_jump *jump = NULL;
   1922 	sljit_s32 src_r;
   1923 
   1924 	CHECK_ERROR();
   1925 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
   1926 	ADJUST_LOCAL_OFFSET(src, srcw);
   1927 
   1928 	if (FAST_IS_REG(src)) {
   1929 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   1930 		if (type >= SLJIT_CALL) {
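         			/* 'or rD,rS,rS' is the canonical register move (mr). The target is copied into
         			   TMP_CALL_REG, which is expected to hold the entry address of the called function
         			   on SLJIT_PASS_ENTRY_ADDR_TO_CALL targets. */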
   1931 			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
   1932 			src_r = TMP_CALL_REG;
   1933 		}
   1934 		else
   1935 			src_r = src;
   1936 #else
   1937 		src_r = src;
   1938 #endif
   1939 	} else if (src & SLJIT_IMM) {
   1940 		/* These jumps are converted to jump/call instructions when possible. */
   1941 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   1942 		FAIL_IF(!jump);
   1943 		set_jump(jump, compiler, JUMP_ADDR);
   1944 		jump->u.target = srcw;
   1945 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
   1946 		if (type >= SLJIT_CALL)
   1947 			jump->flags |= IS_CALL;
   1948 #endif
   1949 		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
   1950 		src_r = TMP_CALL_REG;
   1951 	}
   1952 	else {
   1953 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
   1954 		src_r = TMP_CALL_REG;
   1955 	}
   1956 
   1957 	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
   1958 	if (jump)
   1959 		jump->addr = compiler->size;
   1960 	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
   1961 }
   1962 
   1963 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
   1964 	sljit_s32 arg_types,
   1965 	sljit_s32 src, sljit_sw srcw)
   1966 {
   1967 	CHECK_ERROR();
   1968 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
   1969 
   1970 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1971 	if (src & SLJIT_MEM) {
   1972 		ADJUST_LOCAL_OFFSET(src, srcw);
   1973 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
   1974 		src = TMP_CALL_REG;
   1975 	}
   1976 
   1977 	FAIL_IF(call_with_args(compiler, arg_types, &src));
   1978 #endif
   1979 
   1980 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
   1981 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
   1982 	compiler->skip_checks = 1;
   1983 #endif
   1984 
   1985 	return sljit_emit_ijump(compiler, type, src, srcw);
   1986 }
   1987 
   1988 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
   1989 	sljit_s32 dst, sljit_sw dstw,
   1990 	sljit_s32 type)
   1991 {
   1992 	sljit_s32 reg, input_flags, cr_bit, invert;
   1993 	sljit_s32 saved_op = op;
   1994 	sljit_sw saved_dstw = dstw;
   1995 
   1996 	CHECK_ERROR();
   1997 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
   1998 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1999 
   2000 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   2001 	input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
   2002 #else
   2003 	input_flags = WORD_DATA;
   2004 #endif
   2005 
   2006 	op = GET_OPCODE(op);
   2007 	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
   2008 
   2009 	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
   2010 		FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1));
   2011 
   2012 	invert = 0;
   2013 	cr_bit = 0;
   2014 
   2015 	switch (type & 0xff) {
   2016 	case SLJIT_LESS:
   2017 	case SLJIT_SIG_LESS:
   2018 		break;
   2019 
   2020 	case SLJIT_GREATER_EQUAL:
   2021 	case SLJIT_SIG_GREATER_EQUAL:
   2022 		invert = 1;
   2023 		break;
   2024 
   2025 	case SLJIT_GREATER:
   2026 	case SLJIT_SIG_GREATER:
   2027 		cr_bit = 1;
   2028 		break;
   2029 
   2030 	case SLJIT_LESS_EQUAL:
   2031 	case SLJIT_SIG_LESS_EQUAL:
   2032 		cr_bit = 1;
   2033 		invert = 1;
   2034 		break;
   2035 
   2036 	case SLJIT_EQUAL:
   2037 		cr_bit = 2;
   2038 		break;
   2039 
   2040 	case SLJIT_NOT_EQUAL:
   2041 		cr_bit = 2;
   2042 		invert = 1;
   2043 		break;
   2044 
   2045 	case SLJIT_OVERFLOW:
   2046 	case SLJIT_MUL_OVERFLOW:
   2047 		cr_bit = 3;
   2048 		break;
   2049 
   2050 	case SLJIT_NOT_OVERFLOW:
   2051 	case SLJIT_MUL_NOT_OVERFLOW:
   2052 		cr_bit = 3;
   2053 		invert = 1;
   2054 		break;
   2055 
   2056 	case SLJIT_LESS_F64:
   2057 		cr_bit = 4 + 0;
   2058 		break;
   2059 
   2060 	case SLJIT_GREATER_EQUAL_F64:
   2061 		cr_bit = 4 + 0;
   2062 		invert = 1;
   2063 		break;
   2064 
   2065 	case SLJIT_GREATER_F64:
   2066 		cr_bit = 4 + 1;
   2067 		break;
   2068 
   2069 	case SLJIT_LESS_EQUAL_F64:
   2070 		cr_bit = 4 + 1;
   2071 		invert = 1;
   2072 		break;
   2073 
   2074 	case SLJIT_EQUAL_F64:
   2075 		cr_bit = 4 + 2;
   2076 		break;
   2077 
   2078 	case SLJIT_NOT_EQUAL_F64:
   2079 		cr_bit = 4 + 2;
   2080 		invert = 1;
   2081 		break;
   2082 
   2083 	case SLJIT_UNORDERED_F64:
   2084 		cr_bit = 4 + 3;
   2085 		break;
   2086 
   2087 	case SLJIT_ORDERED_F64:
   2088 		cr_bit = 4 + 3;
   2089 		invert = 1;
   2090 		break;
   2091 
   2092 	default:
   2093 		SLJIT_UNREACHABLE();
   2094 		break;
   2095 	}
   2096 
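         	/* mfcr copies the whole condition register into reg; the rlwinm below rotates the selected
         	   CR bit into the least significant position and masks out the rest, leaving reg equal to
         	   0 or 1. The xori inverts the bit for the negated conditions. */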
   2097 	FAIL_IF(push_inst(compiler, MFCR | D(reg)));
   2098 	FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1)));
   2099 
   2100 	if (invert)
   2101 		FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
   2102 
   2103 	if (op < SLJIT_ADD) {
   2104 		if (!(dst & SLJIT_MEM))
   2105 			return SLJIT_SUCCESS;
   2106 		return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1);
   2107 	}
   2108 
   2109 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
   2110 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
   2111 	compiler->skip_checks = 1;
   2112 #endif
   2113 	if (dst & SLJIT_MEM)
   2114 		return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
   2115 	return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
   2116 }
   2117 
   2118 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
   2119 	sljit_s32 dst_reg,
   2120 	sljit_s32 src, sljit_sw srcw)
   2121 {
   2122 	CHECK_ERROR();
   2123 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
   2124 
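         	/* No conditional move instruction is emitted here; sljit_emit_cmov_generic falls back to
         	   a conditional branch around a register move in the common code. */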
    2125 	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
   2126 }
   2127 
   2128 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
   2129 	sljit_s32 reg,
   2130 	sljit_s32 mem, sljit_sw memw)
   2131 {
   2132 	sljit_s32 mem_flags;
   2133 	sljit_ins inst;
   2134 
   2135 	CHECK_ERROR();
   2136 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
   2137 
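         	/* The PowerPC load/store update forms compute the new address before the access
         	   (pre-indexed addressing), so post-update addressing cannot be supported. */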
   2138 	if (type & SLJIT_MEM_POST)
   2139 		return SLJIT_ERR_UNSUPPORTED;
   2140 
   2141 	switch (type & 0xff) {
   2142 	case SLJIT_MOV:
   2143 	case SLJIT_MOV_P:
   2144 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
   2145 	case SLJIT_MOV_U32:
   2146 	case SLJIT_MOV_S32:
   2147 #endif
   2148 		mem_flags = WORD_DATA;
   2149 		break;
   2150 
   2151 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   2152 	case SLJIT_MOV_U32:
   2153 		mem_flags = INT_DATA;
   2154 		break;
   2155 
   2156 	case SLJIT_MOV_S32:
   2157 		mem_flags = INT_DATA;
   2158 
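         		/* Note: a sign-extending word load with update exists only in indexed form (lwaux),
         		   so the displacement form is rejected below. */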
   2159 		if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_I32_OP)) {
   2160 			if (mem & OFFS_REG_MASK)
   2161 				mem_flags |= SIGNED_DATA;
   2162 			else
   2163 				return SLJIT_ERR_UNSUPPORTED;
   2164 		}
   2165 		break;
   2166 #endif
   2167 
   2168 	case SLJIT_MOV_U8:
   2169 	case SLJIT_MOV_S8:
   2170 		mem_flags = BYTE_DATA;
   2171 		break;
   2172 
   2173 	case SLJIT_MOV_U16:
   2174 		mem_flags = HALF_DATA;
   2175 		break;
   2176 
   2177 	case SLJIT_MOV_S16:
   2178 		mem_flags = HALF_DATA | SIGNED_DATA;
   2179 		break;
   2180 
   2181 	default:
   2182 		SLJIT_UNREACHABLE();
   2183 		mem_flags = WORD_DATA;
   2184 		break;
   2185 	}
   2186 
   2187 	if (!(type & SLJIT_MEM_STORE))
   2188 		mem_flags |= LOAD_DATA;
   2189 
   2190 	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
   2191 		if (memw != 0)
   2192 			return SLJIT_ERR_UNSUPPORTED;
   2193 
   2194 		if (type & SLJIT_MEM_SUPP)
   2195 			return SLJIT_SUCCESS;
   2196 
   2197 		inst = updated_data_transfer_insts[mem_flags | INDEXED];
   2198 		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | B(OFFS_REG(mem))));
   2199 	}
   2200 	else {
   2201 		if (memw > SIMM_MAX || memw < SIMM_MIN)
   2202 			return SLJIT_ERR_UNSUPPORTED;
   2203 
   2204 		inst = updated_data_transfer_insts[mem_flags];
   2205 
   2206 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   2207 		if ((inst & INT_ALIGNED) && (memw & 0x3) != 0)
   2208 			return SLJIT_ERR_UNSUPPORTED;
   2209 #endif
   2210 
   2211 		if (type & SLJIT_MEM_SUPP)
   2212 			return SLJIT_SUCCESS;
   2213 
   2214 		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | IMM(memw)));
   2215 	}
   2216 
   2217 	if ((mem_flags & LOAD_DATA) && (type & 0xff) == SLJIT_MOV_S8)
   2218 		return push_inst(compiler, EXTSB | S(reg) | A(reg));
   2219 	return SLJIT_SUCCESS;
   2220 }
   2221 
   2222 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
   2223 	sljit_s32 freg,
   2224 	sljit_s32 mem, sljit_sw memw)
   2225 {
   2226 	sljit_s32 mem_flags;
   2227 	sljit_ins inst;
   2228 
   2229 	CHECK_ERROR();
   2230 	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
   2231 
   2232 	if (type & SLJIT_MEM_POST)
   2233 		return SLJIT_ERR_UNSUPPORTED;
   2234 
   2235 	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
   2236 		if (memw != 0)
   2237 			return SLJIT_ERR_UNSUPPORTED;
   2238 	}
   2239 	else {
   2240 		if (memw > SIMM_MAX || memw < SIMM_MIN)
   2241 			return SLJIT_ERR_UNSUPPORTED;
   2242 	}
   2243 
   2244 	if (type & SLJIT_MEM_SUPP)
   2245 		return SLJIT_SUCCESS;
   2246 
   2247 	mem_flags = FLOAT_DATA(type);
   2248 
   2249 	if (!(type & SLJIT_MEM_STORE))
   2250 		mem_flags |= LOAD_DATA;
   2251 
   2252 	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
   2253 		inst = updated_data_transfer_insts[mem_flags | INDEXED];
   2254 		return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | B(OFFS_REG(mem)));
   2255 	}
   2256 
   2257 	inst = updated_data_transfer_insts[mem_flags];
   2258 	return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | IMM(memw));
   2259 }
   2260 
   2261 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
   2262 {
   2263 	struct sljit_const *const_;
   2264 	sljit_s32 reg;
   2265 
   2266 	CHECK_ERROR_PTR();
   2267 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
   2268 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2269 
   2270 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   2271 	PTR_FAIL_IF(!const_);
   2272 	set_const(const_, compiler);
   2273 
   2274 	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
   2275 
   2276 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
   2277 
   2278 	if (dst & SLJIT_MEM)
   2279 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
   2280 	return const_;
   2281 }
   2282