1 /* 2 * Stack-less Just-In-Time compiler 3 * 4 * Copyright 2013-2013 Tilera Corporation(jiwang (at) tilera.com). All rights reserved. 5 * Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without modification, are 8 * permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, this list of 11 * conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 14 * of conditions and the following disclaimer in the documentation and/or other materials 15 * provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 20 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 23 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 25 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 /* TileGX architecture. */ 29 /* Contributed by Tilera Corporation. 
*/ 30 #include "sljitNativeTILEGX-encoder.c" 31 32 #define SIMM_8BIT_MAX (0x7f) 33 #define SIMM_8BIT_MIN (-0x80) 34 #define SIMM_16BIT_MAX (0x7fff) 35 #define SIMM_16BIT_MIN (-0x8000) 36 #define SIMM_17BIT_MAX (0xffff) 37 #define SIMM_17BIT_MIN (-0x10000) 38 #define SIMM_32BIT_MAX (0x7fffffff) 39 #define SIMM_32BIT_MIN (-0x7fffffff - 1) 40 #define SIMM_48BIT_MAX (0x7fffffff0000L) 41 #define SIMM_48BIT_MIN (-0x800000000000L) 42 #define IMM16(imm) ((imm) & 0xffff) 43 44 #define UIMM_16BIT_MAX (0xffff) 45 46 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 47 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) 48 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) 49 #define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5) 50 #define PIC_ADDR_REG TMP_REG2 51 52 /* Register number used in instruction encodings for each SLJIT register index; the last five entries equal the *_mapped constants defined right after the table. */ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 53 63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7 54 }; 55 56 #define SLJIT_LOCALS_REG_mapped 54 57 #define TMP_REG1_mapped 5 58 #define TMP_REG2_mapped 16 59 #define TMP_REG3_mapped 6 60 #define ADDR_TMP_mapped 7 61 62 /* Flags are kept in volatile registers. */ 63 #define EQUAL_FLAG 8 64 /* And carry flag as well. 
*/ 65 #define ULESS_FLAG 9 66 #define UGREATER_FLAG 10 67 #define LESS_FLAG 11 68 #define GREATER_FLAG 12 69 #define OVERFLOW_FLAG 13 70 71 #define ZERO 63 72 #define RA 55 73 #define TMP_EREG1 14 74 #define TMP_EREG2 15 75 76 #define LOAD_DATA 0x01 77 #define WORD_DATA 0x00 78 #define BYTE_DATA 0x02 79 #define HALF_DATA 0x04 80 #define INT_DATA 0x06 81 #define SIGNED_DATA 0x08 82 #define DOUBLE_DATA 0x10 83 84 /* Separates integer and floating point registers */ 85 #define GPR_REG 0xf 86 87 #define MEM_MASK 0x1f 88 89 #define WRITE_BACK 0x00020 90 #define ARG_TEST 0x00040 91 #define ALT_KEEP_CACHE 0x00080 92 #define CUMULATIVE_OP 0x00100 93 #define LOGICAL_OP 0x00200 94 #define IMM_OP 0x00400 95 #define SRC2_IMM 0x00800 96 97 #define UNUSED_DEST 0x01000 98 #define REG_DEST 0x02000 99 #define REG1_SOURCE 0x04000 100 #define REG2_SOURCE 0x08000 101 #define SLOW_SRC1 0x10000 102 #define SLOW_SRC2 0x20000 103 #define SLOW_DEST 0x40000 104 105 /* Only these flags are set. UNUSED_DEST is not set when no flags should be set. 
106 */ 107 #define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) 108 109 SLJIT_API_FUNC_ATTRIBUTE const char *sljit_get_platform_name(void) 110 { 111 return "TileGX" SLJIT_CPUINFO; 112 } 113 114 /* Length of an instruction word */ 115 typedef sljit_uw sljit_ins; 116 117 struct jit_instr { 118 const struct tilegx_opcode* opcode; 119 tilegx_pipeline pipe; 120 unsigned long input_registers; 121 unsigned long output_registers; 122 int operand_value[4]; 123 int line; 124 }; 125 126 /* Opcode Helper Macros */ 127 #define TILEGX_X_MODE 0 128 129 #define X_MODE create_Mode(TILEGX_X_MODE) 130 131 #define FNOP_X0 \ 132 create_Opcode_X0(RRR_0_OPCODE_X0) | \ 133 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 134 create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) 135 136 #define FNOP_X1 \ 137 create_Opcode_X1(RRR_0_OPCODE_X1) | \ 138 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 139 create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1) 140 141 #define NOP \ 142 create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1 143 144 #define ANOP_X0 \ 145 create_Opcode_X0(RRR_0_OPCODE_X0) | \ 146 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 147 create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0) 148 149 #define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 150 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 151 create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \ 152 create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0 153 154 #define ADD_X1 \ 155 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 156 create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0 157 158 #define ADDI_X1 \ 159 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 160 create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0 161 162 #define SUB_X1 \ 163 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 164 create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0 165 166 
#define NOR_X1 \ 167 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 168 create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0 169 170 #define OR_X1 \ 171 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 172 create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0 173 174 #define AND_X1 \ 175 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 176 create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0 177 178 #define XOR_X1 \ 179 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 180 create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0 181 182 #define CMOVNEZ_X0 \ 183 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 184 create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1 185 186 #define CMOVEQZ_X0 \ 187 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 188 create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1 189 190 #define ADDLI_X1 \ 191 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0 192 193 #define V4INT_L_X1 \ 194 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 195 create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0 196 197 #define BFEXTU_X0 \ 198 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ 199 create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1 200 201 #define BFEXTS_X0 \ 202 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ 203 create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1 204 205 #define SHL16INSLI_X1 \ 206 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0 207 208 #define ST_X1 \ 209 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 210 create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0 211 212 #define LD_X1 \ 213 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 214 
create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 215 create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0 216 217 #define JR_X1 \ 218 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 219 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 220 create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0 221 222 #define JALR_X1 \ 223 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 224 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 225 create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0 226 227 #define CLZ_X0 \ 228 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 229 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 230 create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1 231 232 #define CMPLTUI_X1 \ 233 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 234 create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0 235 236 #define CMPLTU_X1 \ 237 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 238 create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0 239 240 #define CMPLTS_X1 \ 241 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 242 create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0 243 244 #define XORI_X1 \ 245 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 246 create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0 247 248 #define ORI_X1 \ 249 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 250 create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0 251 252 #define ANDI_X1 \ 253 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 254 create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0 255 256 #define SHLI_X1 \ 257 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 258 create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0 259 260 #define SHL_X1 \ 261 create_Mode(TILEGX_X_MODE) | 
create_Opcode_X1(RRR_0_OPCODE_X1) | \ 262 create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0 263 264 #define SHRSI_X1 \ 265 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 266 create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0 267 268 #define SHRS_X1 \ 269 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 270 create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0 271 272 #define SHRUI_X1 \ 273 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 274 create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0 275 276 #define SHRU_X1 \ 277 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 278 create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0 279 280 #define BEQZ_X1 \ 281 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ 282 create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0 283 284 #define BNEZ_X1 \ 285 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ 286 create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0 287 288 #define J_X1 \ 289 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ 290 create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0 291 292 #define JAL_X1 \ 293 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ 294 create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0 295 296 #define DEST_X0(x) create_Dest_X0(x) 297 #define SRCA_X0(x) create_SrcA_X0(x) 298 #define SRCB_X0(x) create_SrcB_X0(x) 299 #define DEST_X1(x) create_Dest_X1(x) 300 #define SRCA_X1(x) create_SrcA_X1(x) 301 #define SRCB_X1(x) create_SrcB_X1(x) 302 #define IMM16_X1(x) create_Imm16_X1(x) 303 #define IMM8_X1(x) create_Imm8_X1(x) 304 #define BFSTART_X0(x) create_BFStart_X0(x) 305 #define BFEND_X0(x) create_BFEnd_X0(x) 306 #define SHIFTIMM_X1(x) create_ShAmt_X1(x) 307 #define JOFF_X1(x) create_JumpOff_X1(x) 308 #define BOFF_X1(x) create_BrOff_X1(x) 309 310 static const tilegx_mnemonic data_transfer_insts[16] = { 
311 /* u w s */ TILEGX_OPC_ST /* st */, 312 /* u w l */ TILEGX_OPC_LD /* ld */, 313 /* u b s */ TILEGX_OPC_ST1 /* st1 */, 314 /* u b l */ TILEGX_OPC_LD1U /* ld1u */, 315 /* u h s */ TILEGX_OPC_ST2 /* st2 */, 316 /* u h l */ TILEGX_OPC_LD2U /* ld2u */, 317 /* u i s */ TILEGX_OPC_ST4 /* st4 */, 318 /* u i l */ TILEGX_OPC_LD4U /* ld4u */, 319 /* s w s */ TILEGX_OPC_ST /* st */, 320 /* s w l */ TILEGX_OPC_LD /* ld */, 321 /* s b s */ TILEGX_OPC_ST1 /* st1 */, 322 /* s b l */ TILEGX_OPC_LD1S /* ld1s */, 323 /* s h s */ TILEGX_OPC_ST2 /* st2 */, 324 /* s h l */ TILEGX_OPC_LD2S /* ld2s */, 325 /* s i s */ TILEGX_OPC_ST4 /* st4 */, 326 /* s i l */ TILEGX_OPC_LD4S /* ld4s */, 327 }; 328 329 #ifdef TILEGX_JIT_DEBUG 330 static sljit_s32 push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line) 331 { 332 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 333 FAIL_IF(!ptr); 334 *ptr = ins; 335 compiler->size++; 336 printf("|%04d|S0|:\t\t", line); 337 print_insn_tilegx(ptr); 338 return SLJIT_SUCCESS; 339 } 340 341 static sljit_s32 push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins) 342 { 343 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 344 FAIL_IF(!ptr); 345 *ptr = ins; 346 compiler->size++; 347 return SLJIT_SUCCESS; 348 } 349 350 #define push_inst(a, b) push_inst_debug(a, b, __LINE__) 351 #else 352 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) 353 { 354 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 355 FAIL_IF(!ptr); 356 *ptr = ins; 357 compiler->size++; 358 return SLJIT_SUCCESS; 359 } 360 #endif 361 362 #define BUNDLE_FORMAT_MASK(p0, p1, p2) \ 363 ((p0) | ((p1) << 8) | ((p2) << 16)) 364 365 #define BUNDLE_FORMAT(p0, p1, p2) \ 366 { \ 367 { \ 368 (tilegx_pipeline)(p0), \ 369 (tilegx_pipeline)(p1), \ 370 (tilegx_pipeline)(p2) \ 371 }, \ 372 BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \ 373 } 374 375 #define NO_PIPELINE 
TILEGX_NUM_PIPELINE_ENCODINGS 376 377 #define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1) 378 379 #define PI(encoding) \ 380 push_inst(compiler, encoding) 381 382 #define PB3(opcode, dst, srca, srcb) \ 383 push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__) 384 385 #define PB2(opcode, dst, src) \ 386 push_2_buffer(compiler, opcode, dst, src, __LINE__) 387 388 #define JR(reg) \ 389 push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__) 390 391 #define ADD(dst, srca, srcb) \ 392 push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__) 393 394 #define SUB(dst, srca, srcb) \ 395 push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__) 396 397 #define MUL(dst, srca, srcb) \ 398 push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__) 399 400 #define NOR(dst, srca, srcb) \ 401 push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__) 402 403 #define OR(dst, srca, srcb) \ 404 push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__) 405 406 #define XOR(dst, srca, srcb) \ 407 push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__) 408 409 #define AND(dst, srca, srcb) \ 410 push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__) 411 412 #define CLZ(dst, src) \ 413 push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__) 414 415 #define SHLI(dst, srca, srcb) \ 416 push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__) 417 418 #define SHRUI(dst, srca, imm) \ 419 push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__) 420 421 #define XORI(dst, srca, imm) \ 422 push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__) 423 424 #define ORI(dst, srca, imm) \ 425 push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__) 426 427 #define CMPLTU(dst, srca, srcb) \ 428 push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__) 429 430 #define CMPLTS(dst, srca, srcb) \ 431 push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, 
__LINE__) 432 433 #define CMPLTUI(dst, srca, imm) \ 434 push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__) 435 436 #define CMOVNEZ(dst, srca, srcb) \ 437 push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__) 438 439 #define CMOVEQZ(dst, srca, srcb) \ 440 push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__) 441 442 #define ADDLI(dst, srca, srcb) \ 443 push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__) 444 445 #define SHL16INSLI(dst, srca, srcb) \ 446 push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__) 447 448 #define LD_ADD(dst, addr, adjust) \ 449 push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__) 450 451 #define ST_ADD(src, addr, adjust) \ 452 push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__) 453 454 #define LD(dst, addr) \ 455 push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__) 456 457 #define BFEXTU(dst, src, start, end) \ 458 push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__) 459 460 #define BFEXTS(dst, src, start, end) \ 461 push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__) 462 463 #define ADD_SOLO(dest, srca, srcb) \ 464 push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb)) 465 466 #define ADDI_SOLO(dest, srca, imm) \ 467 push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm)) 468 469 #define ADDLI_SOLO(dest, srca, imm) \ 470 push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) 471 472 #define SHL16INSLI_SOLO(dest, srca, imm) \ 473 push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) 474 475 #define JALR_SOLO(reg) \ 476 push_inst(compiler, JALR_X1 | SRCA_X1(reg)) 477 478 #define JR_SOLO(reg) \ 479 push_inst(compiler, JR_X1 | SRCA_X1(reg)) 480 481 struct Format { 482 /* Mapping of bundle issue slot to assigned pipe. 
*/ 483 tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; 484 485 /* Mask of pipes used by this bundle. */ 486 unsigned int pipe_mask; 487 }; 488 489 const struct Format formats[] = 490 { 491 /* In Y format we must always have something in Y2, since it has 492 * no fnop, so this conveys that Y2 must always be used. */ 493 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE), 494 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE), 495 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE), 496 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE), 497 498 /* Y format has three instructions. */ 499 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2), 500 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1), 501 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2), 502 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0), 503 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1), 504 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0), 505 506 /* X format has only two instructions. */ 507 BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE), 508 BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE) 509 }; 510 511 512 struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; 513 unsigned long inst_buf_index; 514 515 tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode) 516 { 517 /* FIXME: tile: we could pregenerate this. 
*/ 518 int pipe; 519 for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++) 520 ; 521 return (tilegx_pipeline)(pipe); 522 } 523 524 void insert_nop(tilegx_mnemonic opc, int line) 525 { 526 const struct tilegx_opcode* opcode = NULL; 527 528 memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]); 529 530 opcode = &tilegx_opcodes[opc]; 531 inst_buf[0].opcode = opcode; 532 inst_buf[0].pipe = get_any_valid_pipe(opcode); 533 inst_buf[0].input_registers = 0; 534 inst_buf[0].output_registers = 0; 535 inst_buf[0].line = line; 536 ++inst_buf_index; 537 } 538 539 const struct Format* compute_format() 540 { 541 unsigned int compatible_pipes = BUNDLE_FORMAT_MASK( 542 inst_buf[0].opcode->pipes, 543 inst_buf[1].opcode->pipes, 544 (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE))); 545 546 const struct Format* match = NULL; 547 const struct Format *b = NULL; 548 unsigned int i; 549 for (i = 0; i < sizeof formats / sizeof formats[0]; i++) { 550 b = &formats[i]; 551 if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) { 552 match = b; 553 break; 554 } 555 } 556 557 return match; 558 } 559 560 sljit_s32 assign_pipes() 561 { 562 unsigned long output_registers = 0; 563 unsigned int i = 0; 564 565 if (inst_buf_index == 1) { 566 tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle 567 ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP; 568 insert_nop(opc, __LINE__); 569 } 570 571 const struct Format* match = compute_format(); 572 573 if (match == NULL) 574 return -1; 575 576 for (i = 0; i < inst_buf_index; i++) { 577 578 if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0)) 579 return -1; 580 581 if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0)) 582 return -1; 583 584 /* Don't include Rzero in the match set, to avoid triggering 585 needlessly on 'prefetch' instrs. 
*/ 586 587 output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL; 588 589 inst_buf[i].pipe = match->pipe[i]; 590 } 591 592 /* If only 2 instrs, and in Y-mode, insert a nop. */ 593 if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) { 594 insert_nop(TILEGX_OPC_FNOP, __LINE__); 595 596 /* Select the yet unassigned pipe. */ 597 tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0 598 + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2) 599 - (inst_buf[1].pipe + inst_buf[2].pipe))); 600 601 inst_buf[0].pipe = pipe; 602 } 603 604 return 0; 605 } 606 607 tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst) 608 { 609 int i, val; 610 const struct tilegx_opcode* opcode = inst->opcode; 611 tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe]; 612 613 const struct tilegx_operand* operand = NULL; 614 for (i = 0; i < opcode->num_operands; i++) { 615 operand = &tilegx_operands[opcode->operands[inst->pipe][i]]; 616 val = inst->operand_value[i]; 617 618 bits |= operand->insert(val); 619 } 620 621 return bits; 622 } 623 624 static sljit_s32 update_buffer(struct sljit_compiler *compiler) 625 { 626 int i; 627 int orig_index = inst_buf_index; 628 struct jit_instr inst0 = inst_buf[0]; 629 struct jit_instr inst1 = inst_buf[1]; 630 struct jit_instr inst2 = inst_buf[2]; 631 tilegx_bundle_bits bits = 0; 632 633 /* If the bundle is valid as is, perform the encoding and return 1. 
*/ 634 if (assign_pipes() == 0) { 635 for (i = 0; i < inst_buf_index; i++) { 636 bits |= get_bundle_bit(inst_buf + i); 637 #ifdef TILEGX_JIT_DEBUG 638 printf("|%04d", inst_buf[i].line); 639 #endif 640 } 641 #ifdef TILEGX_JIT_DEBUG 642 if (inst_buf_index == 3) 643 printf("|M0|:\t"); 644 else 645 printf("|M0|:\t\t"); 646 print_insn_tilegx(&bits); 647 #endif 648 649 inst_buf_index = 0; 650 651 #ifdef TILEGX_JIT_DEBUG 652 return push_inst_nodebug(compiler, bits); 653 #else 654 return push_inst(compiler, bits); 655 #endif 656 } 657 658 /* If the bundle is invalid, split it in two. First encode the first two 659 (or possibly 1) instructions, and then the last, separately. Note that 660 assign_pipes may have re-ordered the instrs (by inserting no-ops in 661 lower slots) so we need to reset them. */ 662 663 inst_buf_index = orig_index - 1; 664 inst_buf[0] = inst0; 665 inst_buf[1] = inst1; 666 inst_buf[2] = inst2; 667 if (assign_pipes() == 0) { 668 for (i = 0; i < inst_buf_index; i++) { 669 bits |= get_bundle_bit(inst_buf + i); 670 #ifdef TILEGX_JIT_DEBUG 671 printf("|%04d", inst_buf[i].line); 672 #endif 673 } 674 675 #ifdef TILEGX_JIT_DEBUG 676 if (inst_buf_index == 3) 677 printf("|M1|:\t"); 678 else 679 printf("|M1|:\t\t"); 680 print_insn_tilegx(&bits); 681 #endif 682 683 if ((orig_index - 1) == 2) { 684 inst_buf[0] = inst2; 685 inst_buf_index = 1; 686 } else if ((orig_index - 1) == 1) { 687 inst_buf[0] = inst1; 688 inst_buf_index = 1; 689 } else 690 SLJIT_ASSERT_STOP(); 691 692 #ifdef TILEGX_JIT_DEBUG 693 return push_inst_nodebug(compiler, bits); 694 #else 695 return push_inst(compiler, bits); 696 #endif 697 } else { 698 /* We had 3 instrs of which the first 2 can't live in the same bundle. 699 Split those two. Note that we don't try to then combine the second 700 and third instr into a single bundle. 
First instruction: */ 701 inst_buf_index = 1; 702 inst_buf[0] = inst0; 703 inst_buf[1] = inst1; 704 inst_buf[2] = inst2; 705 if (assign_pipes() == 0) { 706 for (i = 0; i < inst_buf_index; i++) { 707 bits |= get_bundle_bit(inst_buf + i); 708 #ifdef TILEGX_JIT_DEBUG 709 printf("|%04d", inst_buf[i].line); 710 #endif 711 } 712 713 #ifdef TILEGX_JIT_DEBUG 714 if (inst_buf_index == 3) 715 printf("|M2|:\t"); 716 else 717 printf("|M2|:\t\t"); 718 print_insn_tilegx(&bits); 719 #endif 720 721 inst_buf[0] = inst1; 722 inst_buf[1] = inst2; 723 inst_buf_index = orig_index - 1; 724 #ifdef TILEGX_JIT_DEBUG 725 return push_inst_nodebug(compiler, bits); 726 #else 727 return push_inst(compiler, bits); 728 #endif 729 } else 730 SLJIT_ASSERT_STOP(); 731 } 732 733 SLJIT_ASSERT_STOP(); 734 } 735 736 static sljit_s32 flush_buffer(struct sljit_compiler *compiler) 737 { 738 while (inst_buf_index != 0) { 739 FAIL_IF(update_buffer(compiler)); 740 } 741 return SLJIT_SUCCESS; 742 } 743 744 static sljit_s32 push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line) 745 { 746 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 747 FAIL_IF(update_buffer(compiler)); 748 749 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 750 inst_buf[inst_buf_index].opcode = opcode; 751 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 752 inst_buf[inst_buf_index].operand_value[0] = op0; 753 inst_buf[inst_buf_index].operand_value[1] = op1; 754 inst_buf[inst_buf_index].operand_value[2] = op2; 755 inst_buf[inst_buf_index].operand_value[3] = op3; 756 inst_buf[inst_buf_index].input_registers = 1L << op1; 757 inst_buf[inst_buf_index].output_registers = 1L << op0; 758 inst_buf[inst_buf_index].line = line; 759 inst_buf_index++; 760 761 return SLJIT_SUCCESS; 762 } 763 764 static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line) 765 { 766 if (inst_buf_index == 
TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 767 FAIL_IF(update_buffer(compiler)); 768 769 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 770 inst_buf[inst_buf_index].opcode = opcode; 771 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 772 inst_buf[inst_buf_index].operand_value[0] = op0; 773 inst_buf[inst_buf_index].operand_value[1] = op1; 774 inst_buf[inst_buf_index].operand_value[2] = op2; 775 inst_buf[inst_buf_index].line = line; 776 777 switch (opc) { 778 case TILEGX_OPC_ST_ADD: 779 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); 780 inst_buf[inst_buf_index].output_registers = 1L << op0; 781 break; 782 case TILEGX_OPC_LD_ADD: 783 inst_buf[inst_buf_index].input_registers = 1L << op1; 784 inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1); 785 break; 786 case TILEGX_OPC_ADD: 787 case TILEGX_OPC_AND: 788 case TILEGX_OPC_SUB: 789 case TILEGX_OPC_MULX: 790 case TILEGX_OPC_OR: 791 case TILEGX_OPC_XOR: 792 case TILEGX_OPC_NOR: 793 case TILEGX_OPC_SHL: 794 case TILEGX_OPC_SHRU: 795 case TILEGX_OPC_SHRS: 796 case TILEGX_OPC_CMPLTU: 797 case TILEGX_OPC_CMPLTS: 798 case TILEGX_OPC_CMOVEQZ: 799 case TILEGX_OPC_CMOVNEZ: 800 inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2); 801 inst_buf[inst_buf_index].output_registers = 1L << op0; 802 break; 803 case TILEGX_OPC_ADDLI: 804 case TILEGX_OPC_XORI: 805 case TILEGX_OPC_ORI: 806 case TILEGX_OPC_SHLI: 807 case TILEGX_OPC_SHRUI: 808 case TILEGX_OPC_SHRSI: 809 case TILEGX_OPC_SHL16INSLI: 810 case TILEGX_OPC_CMPLTUI: 811 case TILEGX_OPC_CMPLTSI: 812 inst_buf[inst_buf_index].input_registers = 1L << op1; 813 inst_buf[inst_buf_index].output_registers = 1L << op0; 814 break; 815 default: 816 printf("unrecoginzed opc: %s\n", opcode->name); 817 SLJIT_ASSERT_STOP(); 818 } 819 820 inst_buf_index++; 821 822 return SLJIT_SUCCESS; 823 } 824 825 static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line) 826 { 827 if 
(inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 828 FAIL_IF(update_buffer(compiler)); 829 830 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 831 inst_buf[inst_buf_index].opcode = opcode; 832 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 833 inst_buf[inst_buf_index].operand_value[0] = op0; 834 inst_buf[inst_buf_index].operand_value[1] = op1; 835 inst_buf[inst_buf_index].line = line; 836 837 switch (opc) { 838 case TILEGX_OPC_BEQZ: 839 case TILEGX_OPC_BNEZ: 840 inst_buf[inst_buf_index].input_registers = 1L << op0; 841 break; 842 case TILEGX_OPC_ST: 843 case TILEGX_OPC_ST1: 844 case TILEGX_OPC_ST2: 845 case TILEGX_OPC_ST4: 846 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); 847 inst_buf[inst_buf_index].output_registers = 0; 848 break; 849 case TILEGX_OPC_CLZ: 850 case TILEGX_OPC_LD: 851 case TILEGX_OPC_LD1U: 852 case TILEGX_OPC_LD1S: 853 case TILEGX_OPC_LD2U: 854 case TILEGX_OPC_LD2S: 855 case TILEGX_OPC_LD4U: 856 case TILEGX_OPC_LD4S: 857 inst_buf[inst_buf_index].input_registers = 1L << op1; 858 inst_buf[inst_buf_index].output_registers = 1L << op0; 859 break; 860 default: 861 printf("unrecoginzed opc: %s\n", opcode->name); 862 SLJIT_ASSERT_STOP(); 863 } 864 865 inst_buf_index++; 866 867 return SLJIT_SUCCESS; 868 } 869 870 static sljit_s32 push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line) 871 { 872 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 873 FAIL_IF(update_buffer(compiler)); 874 875 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 876 inst_buf[inst_buf_index].opcode = opcode; 877 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 878 inst_buf[inst_buf_index].input_registers = 0; 879 inst_buf[inst_buf_index].output_registers = 0; 880 inst_buf[inst_buf_index].line = line; 881 inst_buf_index++; 882 883 return SLJIT_SUCCESS; 884 } 885 886 static sljit_s32 push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line) 887 { 888 
if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 889 FAIL_IF(update_buffer(compiler)); 890 891 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 892 inst_buf[inst_buf_index].opcode = opcode; 893 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 894 inst_buf[inst_buf_index].operand_value[0] = op0; 895 inst_buf[inst_buf_index].input_registers = 1L << op0; 896 inst_buf[inst_buf_index].output_registers = 0; 897 inst_buf[inst_buf_index].line = line; 898 inst_buf_index++; 899 900 return flush_buffer(compiler); 901 } 902 903 static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) 904 { 905 sljit_sw diff; 906 sljit_uw target_addr; 907 sljit_ins *inst; 908 909 if (jump->flags & SLJIT_REWRITABLE_JUMP) 910 return code_ptr; 911 912 if (jump->flags & JUMP_ADDR) 913 target_addr = jump->u.target; 914 else { 915 SLJIT_ASSERT(jump->flags & JUMP_LABEL); 916 target_addr = (sljit_uw)(code + jump->u.label->size); 917 } 918 919 inst = (sljit_ins *)jump->addr; 920 if (jump->flags & IS_COND) 921 inst--; 922 923 diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3; 924 if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) { 925 jump->flags |= PATCH_B; 926 927 if (!(jump->flags & IS_COND)) { 928 if (jump->flags & IS_JAL) { 929 jump->flags &= ~(PATCH_B); 930 jump->flags |= PATCH_J; 931 inst[0] = JAL_X1; 932 933 #ifdef TILEGX_JIT_DEBUG 934 printf("[runtime relocate]%04d:\t", __LINE__); 935 print_insn_tilegx(inst); 936 #endif 937 } else { 938 inst[0] = BEQZ_X1 | SRCA_X1(ZERO); 939 940 #ifdef TILEGX_JIT_DEBUG 941 printf("[runtime relocate]%04d:\t", __LINE__); 942 print_insn_tilegx(inst); 943 #endif 944 } 945 946 return inst; 947 } 948 949 inst[0] = inst[0] ^ (0x7L << 55); 950 951 #ifdef TILEGX_JIT_DEBUG 952 printf("[runtime relocate]%04d:\t", __LINE__); 953 print_insn_tilegx(inst); 954 #endif 955 jump->addr -= sizeof(sljit_ins); 956 return inst; 957 } 958 959 if (jump->flags & IS_COND) { 960 if ((target_addr & 
~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { 961 jump->flags |= PATCH_J; 962 inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2); 963 inst[1] = J_X1; 964 return inst + 1; 965 } 966 967 return code_ptr; 968 } 969 970 if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { 971 jump->flags |= PATCH_J; 972 973 if (jump->flags & IS_JAL) { 974 inst[0] = JAL_X1; 975 976 #ifdef TILEGX_JIT_DEBUG 977 printf("[runtime relocate]%04d:\t", __LINE__); 978 print_insn_tilegx(inst); 979 #endif 980 981 } else { 982 inst[0] = J_X1; 983 984 #ifdef TILEGX_JIT_DEBUG 985 printf("[runtime relocate]%04d:\t", __LINE__); 986 print_insn_tilegx(inst); 987 #endif 988 } 989 990 return inst; 991 } 992 993 return code_ptr; 994 } 995 996 SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler) 997 { 998 struct sljit_memory_fragment *buf; 999 sljit_ins *code; 1000 sljit_ins *code_ptr; 1001 sljit_ins *buf_ptr; 1002 sljit_ins *buf_end; 1003 sljit_uw word_count; 1004 sljit_uw addr; 1005 1006 struct sljit_label *label; 1007 struct sljit_jump *jump; 1008 struct sljit_const *const_; 1009 1010 CHECK_ERROR_PTR(); 1011 CHECK_PTR(check_sljit_generate_code(compiler)); 1012 reverse_buf(compiler); 1013 1014 code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); 1015 PTR_FAIL_WITH_EXEC_IF(code); 1016 buf = compiler->buf; 1017 1018 code_ptr = code; 1019 word_count = 0; 1020 label = compiler->labels; 1021 jump = compiler->jumps; 1022 const_ = compiler->consts; 1023 do { 1024 buf_ptr = (sljit_ins *)buf->memory; 1025 buf_end = buf_ptr + (buf->used_size >> 3); 1026 do { 1027 *code_ptr = *buf_ptr++; 1028 SLJIT_ASSERT(!label || label->size >= word_count); 1029 SLJIT_ASSERT(!jump || jump->addr >= word_count); 1030 SLJIT_ASSERT(!const_ || const_->addr >= word_count); 1031 /* These structures are ordered by their address. */ 1032 if (label && label->size == word_count) { 1033 /* Just recording the address. 
*/ 1034 label->addr = (sljit_uw) code_ptr; 1035 label->size = code_ptr - code; 1036 label = label->next; 1037 } 1038 1039 if (jump && jump->addr == word_count) { 1040 if (jump->flags & IS_JAL) 1041 jump->addr = (sljit_uw)(code_ptr - 4); 1042 else 1043 jump->addr = (sljit_uw)(code_ptr - 3); 1044 1045 code_ptr = detect_jump_type(jump, code_ptr, code); 1046 jump = jump->next; 1047 } 1048 1049 if (const_ && const_->addr == word_count) { 1050 /* Just recording the address. */ 1051 const_->addr = (sljit_uw) code_ptr; 1052 const_ = const_->next; 1053 } 1054 1055 code_ptr++; 1056 word_count++; 1057 } while (buf_ptr < buf_end); 1058 1059 buf = buf->next; 1060 } while (buf); 1061 1062 if (label && label->size == word_count) { 1063 label->addr = (sljit_uw) code_ptr; 1064 label->size = code_ptr - code; 1065 label = label->next; 1066 } 1067 1068 SLJIT_ASSERT(!label); 1069 SLJIT_ASSERT(!jump); 1070 SLJIT_ASSERT(!const_); 1071 SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); 1072 1073 jump = compiler->jumps; 1074 while (jump) { 1075 do { 1076 addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; 1077 buf_ptr = (sljit_ins *)jump->addr; 1078 1079 if (jump->flags & PATCH_B) { 1080 addr = (sljit_sw)(addr - (jump->addr)) >> 3; 1081 SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN); 1082 buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr); 1083 1084 #ifdef TILEGX_JIT_DEBUG 1085 printf("[runtime relocate]%04d:\t", __LINE__); 1086 print_insn_tilegx(buf_ptr); 1087 #endif 1088 break; 1089 } 1090 1091 if (jump->flags & PATCH_J) { 1092 SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)); 1093 addr = (sljit_sw)(addr - (jump->addr)) >> 3; 1094 buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr); 1095 1096 #ifdef TILEGX_JIT_DEBUG 1097 printf("[runtime relocate]%04d:\t", __LINE__); 1098 print_insn_tilegx(buf_ptr); 1099 #endif 1100 break; 1101 } 1102 1103 SLJIT_ASSERT(!(jump->flags & IS_JAL)); 1104 1105 /* Set the fields of immediate loads. */ 1106 buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43); 1107 buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43); 1108 buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43); 1109 } while (0); 1110 1111 jump = jump->next; 1112 } 1113 1114 compiler->error = SLJIT_ERR_COMPILED; 1115 compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); 1116 SLJIT_CACHE_FLUSH(code, code_ptr); 1117 return code; 1118 } 1119 1120 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) 1121 { 1122 1123 if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN) 1124 return ADDLI(dst_ar, ZERO, imm); 1125 1126 if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) { 1127 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16)); 1128 return SHL16INSLI(dst_ar, dst_ar, imm); 1129 } 1130 1131 if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) { 1132 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); 1133 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 
1134 return SHL16INSLI(dst_ar, dst_ar, imm); 1135 } 1136 1137 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48)); 1138 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32)); 1139 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1140 return SHL16INSLI(dst_ar, dst_ar, imm); 1141 } 1142 1143 static sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush) 1144 { 1145 /* Should *not* be optimized as load_immediate, as pcre relocation 1146 mechanism will match this fixed 4-instruction pattern. */ 1147 if (flush) { 1148 FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32)); 1149 FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16)); 1150 return SHL16INSLI_SOLO(dst_ar, dst_ar, imm); 1151 } 1152 1153 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); 1154 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1155 return SHL16INSLI(dst_ar, dst_ar, imm); 1156 } 1157 1158 static sljit_s32 emit_const_64(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush) 1159 { 1160 /* Should *not* be optimized as load_immediate, as pcre relocation 1161 mechanism will match this fixed 4-instruction pattern. 
*/ 1162 if (flush) { 1163 FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48)); 1164 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); 1165 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); 1166 return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm); 1167 } 1168 1169 FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48)); 1170 FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); 1171 FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); 1172 return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); 1173 } 1174 1175 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, 1176 sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 1177 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 1178 { 1179 sljit_ins base; 1180 sljit_s32 i, tmp; 1181 1182 CHECK_ERROR(); 1183 CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 1184 set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1185 1186 local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); 1187 local_size = (local_size + 7) & ~7; 1188 compiler->local_size = local_size; 1189 1190 if (local_size <= SIMM_16BIT_MAX) { 1191 /* Frequent case. */ 1192 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size)); 1193 base = SLJIT_LOCALS_REG_mapped; 1194 } else { 1195 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); 1196 FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO)); 1197 FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); 1198 base = TMP_REG2_mapped; 1199 local_size = 0; 1200 } 1201 1202 /* Save the return address. */ 1203 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); 1204 FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8)); 1205 1206 /* Save the S registers. */ 1207 tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; 1208 for (i = SLJIT_S0; i >= tmp; i--) { 1209 FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); 1210 } 1211 1212 /* Save the R registers that need to be reserved. */ 1213 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { 1214 FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); 1215 } 1216 1217 /* Move the arguments to S registers. */ 1218 for (i = 0; i < args; i++) { 1219 FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO)); 1220 } 1221 1222 return SLJIT_SUCCESS; 1223 } 1224 1225 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, 1226 sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 1227 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 1228 { 1229 CHECK_ERROR(); 1230 CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 1231 set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1232 1233 local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); 1234 compiler->local_size = (local_size + 7) & ~7; 1235 1236 return SLJIT_SUCCESS; 1237 } 1238 1239 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) 1240 { 1241 sljit_s32 local_size; 1242 sljit_ins base; 1243 sljit_s32 i, tmp; 1244 sljit_s32 saveds; 1245 1246 CHECK_ERROR(); 1247 CHECK(check_sljit_emit_return(compiler, op, src, srcw)); 1248 1249 FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); 1250 1251 local_size = compiler->local_size; 1252 if (local_size <= SIMM_16BIT_MAX) 1253 base = SLJIT_LOCALS_REG_mapped; 1254 else { 1255 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); 1256 FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); 1257 base = TMP_REG1_mapped; 1258 local_size = 0; 1259 } 1260 1261 /* Restore the return address. 
*/ 1262 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); 1263 FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8)); 1264 1265 /* Restore the S registers. */ 1266 saveds = compiler->saveds; 1267 tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; 1268 for (i = SLJIT_S0; i >= tmp; i--) { 1269 FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); 1270 } 1271 1272 /* Restore the R registers that need to be reserved. */ 1273 for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { 1274 FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); 1275 } 1276 1277 if (compiler->local_size <= SIMM_16BIT_MAX) 1278 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size)); 1279 else 1280 FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO)); 1281 1282 return JR(RA); 1283 } 1284 1285 /* reg_ar is an absoulute register! */ 1286 1287 /* Can perform an operation using at most 1 instruction. */ 1288 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) 1289 { 1290 SLJIT_ASSERT(arg & SLJIT_MEM); 1291 1292 if ((!(flags & WRITE_BACK) || !(arg & REG_MASK)) 1293 && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1294 /* Works for both absoulte and relative addresses. */ 1295 if (SLJIT_UNLIKELY(flags & ARG_TEST)) 1296 return 1; 1297 1298 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw)); 1299 1300 if (flags & LOAD_DATA) 1301 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); 1302 else 1303 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); 1304 1305 return -1; 1306 } 1307 1308 return 0; 1309 } 1310 1311 /* See getput_arg below. 1312 Note: can_cache is called only for binary operators. Those 1313 operators always uses word arguments without write back. 
*/ 1314 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) 1315 { 1316 SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); 1317 1318 /* Simple operation except for updates. */ 1319 if (arg & OFFS_REG_MASK) { 1320 argw &= 0x3; 1321 next_argw &= 0x3; 1322 if (argw && argw == next_argw 1323 && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) 1324 return 1; 1325 return 0; 1326 } 1327 1328 if (arg == next_arg) { 1329 if (((next_argw - argw) <= SIMM_16BIT_MAX 1330 && (next_argw - argw) >= SIMM_16BIT_MIN)) 1331 return 1; 1332 1333 return 0; 1334 } 1335 1336 return 0; 1337 } 1338 1339 /* Emit the necessary instructions. See can_cache above. */ 1340 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) 1341 { 1342 sljit_s32 tmp_ar, base; 1343 1344 SLJIT_ASSERT(arg & SLJIT_MEM); 1345 if (!(next_arg & SLJIT_MEM)) { 1346 next_arg = 0; 1347 next_argw = 0; 1348 } 1349 1350 if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) 1351 tmp_ar = reg_ar; 1352 else 1353 tmp_ar = TMP_REG1_mapped; 1354 1355 base = arg & REG_MASK; 1356 1357 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { 1358 argw &= 0x3; 1359 1360 if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) { 1361 SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar); 1362 FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); 1363 reg_ar = TMP_REG1_mapped; 1364 } 1365 1366 /* Using the cache. 
*/ 1367 if (argw == compiler->cache_argw) { 1368 if (!(flags & WRITE_BACK)) { 1369 if (arg == compiler->cache_arg) { 1370 if (flags & LOAD_DATA) 1371 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1372 else 1373 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1374 } 1375 1376 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { 1377 if (arg == next_arg && argw == (next_argw & 0x3)) { 1378 compiler->cache_arg = arg; 1379 compiler->cache_argw = argw; 1380 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped)); 1381 if (flags & LOAD_DATA) 1382 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1383 else 1384 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1385 } 1386 1387 FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped)); 1388 if (flags & LOAD_DATA) 1389 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1390 else 1391 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1392 } 1393 } else { 1394 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { 1395 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1396 if (flags & LOAD_DATA) 1397 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1398 else 1399 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1400 } 1401 } 1402 } 1403 1404 if (SLJIT_UNLIKELY(argw)) { 1405 compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); 1406 compiler->cache_argw = argw; 1407 FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw)); 1408 } 1409 1410 if (!(flags & WRITE_BACK)) { 1411 if (arg == next_arg && argw == (next_argw & 0x3)) { 1412 compiler->cache_arg = arg; 1413 compiler->cache_argw = argw; 1414 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); 1415 tmp_ar = TMP_REG3_mapped; 1416 } else 1417 FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? 
OFFS_REG(arg) : TMP_REG3])); 1418 1419 if (flags & LOAD_DATA) 1420 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1421 else 1422 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1423 } 1424 1425 FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); 1426 1427 if (flags & LOAD_DATA) 1428 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1429 else 1430 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1431 } 1432 1433 if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { 1434 /* Update only applies if a base register exists. */ 1435 if (reg_ar == reg_map[base]) { 1436 SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar); 1437 if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1438 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw)); 1439 if (flags & LOAD_DATA) 1440 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); 1441 else 1442 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); 1443 1444 if (argw) 1445 return ADDLI(reg_map[base], reg_map[base], argw); 1446 1447 return SLJIT_SUCCESS; 1448 } 1449 1450 FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); 1451 reg_ar = TMP_REG1_mapped; 1452 } 1453 1454 if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1455 if (argw) 1456 FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw)); 1457 } else { 1458 if (compiler->cache_arg == SLJIT_MEM 1459 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1460 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1461 if (argw != compiler->cache_argw) { 1462 FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1463 compiler->cache_argw = argw; 1464 } 1465 1466 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1467 } else { 1468 compiler->cache_arg = SLJIT_MEM; 1469 compiler->cache_argw = argw; 1470 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); 1471 
FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1472 } 1473 } 1474 1475 if (flags & LOAD_DATA) 1476 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1477 else 1478 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1479 } 1480 1481 if (compiler->cache_arg == arg 1482 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1483 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1484 if (argw != compiler->cache_argw) { 1485 FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1486 compiler->cache_argw = argw; 1487 } 1488 1489 if (flags & LOAD_DATA) 1490 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1491 else 1492 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1493 } 1494 1495 if (compiler->cache_arg == SLJIT_MEM 1496 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1497 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1498 if (argw != compiler->cache_argw) 1499 FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1500 } else { 1501 compiler->cache_arg = SLJIT_MEM; 1502 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); 1503 } 1504 1505 compiler->cache_argw = argw; 1506 1507 if (!base) { 1508 if (flags & LOAD_DATA) 1509 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1510 else 1511 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1512 } 1513 1514 if (arg == next_arg 1515 && next_argw - argw <= SIMM_16BIT_MAX 1516 && next_argw - argw >= SIMM_16BIT_MIN) { 1517 compiler->cache_arg = arg; 1518 FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base])); 1519 if (flags & LOAD_DATA) 1520 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1521 else 1522 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1523 } 1524 1525 FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base])); 1526 1527 if (flags & 
LOAD_DATA) 1528 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1529 else 1530 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1531 } 1532 1533 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) 1534 { 1535 if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) 1536 return compiler->error; 1537 1538 compiler->cache_arg = 0; 1539 compiler->cache_argw = 0; 1540 return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); 1541 } 1542 1543 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) 1544 { 1545 if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) 1546 return compiler->error; 1547 return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); 1548 } 1549 1550 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) 1551 { 1552 CHECK_ERROR(); 1553 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); 1554 ADJUST_LOCAL_OFFSET(dst, dstw); 1555 1556 /* For UNUSED dst. Uncommon, but possible. */ 1557 if (dst == SLJIT_UNUSED) 1558 return SLJIT_SUCCESS; 1559 1560 if (FAST_IS_REG(dst)) 1561 return ADD(reg_map[dst], RA, ZERO); 1562 1563 /* Memory. 
*/ 1564 return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw); 1565 } 1566 1567 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) 1568 { 1569 CHECK_ERROR(); 1570 CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); 1571 ADJUST_LOCAL_OFFSET(src, srcw); 1572 1573 if (FAST_IS_REG(src)) 1574 FAIL_IF(ADD(RA, reg_map[src], ZERO)); 1575 1576 else if (src & SLJIT_MEM) 1577 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw)); 1578 1579 else if (src & SLJIT_IMM) 1580 FAIL_IF(load_immediate(compiler, RA, srcw)); 1581 1582 return JR(RA); 1583 } 1584 1585 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) 1586 { 1587 sljit_s32 overflow_ra = 0; 1588 1589 switch (GET_OPCODE(op)) { 1590 case SLJIT_MOV: 1591 case SLJIT_MOV_P: 1592 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1593 if (dst != src2) 1594 return ADD(reg_map[dst], reg_map[src2], ZERO); 1595 return SLJIT_SUCCESS; 1596 1597 case SLJIT_MOV_U32: 1598 case SLJIT_MOV_S32: 1599 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1600 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1601 if (op == SLJIT_MOV_S32) 1602 return BFEXTS(reg_map[dst], reg_map[src2], 0, 31); 1603 1604 return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); 1605 } else if (dst != src2) { 1606 SLJIT_ASSERT(src2 == 0); 1607 return ADD(reg_map[dst], reg_map[src2], ZERO); 1608 } 1609 1610 return SLJIT_SUCCESS; 1611 1612 case SLJIT_MOV_U8: 1613 case SLJIT_MOV_S8: 1614 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1615 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1616 if (op == SLJIT_MOV_S8) 1617 return BFEXTS(reg_map[dst], reg_map[src2], 0, 7); 1618 1619 return BFEXTU(reg_map[dst], reg_map[src2], 0, 7); 1620 } else if (dst != src2) { 1621 SLJIT_ASSERT(src2 == 0); 1622 return ADD(reg_map[dst], 
reg_map[src2], ZERO); 1623 } 1624 1625 return SLJIT_SUCCESS; 1626 1627 case SLJIT_MOV_U16: 1628 case SLJIT_MOV_S16: 1629 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1630 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1631 if (op == SLJIT_MOV_S16) 1632 return BFEXTS(reg_map[dst], reg_map[src2], 0, 15); 1633 1634 return BFEXTU(reg_map[dst], reg_map[src2], 0, 15); 1635 } else if (dst != src2) { 1636 SLJIT_ASSERT(src2 == 0); 1637 return ADD(reg_map[dst], reg_map[src2], ZERO); 1638 } 1639 1640 return SLJIT_SUCCESS; 1641 1642 case SLJIT_NOT: 1643 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1644 if (op & SLJIT_SET_E) 1645 FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2])); 1646 if (CHECK_FLAGS(SLJIT_SET_E)) 1647 FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2])); 1648 1649 return SLJIT_SUCCESS; 1650 1651 case SLJIT_CLZ: 1652 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1653 if (op & SLJIT_SET_E) 1654 FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2])); 1655 if (CHECK_FLAGS(SLJIT_SET_E)) 1656 FAIL_IF(CLZ(reg_map[dst], reg_map[src2])); 1657 1658 return SLJIT_SUCCESS; 1659 1660 case SLJIT_ADD: 1661 if (flags & SRC2_IMM) { 1662 if (op & SLJIT_SET_O) { 1663 FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63)); 1664 if (src2 < 0) 1665 FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); 1666 } 1667 1668 if (op & SLJIT_SET_E) 1669 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2)); 1670 1671 if (op & SLJIT_SET_C) { 1672 if (src2 >= 0) 1673 FAIL_IF(ORI(ULESS_FLAG ,reg_map[src1], src2)); 1674 else { 1675 FAIL_IF(ADDLI(ULESS_FLAG ,ZERO, src2)); 1676 FAIL_IF(OR(ULESS_FLAG,reg_map[src1],ULESS_FLAG)); 1677 } 1678 } 1679 1680 /* dst may be the same as src1 or src2. 
*/ 1681 if (CHECK_FLAGS(SLJIT_SET_E)) 1682 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); 1683 1684 if (op & SLJIT_SET_O) { 1685 FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63)); 1686 1687 if (src2 < 0) 1688 FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1)); 1689 } 1690 } else { 1691 if (op & SLJIT_SET_O) { 1692 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1693 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); 1694 1695 if (src1 != dst) 1696 overflow_ra = reg_map[src1]; 1697 else if (src2 != dst) 1698 overflow_ra = reg_map[src2]; 1699 else { 1700 /* Rare ocasion. */ 1701 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1702 overflow_ra = TMP_EREG2; 1703 } 1704 } 1705 1706 if (op & SLJIT_SET_E) 1707 FAIL_IF(ADD(EQUAL_FLAG ,reg_map[src1], reg_map[src2])); 1708 1709 if (op & SLJIT_SET_C) 1710 FAIL_IF(OR(ULESS_FLAG,reg_map[src1], reg_map[src2])); 1711 1712 /* dst may be the same as src1 or src2. */ 1713 if (CHECK_FLAGS(SLJIT_SET_E)) 1714 FAIL_IF(ADD(reg_map[dst],reg_map[src1], reg_map[src2])); 1715 1716 if (op & SLJIT_SET_O) { 1717 FAIL_IF(XOR(OVERFLOW_FLAG,reg_map[dst], overflow_ra)); 1718 FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); 1719 } 1720 } 1721 1722 /* a + b >= a | b (otherwise, the carry should be set to 1). */ 1723 if (op & SLJIT_SET_C) 1724 FAIL_IF(CMPLTU(ULESS_FLAG ,reg_map[dst] ,ULESS_FLAG)); 1725 1726 if (op & SLJIT_SET_O) 1727 return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); 1728 1729 return SLJIT_SUCCESS; 1730 1731 case SLJIT_ADDC: 1732 if (flags & SRC2_IMM) { 1733 if (op & SLJIT_SET_C) { 1734 if (src2 >= 0) 1735 FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2)); 1736 else { 1737 FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2)); 1738 FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1)); 1739 } 1740 } 1741 1742 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); 1743 1744 } else { 1745 if (op & SLJIT_SET_C) 1746 FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1747 1748 /* dst may be the same as src1 or src2. 
*/ 1749 FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2])); 1750 } 1751 1752 if (op & SLJIT_SET_C) 1753 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1)); 1754 1755 FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG)); 1756 1757 if (!(op & SLJIT_SET_C)) 1758 return SLJIT_SUCCESS; 1759 1760 /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */ 1761 FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1)); 1762 FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG)); 1763 /* Set carry flag. */ 1764 return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1); 1765 1766 case SLJIT_SUB: 1767 if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) { 1768 FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); 1769 src2 = TMP_REG2; 1770 flags &= ~SRC2_IMM; 1771 } 1772 1773 if (flags & SRC2_IMM) { 1774 if (op & SLJIT_SET_O) { 1775 FAIL_IF(SHRUI(TMP_EREG1,reg_map[src1], 63)); 1776 1777 if (src2 < 0) 1778 FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); 1779 1780 if (src1 != dst) 1781 overflow_ra = reg_map[src1]; 1782 else { 1783 /* Rare ocasion. */ 1784 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1785 overflow_ra = TMP_EREG2; 1786 } 1787 } 1788 1789 if (op & SLJIT_SET_E) 1790 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2)); 1791 1792 if (op & SLJIT_SET_C) { 1793 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); 1794 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped)); 1795 } 1796 1797 /* dst may be the same as src1 or src2. */ 1798 if (CHECK_FLAGS(SLJIT_SET_E)) 1799 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); 1800 1801 } else { 1802 1803 if (op & SLJIT_SET_O) { 1804 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1805 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); 1806 1807 if (src1 != dst) 1808 overflow_ra = reg_map[src1]; 1809 else { 1810 /* Rare ocasion. 
*/ 1811 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1812 overflow_ra = TMP_EREG2; 1813 } 1814 } 1815 1816 if (op & SLJIT_SET_E) 1817 FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2])); 1818 1819 if (op & (SLJIT_SET_U | SLJIT_SET_C)) 1820 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2])); 1821 1822 if (op & SLJIT_SET_U) 1823 FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1])); 1824 1825 if (op & SLJIT_SET_S) { 1826 FAIL_IF(CMPLTS(LESS_FLAG ,reg_map[src1] ,reg_map[src2])); 1827 FAIL_IF(CMPLTS(GREATER_FLAG ,reg_map[src2] ,reg_map[src1])); 1828 } 1829 1830 /* dst may be the same as src1 or src2. */ 1831 if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) 1832 FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); 1833 } 1834 1835 if (op & SLJIT_SET_O) { 1836 FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra)); 1837 FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); 1838 return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); 1839 } 1840 1841 return SLJIT_SUCCESS; 1842 1843 case SLJIT_SUBC: 1844 if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) { 1845 FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); 1846 src2 = TMP_REG2; 1847 flags &= ~SRC2_IMM; 1848 } 1849 1850 if (flags & SRC2_IMM) { 1851 if (op & SLJIT_SET_C) { 1852 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2)); 1853 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped)); 1854 } 1855 1856 /* dst may be the same as src1 or src2. */ 1857 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); 1858 1859 } else { 1860 if (op & SLJIT_SET_C) 1861 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2])); 1862 /* dst may be the same as src1 or src2. 
*/ 1863 FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); 1864 } 1865 1866 if (op & SLJIT_SET_C) 1867 FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG)); 1868 1869 FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG)); 1870 1871 if (op & SLJIT_SET_C) 1872 FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO)); 1873 1874 return SLJIT_SUCCESS; 1875 1876 case SLJIT_MUL: 1877 if (flags & SRC2_IMM) { 1878 FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2)); 1879 src2 = TMP_REG2; 1880 flags &= ~SRC2_IMM; 1881 } 1882 1883 FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2])); 1884 1885 return SLJIT_SUCCESS; 1886 1887 #define EMIT_LOGICAL(op_imm, op_norm) \ 1888 if (flags & SRC2_IMM) { \ 1889 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \ 1890 if (op & SLJIT_SET_E) \ 1891 FAIL_IF(push_3_buffer( \ 1892 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1893 ADDR_TMP_mapped, __LINE__)); \ 1894 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1895 FAIL_IF(push_3_buffer( \ 1896 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1897 ADDR_TMP_mapped, __LINE__)); \ 1898 } else { \ 1899 if (op & SLJIT_SET_E) \ 1900 FAIL_IF(push_3_buffer( \ 1901 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1902 reg_map[src2], __LINE__)); \ 1903 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1904 FAIL_IF(push_3_buffer( \ 1905 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1906 reg_map[src2], __LINE__)); \ 1907 } 1908 1909 case SLJIT_AND: 1910 EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND); 1911 return SLJIT_SUCCESS; 1912 1913 case SLJIT_OR: 1914 EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR); 1915 return SLJIT_SUCCESS; 1916 1917 case SLJIT_XOR: 1918 EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR); 1919 return SLJIT_SUCCESS; 1920 1921 #define EMIT_SHIFT(op_imm, op_norm) \ 1922 if (flags & SRC2_IMM) { \ 1923 if (op & SLJIT_SET_E) \ 1924 FAIL_IF(push_3_buffer( \ 1925 compiler, op_imm, EQUAL_FLAG, reg_map[src1], \ 1926 src2 & 0x3F, __LINE__)); \ 1927 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1928 FAIL_IF(push_3_buffer( \ 1929 
compiler, op_imm, reg_map[dst], reg_map[src1], \ 1930 src2 & 0x3F, __LINE__)); \ 1931 } else { \ 1932 if (op & SLJIT_SET_E) \ 1933 FAIL_IF(push_3_buffer( \ 1934 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1935 reg_map[src2], __LINE__)); \ 1936 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1937 FAIL_IF(push_3_buffer( \ 1938 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1939 reg_map[src2], __LINE__)); \ 1940 } 1941 1942 case SLJIT_SHL: 1943 EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL); 1944 return SLJIT_SUCCESS; 1945 1946 case SLJIT_LSHR: 1947 EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU); 1948 return SLJIT_SUCCESS; 1949 1950 case SLJIT_ASHR: 1951 EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS); 1952 return SLJIT_SUCCESS; 1953 } 1954 1955 SLJIT_ASSERT_STOP(); 1956 return SLJIT_SUCCESS; 1957 } 1958 1959 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) 1960 { 1961 /* arg1 goes to TMP_REG1 or src reg. 1962 arg2 goes to TMP_REG2, imm or src reg. 1963 TMP_REG3 can be used for caching. 1964 result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ 1965 sljit_s32 dst_r = TMP_REG2; 1966 sljit_s32 src1_r; 1967 sljit_sw src2_r = 0; 1968 sljit_s32 sugg_src2_r = TMP_REG2; 1969 1970 if (!(flags & ALT_KEEP_CACHE)) { 1971 compiler->cache_arg = 0; 1972 compiler->cache_argw = 0; 1973 } 1974 1975 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { 1976 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) 1977 return SLJIT_SUCCESS; 1978 if (GET_FLAGS(op)) 1979 flags |= UNUSED_DEST; 1980 } else if (FAST_IS_REG(dst)) { 1981 dst_r = dst; 1982 flags |= REG_DEST; 1983 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) 1984 sugg_src2_r = dst_r; 1985 } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw)) 1986 flags |= SLOW_DEST; 1987 1988 if (flags & IMM_OP) { 1989 if ((src2 & SLJIT_IMM) && src2w) { 1990 if ((!(flags & LOGICAL_OP) 1991 && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN)) 1992 || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) { 1993 flags |= SRC2_IMM; 1994 src2_r = src2w; 1995 } 1996 } 1997 1998 if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { 1999 if ((!(flags & LOGICAL_OP) 2000 && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN)) 2001 || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) { 2002 flags |= SRC2_IMM; 2003 src2_r = src1w; 2004 2005 /* And swap arguments. */ 2006 src1 = src2; 2007 src1w = src2w; 2008 src2 = SLJIT_IMM; 2009 /* src2w = src2_r unneeded. */ 2010 } 2011 } 2012 } 2013 2014 /* Source 1. */ 2015 if (FAST_IS_REG(src1)) { 2016 src1_r = src1; 2017 flags |= REG1_SOURCE; 2018 } else if (src1 & SLJIT_IMM) { 2019 if (src1w) { 2020 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w)); 2021 src1_r = TMP_REG1; 2022 } else 2023 src1_r = 0; 2024 } else { 2025 if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w)) 2026 FAIL_IF(compiler->error); 2027 else 2028 flags |= SLOW_SRC1; 2029 src1_r = TMP_REG1; 2030 } 2031 2032 /* Source 2. 
*/ 2033 if (FAST_IS_REG(src2)) { 2034 src2_r = src2; 2035 flags |= REG2_SOURCE; 2036 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) 2037 dst_r = src2_r; 2038 } else if (src2 & SLJIT_IMM) { 2039 if (!(flags & SRC2_IMM)) { 2040 if (src2w) { 2041 FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w)); 2042 src2_r = sugg_src2_r; 2043 } else { 2044 src2_r = 0; 2045 if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM)) 2046 dst_r = 0; 2047 } 2048 } 2049 } else { 2050 if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w)) 2051 FAIL_IF(compiler->error); 2052 else 2053 flags |= SLOW_SRC2; 2054 src2_r = sugg_src2_r; 2055 } 2056 2057 if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { 2058 SLJIT_ASSERT(src2_r == TMP_REG2); 2059 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { 2060 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w)); 2061 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); 2062 } else { 2063 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w)); 2064 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw)); 2065 } 2066 } else if (flags & SLOW_SRC1) 2067 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); 2068 else if (flags & SLOW_SRC2) 2069 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw)); 2070 2071 FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); 2072 2073 if (dst & SLJIT_MEM) { 2074 if (!(flags & SLOW_DEST)) { 2075 getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw); 2076 return compiler->error; 2077 } 2078 2079 return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0); 2080 } 2081 2082 return SLJIT_SUCCESS; 2083 } 2084 2085 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw, sljit_s32 type) 2086 { 2087 sljit_s32 sugg_dst_ar, dst_ar; 2088 sljit_s32 flags = GET_ALL_FLAGS(op); 2089 sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; 2090 2091 CHECK_ERROR(); 2092 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 2093 ADJUST_LOCAL_OFFSET(dst, dstw); 2094 2095 if (dst == SLJIT_UNUSED) 2096 return SLJIT_SUCCESS; 2097 2098 op = GET_OPCODE(op); 2099 if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32) 2100 mem_type = INT_DATA | SIGNED_DATA; 2101 sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2]; 2102 2103 compiler->cache_arg = 0; 2104 compiler->cache_argw = 0; 2105 if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { 2106 ADJUST_LOCAL_OFFSET(src, srcw); 2107 FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); 2108 src = TMP_REG1; 2109 srcw = 0; 2110 } 2111 2112 switch (type & 0xff) { 2113 case SLJIT_EQUAL: 2114 case SLJIT_NOT_EQUAL: 2115 FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1)); 2116 dst_ar = sugg_dst_ar; 2117 break; 2118 case SLJIT_LESS: 2119 case SLJIT_GREATER_EQUAL: 2120 dst_ar = ULESS_FLAG; 2121 break; 2122 case SLJIT_GREATER: 2123 case SLJIT_LESS_EQUAL: 2124 dst_ar = UGREATER_FLAG; 2125 break; 2126 case SLJIT_SIG_LESS: 2127 case SLJIT_SIG_GREATER_EQUAL: 2128 dst_ar = LESS_FLAG; 2129 break; 2130 case SLJIT_SIG_GREATER: 2131 case SLJIT_SIG_LESS_EQUAL: 2132 dst_ar = GREATER_FLAG; 2133 break; 2134 case SLJIT_OVERFLOW: 2135 case SLJIT_NOT_OVERFLOW: 2136 dst_ar = OVERFLOW_FLAG; 2137 break; 2138 case SLJIT_MUL_OVERFLOW: 2139 case SLJIT_MUL_NOT_OVERFLOW: 2140 FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1)); 2141 dst_ar = sugg_dst_ar; 2142 type ^= 0x1; /* Flip type bit for the XORI below. 
*/ 2143 break; 2144 2145 default: 2146 SLJIT_ASSERT_STOP(); 2147 dst_ar = sugg_dst_ar; 2148 break; 2149 } 2150 2151 if (type & 0x1) { 2152 FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1)); 2153 dst_ar = sugg_dst_ar; 2154 } 2155 2156 if (op >= SLJIT_ADD) { 2157 if (TMP_REG2_mapped != dst_ar) 2158 FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO)); 2159 return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); 2160 } 2161 2162 if (dst & SLJIT_MEM) 2163 return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw); 2164 2165 if (sugg_dst_ar != dst_ar) 2166 return ADD(sugg_dst_ar, dst_ar, ZERO); 2167 2168 return SLJIT_SUCCESS; 2169 } 2170 2171 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { 2172 CHECK_ERROR(); 2173 CHECK(check_sljit_emit_op0(compiler, op)); 2174 2175 op = GET_OPCODE(op); 2176 switch (op) { 2177 case SLJIT_NOP: 2178 return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__); 2179 2180 case SLJIT_BREAKPOINT: 2181 return PI(BPT); 2182 2183 case SLJIT_LMUL_UW: 2184 case SLJIT_LMUL_SW: 2185 case SLJIT_DIVMOD_UW: 2186 case SLJIT_DIVMOD_SW: 2187 case SLJIT_DIV_UW: 2188 case SLJIT_DIV_SW: 2189 SLJIT_ASSERT_STOP(); 2190 } 2191 2192 return SLJIT_SUCCESS; 2193 } 2194 2195 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) 2196 { 2197 CHECK_ERROR(); 2198 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); 2199 ADJUST_LOCAL_OFFSET(dst, dstw); 2200 ADJUST_LOCAL_OFFSET(src, srcw); 2201 2202 switch (GET_OPCODE(op)) { 2203 case SLJIT_MOV: 2204 case SLJIT_MOV_P: 2205 return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2206 2207 case SLJIT_MOV_U32: 2208 return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2209 2210 case SLJIT_MOV_S32: 2211 return emit_op(compiler, SLJIT_MOV_S32, 
INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2212 2213 case SLJIT_MOV_U8: 2214 return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw); 2215 2216 case SLJIT_MOV_S8: 2217 return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw); 2218 2219 case SLJIT_MOV_U16: 2220 return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw); 2221 2222 case SLJIT_MOV_S16: 2223 return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw); 2224 2225 case SLJIT_MOVU: 2226 case SLJIT_MOVU_P: 2227 return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2228 2229 case SLJIT_MOVU_U32: 2230 return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2231 2232 case SLJIT_MOVU_S32: 2233 return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2234 2235 case SLJIT_MOVU_U8: 2236 return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw); 2237 2238 case SLJIT_MOVU_S8: 2239 return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw); 2240 2241 case SLJIT_MOVU_U16: 2242 return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw); 2243 2244 case SLJIT_MOVU_S16: 2245 return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16) srcw : srcw); 2246 2247 case SLJIT_NOT: 2248 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); 2249 2250 case SLJIT_NEG: 2251 return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); 2252 2253 case SLJIT_CLZ: 2254 return emit_op(compiler, op, (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2255 } 2256 2257 return SLJIT_SUCCESS; 2258 } 2259 2260 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) 2261 { 2262 CHECK_ERROR(); 2263 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2264 ADJUST_LOCAL_OFFSET(dst, dstw); 2265 ADJUST_LOCAL_OFFSET(src1, src1w); 2266 ADJUST_LOCAL_OFFSET(src2, src2w); 2267 2268 switch (GET_OPCODE(op)) { 2269 case SLJIT_ADD: 2270 case SLJIT_ADDC: 2271 return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2272 2273 case SLJIT_SUB: 2274 case SLJIT_SUBC: 2275 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2276 2277 case SLJIT_MUL: 2278 return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); 2279 2280 case SLJIT_AND: 2281 case SLJIT_OR: 2282 case SLJIT_XOR: 2283 return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2284 2285 case SLJIT_SHL: 2286 case SLJIT_LSHR: 2287 case SLJIT_ASHR: 2288 if (src2 & SLJIT_IMM) 2289 src2w &= 0x3f; 2290 if (op & SLJIT_I32_OP) 2291 src2w &= 0x1f; 2292 2293 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2294 } 2295 2296 return SLJIT_SUCCESS; 2297 } 2298 2299 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler) 2300 { 2301 struct sljit_label *label; 2302 2303 flush_buffer(compiler); 2304 2305 CHECK_ERROR_PTR(); 2306 
CHECK_PTR(check_sljit_emit_label(compiler)); 2307 2308 if (compiler->last_label && compiler->last_label->size == compiler->size) 2309 return compiler->last_label; 2310 2311 label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label)); 2312 PTR_FAIL_IF(!label); 2313 set_label(label, compiler); 2314 return label; 2315 } 2316 2317 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) 2318 { 2319 sljit_s32 src_r = TMP_REG2; 2320 struct sljit_jump *jump = NULL; 2321 2322 flush_buffer(compiler); 2323 2324 CHECK_ERROR(); 2325 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 2326 ADJUST_LOCAL_OFFSET(src, srcw); 2327 2328 if (FAST_IS_REG(src)) { 2329 if (reg_map[src] != 0) 2330 src_r = src; 2331 else 2332 FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO)); 2333 } 2334 2335 if (type >= SLJIT_CALL0) { 2336 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); 2337 if (src & (SLJIT_IMM | SLJIT_MEM)) { 2338 if (src & SLJIT_IMM) 2339 FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1)); 2340 else { 2341 SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM)); 2342 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); 2343 } 2344 2345 FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2346 2347 FAIL_IF(ADDI_SOLO(54, 54, -16)); 2348 2349 FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG])); 2350 2351 return ADDI_SOLO(54, 54, 16); 2352 } 2353 2354 /* Register input. 
*/ 2355 if (type >= SLJIT_CALL1) 2356 FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2357 2358 FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO)); 2359 2360 FAIL_IF(ADDI_SOLO(54, 54, -16)); 2361 2362 FAIL_IF(JALR_SOLO(reg_map[src_r])); 2363 2364 return ADDI_SOLO(54, 54, 16); 2365 } 2366 2367 if (src & SLJIT_IMM) { 2368 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2369 FAIL_IF(!jump); 2370 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); 2371 jump->u.target = srcw; 2372 FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); 2373 2374 if (type >= SLJIT_FAST_CALL) { 2375 FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO)); 2376 jump->addr = compiler->size; 2377 FAIL_IF(JR_SOLO(reg_map[src_r])); 2378 } else { 2379 jump->addr = compiler->size; 2380 FAIL_IF(JR_SOLO(reg_map[src_r])); 2381 } 2382 2383 return SLJIT_SUCCESS; 2384 2385 } else if (src & SLJIT_MEM) { 2386 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); 2387 flush_buffer(compiler); 2388 } 2389 2390 FAIL_IF(JR_SOLO(reg_map[src_r])); 2391 2392 if (jump) 2393 jump->addr = compiler->size; 2394 2395 return SLJIT_SUCCESS; 2396 } 2397 2398 #define BR_Z(src) \ 2399 inst = BEQZ_X1 | SRCA_X1(src); \ 2400 flags = IS_COND; 2401 2402 #define BR_NZ(src) \ 2403 inst = BNEZ_X1 | SRCA_X1(src); \ 2404 flags = IS_COND; 2405 2406 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) 2407 { 2408 struct sljit_jump *jump; 2409 sljit_ins inst; 2410 sljit_s32 flags = 0; 2411 2412 flush_buffer(compiler); 2413 2414 CHECK_ERROR_PTR(); 2415 CHECK_PTR(check_sljit_emit_jump(compiler, type)); 2416 2417 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2418 PTR_FAIL_IF(!jump); 2419 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 2420 type &= 0xff; 2421 2422 switch (type) { 2423 case SLJIT_EQUAL: 2424 BR_NZ(EQUAL_FLAG); 2425 break; 2426 case SLJIT_NOT_EQUAL: 
2427 BR_Z(EQUAL_FLAG); 2428 break; 2429 case SLJIT_LESS: 2430 BR_Z(ULESS_FLAG); 2431 break; 2432 case SLJIT_GREATER_EQUAL: 2433 BR_NZ(ULESS_FLAG); 2434 break; 2435 case SLJIT_GREATER: 2436 BR_Z(UGREATER_FLAG); 2437 break; 2438 case SLJIT_LESS_EQUAL: 2439 BR_NZ(UGREATER_FLAG); 2440 break; 2441 case SLJIT_SIG_LESS: 2442 BR_Z(LESS_FLAG); 2443 break; 2444 case SLJIT_SIG_GREATER_EQUAL: 2445 BR_NZ(LESS_FLAG); 2446 break; 2447 case SLJIT_SIG_GREATER: 2448 BR_Z(GREATER_FLAG); 2449 break; 2450 case SLJIT_SIG_LESS_EQUAL: 2451 BR_NZ(GREATER_FLAG); 2452 break; 2453 case SLJIT_OVERFLOW: 2454 case SLJIT_MUL_OVERFLOW: 2455 BR_Z(OVERFLOW_FLAG); 2456 break; 2457 case SLJIT_NOT_OVERFLOW: 2458 case SLJIT_MUL_NOT_OVERFLOW: 2459 BR_NZ(OVERFLOW_FLAG); 2460 break; 2461 default: 2462 /* Not conditional branch. */ 2463 inst = 0; 2464 break; 2465 } 2466 2467 jump->flags |= flags; 2468 2469 if (inst) { 2470 inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6)); 2471 PTR_FAIL_IF(PI(inst)); 2472 } 2473 2474 PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); 2475 if (type <= SLJIT_JUMP) { 2476 jump->addr = compiler->size; 2477 PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped)); 2478 } else { 2479 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); 2480 /* Cannot be optimized out if type is >= CALL0. */ 2481 jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? 
SLJIT_REWRITABLE_JUMP : 0); 2482 PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2483 jump->addr = compiler->size; 2484 PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped)); 2485 } 2486 2487 return jump; 2488 } 2489 2490 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 2491 { 2492 return 0; 2493 } 2494 2495 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) 2496 { 2497 SLJIT_ASSERT_STOP(); 2498 } 2499 2500 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) 2501 { 2502 SLJIT_ASSERT_STOP(); 2503 } 2504 2505 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) 2506 { 2507 struct sljit_const *const_; 2508 sljit_s32 reg; 2509 2510 flush_buffer(compiler); 2511 2512 CHECK_ERROR_PTR(); 2513 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2514 ADJUST_LOCAL_OFFSET(dst, dstw); 2515 2516 const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const)); 2517 PTR_FAIL_IF(!const_); 2518 set_const(const_, compiler); 2519 2520 reg = FAST_IS_REG(dst) ? 
dst : TMP_REG2; 2521 2522 PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1)); 2523 2524 if (dst & SLJIT_MEM) 2525 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); 2526 return const_; 2527 } 2528 2529 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) 2530 { 2531 sljit_ins *inst = (sljit_ins *)addr; 2532 2533 inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43); 2534 inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43); 2535 inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43); 2536 SLJIT_CACHE_FLUSH(inst, inst + 3); 2537 } 2538 2539 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) 2540 { 2541 sljit_ins *inst = (sljit_ins *)addr; 2542 2543 inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43); 2544 inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43); 2545 inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43); 2546 inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43); 2547 SLJIT_CACHE_FLUSH(inst, inst + 4); 2548 } 2549 2550 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) 2551 { 2552 CHECK_REG_INDEX(check_sljit_get_register_index(reg)); 2553 return reg_map[reg]; 2554 } 2555 2556 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, 2557 void *instruction, sljit_s32 size) 2558 { 2559 CHECK_ERROR(); 2560 CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 2561 return SLJIT_ERR_UNSUPPORTED; 2562 } 2563 2564