1 /* 2 * Stack-less Just-In-Time compiler 3 * 4 * Copyright 2013-2013 Tilera Corporation(jiwang (at) tilera.com). All rights reserved. 5 * Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without modification, are 8 * permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, this list of 11 * conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 14 * of conditions and the following disclaimer in the documentation and/or other materials 15 * provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 20 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 23 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 25 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 /* TileGX architecture. */ 29 /* Contributed by Tilera Corporation. */ 30 #include "sljitNativeTILEGX-encoder.c" 31 32 #define SIMM_8BIT_MAX (0x7f) 33 #define SIMM_8BIT_MIN (-0x80) 34 #define SIMM_16BIT_MAX (0x7fff) 35 #define SIMM_16BIT_MIN (-0x8000) 36 #define SIMM_17BIT_MAX (0xffff) 37 #define SIMM_17BIT_MIN (-0x10000) 38 #define SIMM_32BIT_MIN (-0x80000000) 39 #define SIMM_32BIT_MAX (0x7fffffff) 40 #define SIMM_48BIT_MIN (0x800000000000L) 41 #define SIMM_48BIT_MAX (0x7fffffff0000L) 42 #define IMM16(imm) ((imm) & 0xffff) 43 44 #define UIMM_16BIT_MAX (0xffff) 45 46 #define TMP_REG1 (SLJIT_NO_REGISTERS + 1) 47 #define TMP_REG2 (SLJIT_NO_REGISTERS + 2) 48 #define TMP_REG3 (SLJIT_NO_REGISTERS + 3) 49 #define ADDR_TMP (SLJIT_NO_REGISTERS + 4) 50 #define PIC_ADDR_REG TMP_REG2 51 52 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { 53 63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7 54 }; 55 56 #define SLJIT_LOCALS_REG_mapped 54 57 #define TMP_REG1_mapped 5 58 #define TMP_REG2_mapped 16 59 #define TMP_REG3_mapped 6 60 #define ADDR_TMP_mapped 7 61 #define SLJIT_SAVED_REG1_mapped 30 62 #define SLJIT_SAVED_REG2_mapped 31 63 #define SLJIT_SAVED_REG3_mapped 32 64 #define SLJIT_SAVED_EREG1_mapped 33 65 #define SLJIT_SAVED_EREG2_mapped 34 66 67 /* Flags are keept in volatile registers. */ 68 #define EQUAL_FLAG 8 69 /* And carry flag as well. */ 70 #define ULESS_FLAG 9 71 #define UGREATER_FLAG 10 72 #define LESS_FLAG 11 73 #define GREATER_FLAG 12 74 #define OVERFLOW_FLAG 13 75 76 #define ZERO 63 77 #define RA 55 78 #define TMP_EREG1 14 79 #define TMP_EREG2 15 80 81 #define LOAD_DATA 0x01 82 #define WORD_DATA 0x00 83 #define BYTE_DATA 0x02 84 #define HALF_DATA 0x04 85 #define INT_DATA 0x06 86 #define SIGNED_DATA 0x08 87 #define DOUBLE_DATA 0x10 88 89 /* Separates integer and floating point registers */ 90 #define GPR_REG 0xf 91 92 #define MEM_MASK 0x1f 93 94 #define WRITE_BACK 0x00020 95 #define ARG_TEST 0x00040 96 #define ALT_KEEP_CACHE 0x00080 97 #define CUMULATIVE_OP 0x00100 98 #define LOGICAL_OP 0x00200 99 #define IMM_OP 0x00400 100 #define SRC2_IMM 0x00800 101 102 #define UNUSED_DEST 0x01000 103 #define REG_DEST 0x02000 104 #define REG1_SOURCE 0x04000 105 #define REG2_SOURCE 0x08000 106 #define SLOW_SRC1 0x10000 107 #define SLOW_SRC2 0x20000 108 #define SLOW_DEST 0x40000 109 110 /* Only these flags are set. UNUSED_DEST is not set when no flags should be set. 111 */ 112 #define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) 113 114 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void) 115 { 116 return "TileGX" SLJIT_CPUINFO; 117 } 118 119 /* Length of an instruction word */ 120 typedef sljit_uw sljit_ins; 121 122 struct jit_instr { 123 const struct tilegx_opcode* opcode; 124 tilegx_pipeline pipe; 125 unsigned long input_registers; 126 unsigned long output_registers; 127 int operand_value[4]; 128 int line; 129 }; 130 131 /* Opcode Helper Macros */ 132 #define TILEGX_X_MODE 0 133 134 #define X_MODE create_Mode(TILEGX_X_MODE) 135 136 #define FNOP_X0 \ 137 create_Opcode_X0(RRR_0_OPCODE_X0) | \ 138 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 139 create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) 140 141 #define FNOP_X1 \ 142 create_Opcode_X1(RRR_0_OPCODE_X1) | \ 143 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 144 create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1) 145 146 #define NOP \ 147 create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1 148 149 #define ANOP_X0 \ 150 create_Opcode_X0(RRR_0_OPCODE_X0) | \ 151 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 152 create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0) 153 154 #define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 155 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 156 create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \ 157 create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0 158 159 #define ADD_X1 \ 160 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 161 create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0 162 163 #define ADDI_X1 \ 164 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 165 create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0 166 167 #define SUB_X1 \ 168 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 169 create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0 170 171 #define NOR_X1 \ 172 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 173 create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0 174 175 #define OR_X1 \ 176 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 177 create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0 178 179 #define AND_X1 \ 180 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 181 create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0 182 183 #define XOR_X1 \ 184 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 185 create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0 186 187 #define CMOVNEZ_X0 \ 188 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 189 create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1 190 191 #define CMOVEQZ_X0 \ 192 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 193 create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1 194 195 #define ADDLI_X1 \ 196 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0 197 198 #define V4INT_L_X1 \ 199 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 200 create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0 201 202 #define BFEXTU_X0 \ 203 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ 204 create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1 205 206 #define BFEXTS_X0 \ 207 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ 208 create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1 209 210 #define SHL16INSLI_X1 \ 211 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0 212 213 #define ST_X1 \ 214 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 215 create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0 216 217 #define LD_X1 \ 218 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 219 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 220 create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0 221 222 #define JR_X1 \ 223 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 224 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 225 create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0 226 227 #define JALR_X1 \ 228 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 229 create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ 230 create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0 231 232 #define CLZ_X0 \ 233 create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ 234 create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ 235 create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1 236 237 #define CMPLTUI_X1 \ 238 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 239 create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0 240 241 #define CMPLTU_X1 \ 242 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 243 create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0 244 245 #define CMPLTS_X1 \ 246 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 247 create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0 248 249 #define XORI_X1 \ 250 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 251 create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0 252 253 #define ORI_X1 \ 254 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 255 create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0 256 257 #define ANDI_X1 \ 258 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ 259 create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0 260 261 #define SHLI_X1 \ 262 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 263 create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0 264 265 #define SHL_X1 \ 266 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 267 create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0 268 269 #define SHRSI_X1 \ 270 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 271 create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0 272 273 #define SHRS_X1 \ 274 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 275 create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0 276 277 #define SHRUI_X1 \ 278 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ 279 create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0 280 281 #define SHRU_X1 \ 282 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ 283 create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0 284 285 #define BEQZ_X1 \ 286 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ 287 create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0 288 289 #define BNEZ_X1 \ 290 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ 291 create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0 292 293 #define J_X1 \ 294 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ 295 create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0 296 297 #define JAL_X1 \ 298 create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ 299 create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0 300 301 #define DEST_X0(x) create_Dest_X0(x) 302 #define SRCA_X0(x) create_SrcA_X0(x) 303 #define SRCB_X0(x) create_SrcB_X0(x) 304 #define DEST_X1(x) create_Dest_X1(x) 305 #define SRCA_X1(x) create_SrcA_X1(x) 306 #define SRCB_X1(x) create_SrcB_X1(x) 307 #define IMM16_X1(x) create_Imm16_X1(x) 308 #define IMM8_X1(x) create_Imm8_X1(x) 309 #define BFSTART_X0(x) create_BFStart_X0(x) 310 #define BFEND_X0(x) create_BFEnd_X0(x) 311 #define SHIFTIMM_X1(x) create_ShAmt_X1(x) 312 #define JOFF_X1(x) create_JumpOff_X1(x) 313 #define BOFF_X1(x) create_BrOff_X1(x) 314 315 static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = { 316 /* u w s */ TILEGX_OPC_ST /* st */, 317 /* u w l */ TILEGX_OPC_LD /* ld */, 318 /* u b s */ TILEGX_OPC_ST1 /* st1 */, 319 /* u b l */ TILEGX_OPC_LD1U /* ld1u */, 320 /* u h s */ TILEGX_OPC_ST2 /* st2 */, 321 /* u h l */ TILEGX_OPC_LD2U /* ld2u */, 322 /* u i s */ TILEGX_OPC_ST4 /* st4 */, 323 /* u i l */ TILEGX_OPC_LD4U /* ld4u */, 324 /* s w s */ TILEGX_OPC_ST /* st */, 325 /* s w l */ TILEGX_OPC_LD /* ld */, 326 /* s b s */ TILEGX_OPC_ST1 /* st1 */, 327 /* s b l */ TILEGX_OPC_LD1S /* ld1s */, 328 /* s h s */ TILEGX_OPC_ST2 /* st2 */, 329 /* s h l */ TILEGX_OPC_LD2S /* ld2s */, 330 /* s i s */ TILEGX_OPC_ST4 /* st4 */, 331 /* s i l */ TILEGX_OPC_LD4S /* ld4s */, 332 }; 333 334 #ifdef TILEGX_JIT_DEBUG 335 static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line) 336 { 337 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 338 FAIL_IF(!ptr); 339 *ptr = ins; 340 compiler->size++; 341 printf("|%04d|S0|:\t\t", line); 342 print_insn_tilegx(ptr); 343 return SLJIT_SUCCESS; 344 } 345 346 static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins) 347 { 348 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 349 FAIL_IF(!ptr); 350 *ptr = ins; 351 compiler->size++; 352 return SLJIT_SUCCESS; 353 } 354 355 #define push_inst(a, b) push_inst_debug(a, b, __LINE__) 356 #else 357 static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) 358 { 359 sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); 360 FAIL_IF(!ptr); 361 *ptr = ins; 362 compiler->size++; 363 return SLJIT_SUCCESS; 364 } 365 #endif 366 367 #define BUNDLE_FORMAT_MASK(p0, p1, p2) \ 368 ((p0) | ((p1) << 8) | ((p2) << 16)) 369 370 #define BUNDLE_FORMAT(p0, p1, p2) \ 371 { \ 372 { \ 373 (tilegx_pipeline)(p0), \ 374 (tilegx_pipeline)(p1), \ 375 (tilegx_pipeline)(p2) \ 376 }, \ 377 BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \ 378 } 379 380 #define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS 381 382 #define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1) 383 384 #define PI(encoding) \ 385 push_inst(compiler, encoding) 386 387 #define PB3(opcode, dst, srca, srcb) \ 388 push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__) 389 390 #define PB2(opcode, dst, src) \ 391 push_2_buffer(compiler, opcode, dst, src, __LINE__) 392 393 #define JR(reg) \ 394 push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__) 395 396 #define ADD(dst, srca, srcb) \ 397 push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__) 398 399 #define SUB(dst, srca, srcb) \ 400 push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__) 401 402 #define NOR(dst, srca, srcb) \ 403 push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__) 404 405 #define OR(dst, srca, srcb) \ 406 push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__) 407 408 #define XOR(dst, srca, srcb) \ 409 push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__) 410 411 #define AND(dst, srca, srcb) \ 412 push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__) 413 414 #define CLZ(dst, src) \ 415 push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__) 416 417 #define SHLI(dst, srca, srcb) \ 418 push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__) 419 420 #define SHRUI(dst, srca, imm) \ 421 push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__) 422 423 #define XORI(dst, srca, imm) \ 424 push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__) 425 426 #define ORI(dst, srca, imm) \ 427 push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__) 428 429 #define CMPLTU(dst, srca, srcb) \ 430 push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__) 431 432 #define CMPLTS(dst, srca, srcb) \ 433 push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__) 434 435 #define CMPLTUI(dst, srca, imm) \ 436 push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__) 437 438 #define CMOVNEZ(dst, srca, srcb) \ 439 push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__) 440 441 #define CMOVEQZ(dst, srca, srcb) \ 442 push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__) 443 444 #define ADDLI(dst, srca, srcb) \ 445 push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__) 446 447 #define SHL16INSLI(dst, srca, srcb) \ 448 push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__) 449 450 #define LD_ADD(dst, addr, adjust) \ 451 push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__) 452 453 #define ST_ADD(src, addr, adjust) \ 454 push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__) 455 456 #define LD(dst, addr) \ 457 push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__) 458 459 #define BFEXTU(dst, src, start, end) \ 460 push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__) 461 462 #define BFEXTS(dst, src, start, end) \ 463 push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__) 464 465 #define ADD_SOLO(dest, srca, srcb) \ 466 push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb)) 467 468 #define ADDI_SOLO(dest, srca, imm) \ 469 push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm)) 470 471 #define ADDLI_SOLO(dest, srca, imm) \ 472 push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) 473 474 #define SHL16INSLI_SOLO(dest, srca, imm) \ 475 push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) 476 477 #define JALR_SOLO(reg) \ 478 push_inst(compiler, JALR_X1 | SRCA_X1(reg)) 479 480 #define JR_SOLO(reg) \ 481 push_inst(compiler, JR_X1 | SRCA_X1(reg)) 482 483 struct Format { 484 /* Mapping of bundle issue slot to assigned pipe. */ 485 tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; 486 487 /* Mask of pipes used by this bundle. */ 488 unsigned int pipe_mask; 489 }; 490 491 const struct Format formats[] = 492 { 493 /* In Y format we must always have something in Y2, since it has 494 * no fnop, so this conveys that Y2 must always be used. */ 495 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE), 496 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE), 497 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE), 498 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE), 499 500 /* Y format has three instructions. */ 501 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2), 502 BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1), 503 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2), 504 BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0), 505 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1), 506 BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0), 507 508 /* X format has only two instructions. */ 509 BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE), 510 BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE) 511 }; 512 513 514 struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; 515 unsigned long inst_buf_index; 516 517 tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode) 518 { 519 /* FIXME: tile: we could pregenerate this. */ 520 int pipe; 521 for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++) 522 ; 523 return (tilegx_pipeline)(pipe); 524 } 525 526 void insert_nop(tilegx_mnemonic opc, int line) 527 { 528 const struct tilegx_opcode* opcode = NULL; 529 530 memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]); 531 532 opcode = &tilegx_opcodes[opc]; 533 inst_buf[0].opcode = opcode; 534 inst_buf[0].pipe = get_any_valid_pipe(opcode); 535 inst_buf[0].input_registers = 0; 536 inst_buf[0].output_registers = 0; 537 inst_buf[0].line = line; 538 ++inst_buf_index; 539 } 540 541 const struct Format* compute_format() 542 { 543 unsigned int compatible_pipes = BUNDLE_FORMAT_MASK( 544 inst_buf[0].opcode->pipes, 545 inst_buf[1].opcode->pipes, 546 (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE))); 547 548 const struct Format* match = NULL; 549 const struct Format *b = NULL; 550 unsigned int i = 0; 551 for (i; i < sizeof formats / sizeof formats[0]; i++) { 552 b = &formats[i]; 553 if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) { 554 match = b; 555 break; 556 } 557 } 558 559 return match; 560 } 561 562 sljit_si assign_pipes() 563 { 564 unsigned long output_registers = 0; 565 unsigned int i = 0; 566 567 if (inst_buf_index == 1) { 568 tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle 569 ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP; 570 insert_nop(opc, __LINE__); 571 } 572 573 const struct Format* match = compute_format(); 574 575 if (match == NULL) 576 return -1; 577 578 for (i = 0; i < inst_buf_index; i++) { 579 580 if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0)) 581 return -1; 582 583 if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0)) 584 return -1; 585 586 /* Don't include Rzero in the match set, to avoid triggering 587 needlessly on 'prefetch' instrs. */ 588 589 output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL; 590 591 inst_buf[i].pipe = match->pipe[i]; 592 } 593 594 /* If only 2 instrs, and in Y-mode, insert a nop. */ 595 if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) { 596 insert_nop(TILEGX_OPC_FNOP, __LINE__); 597 598 /* Select the yet unassigned pipe. */ 599 tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0 600 + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2) 601 - (inst_buf[1].pipe + inst_buf[2].pipe))); 602 603 inst_buf[0].pipe = pipe; 604 } 605 606 return 0; 607 } 608 609 tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst) 610 { 611 int i, val; 612 const struct tilegx_opcode* opcode = inst->opcode; 613 tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe]; 614 615 const struct tilegx_operand* operand = NULL; 616 for (i = 0; i < opcode->num_operands; i++) { 617 operand = &tilegx_operands[opcode->operands[inst->pipe][i]]; 618 val = inst->operand_value[i]; 619 620 bits |= operand->insert(val); 621 } 622 623 return bits; 624 } 625 626 static sljit_si update_buffer(struct sljit_compiler *compiler) 627 { 628 int count; 629 int i; 630 int orig_index = inst_buf_index; 631 struct jit_instr inst0 = inst_buf[0]; 632 struct jit_instr inst1 = inst_buf[1]; 633 struct jit_instr inst2 = inst_buf[2]; 634 tilegx_bundle_bits bits = 0; 635 636 /* If the bundle is valid as is, perform the encoding and return 1. */ 637 if (assign_pipes() == 0) { 638 for (i = 0; i < inst_buf_index; i++) { 639 bits |= get_bundle_bit(inst_buf + i); 640 #ifdef TILEGX_JIT_DEBUG 641 printf("|%04d", inst_buf[i].line); 642 #endif 643 } 644 #ifdef TILEGX_JIT_DEBUG 645 if (inst_buf_index == 3) 646 printf("|M0|:\t"); 647 else 648 printf("|M0|:\t\t"); 649 print_insn_tilegx(&bits); 650 #endif 651 652 inst_buf_index = 0; 653 654 #ifdef TILEGX_JIT_DEBUG 655 return push_inst_nodebug(compiler, bits); 656 #else 657 return push_inst(compiler, bits); 658 #endif 659 } 660 661 /* If the bundle is invalid, split it in two. First encode the first two 662 (or possibly 1) instructions, and then the last, separately. Note that 663 assign_pipes may have re-ordered the instrs (by inserting no-ops in 664 lower slots) so we need to reset them. */ 665 666 inst_buf_index = orig_index - 1; 667 inst_buf[0] = inst0; 668 inst_buf[1] = inst1; 669 inst_buf[2] = inst2; 670 if (assign_pipes() == 0) { 671 for (i = 0; i < inst_buf_index; i++) { 672 bits |= get_bundle_bit(inst_buf + i); 673 #ifdef TILEGX_JIT_DEBUG 674 printf("|%04d", inst_buf[i].line); 675 #endif 676 } 677 678 #ifdef TILEGX_JIT_DEBUG 679 if (inst_buf_index == 3) 680 printf("|M1|:\t"); 681 else 682 printf("|M1|:\t\t"); 683 print_insn_tilegx(&bits); 684 #endif 685 686 if ((orig_index - 1) == 2) { 687 inst_buf[0] = inst2; 688 inst_buf_index = 1; 689 } else if ((orig_index - 1) == 1) { 690 inst_buf[0] = inst1; 691 inst_buf_index = 1; 692 } else 693 SLJIT_ASSERT_STOP(); 694 695 #ifdef TILEGX_JIT_DEBUG 696 return push_inst_nodebug(compiler, bits); 697 #else 698 return push_inst(compiler, bits); 699 #endif 700 } else { 701 /* We had 3 instrs of which the first 2 can't live in the same bundle. 702 Split those two. Note that we don't try to then combine the second 703 and third instr into a single bundle. First instruction: */ 704 inst_buf_index = 1; 705 inst_buf[0] = inst0; 706 inst_buf[1] = inst1; 707 inst_buf[2] = inst2; 708 if (assign_pipes() == 0) { 709 for (i = 0; i < inst_buf_index; i++) { 710 bits |= get_bundle_bit(inst_buf + i); 711 #ifdef TILEGX_JIT_DEBUG 712 printf("|%04d", inst_buf[i].line); 713 #endif 714 } 715 716 #ifdef TILEGX_JIT_DEBUG 717 if (inst_buf_index == 3) 718 printf("|M2|:\t"); 719 else 720 printf("|M2|:\t\t"); 721 print_insn_tilegx(&bits); 722 #endif 723 724 inst_buf[0] = inst1; 725 inst_buf[1] = inst2; 726 inst_buf_index = orig_index - 1; 727 #ifdef TILEGX_JIT_DEBUG 728 return push_inst_nodebug(compiler, bits); 729 #else 730 return push_inst(compiler, bits); 731 #endif 732 } else 733 SLJIT_ASSERT_STOP(); 734 } 735 736 SLJIT_ASSERT_STOP(); 737 } 738 739 static sljit_si flush_buffer(struct sljit_compiler *compiler) 740 { 741 while (inst_buf_index != 0) 742 update_buffer(compiler); 743 } 744 745 static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line) 746 { 747 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 748 FAIL_IF(update_buffer(compiler)); 749 750 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 751 inst_buf[inst_buf_index].opcode = opcode; 752 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 753 inst_buf[inst_buf_index].operand_value[0] = op0; 754 inst_buf[inst_buf_index].operand_value[1] = op1; 755 inst_buf[inst_buf_index].operand_value[2] = op2; 756 inst_buf[inst_buf_index].operand_value[3] = op3; 757 inst_buf[inst_buf_index].input_registers = 1L << op1; 758 inst_buf[inst_buf_index].output_registers = 1L << op0; 759 inst_buf[inst_buf_index].line = line; 760 inst_buf_index++; 761 762 return SLJIT_SUCCESS; 763 } 764 765 static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line) 766 { 767 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 768 FAIL_IF(update_buffer(compiler)); 769 770 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 771 inst_buf[inst_buf_index].opcode = opcode; 772 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 773 inst_buf[inst_buf_index].operand_value[0] = op0; 774 inst_buf[inst_buf_index].operand_value[1] = op1; 775 inst_buf[inst_buf_index].operand_value[2] = op2; 776 inst_buf[inst_buf_index].line = line; 777 778 switch (opc) { 779 case TILEGX_OPC_ST_ADD: 780 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); 781 inst_buf[inst_buf_index].output_registers = 1L << op0; 782 break; 783 case TILEGX_OPC_LD_ADD: 784 inst_buf[inst_buf_index].input_registers = 1L << op1; 785 inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1); 786 break; 787 case TILEGX_OPC_ADD: 788 case TILEGX_OPC_AND: 789 case TILEGX_OPC_SUB: 790 case TILEGX_OPC_OR: 791 case TILEGX_OPC_XOR: 792 case TILEGX_OPC_NOR: 793 case TILEGX_OPC_SHL: 794 case TILEGX_OPC_SHRU: 795 case TILEGX_OPC_SHRS: 796 case TILEGX_OPC_CMPLTU: 797 case TILEGX_OPC_CMPLTS: 798 case TILEGX_OPC_CMOVEQZ: 799 case TILEGX_OPC_CMOVNEZ: 800 inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2); 801 inst_buf[inst_buf_index].output_registers = 1L << op0; 802 break; 803 case TILEGX_OPC_ADDLI: 804 case TILEGX_OPC_XORI: 805 case TILEGX_OPC_ORI: 806 case TILEGX_OPC_SHLI: 807 case TILEGX_OPC_SHRUI: 808 case TILEGX_OPC_SHRSI: 809 case TILEGX_OPC_SHL16INSLI: 810 case TILEGX_OPC_CMPLTUI: 811 case TILEGX_OPC_CMPLTSI: 812 inst_buf[inst_buf_index].input_registers = 1L << op1; 813 inst_buf[inst_buf_index].output_registers = 1L << op0; 814 break; 815 default: 816 printf("unrecoginzed opc: %s\n", opcode->name); 817 SLJIT_ASSERT_STOP(); 818 } 819 820 inst_buf_index++; 821 822 return SLJIT_SUCCESS; 823 } 824 825 static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line) 826 { 827 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 828 FAIL_IF(update_buffer(compiler)); 829 830 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 831 inst_buf[inst_buf_index].opcode = opcode; 832 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 833 inst_buf[inst_buf_index].operand_value[0] = op0; 834 inst_buf[inst_buf_index].operand_value[1] = op1; 835 inst_buf[inst_buf_index].line = line; 836 837 switch (opc) { 838 case TILEGX_OPC_BEQZ: 839 case TILEGX_OPC_BNEZ: 840 inst_buf[inst_buf_index].input_registers = 1L << op0; 841 break; 842 case TILEGX_OPC_ST: 843 case TILEGX_OPC_ST1: 844 case TILEGX_OPC_ST2: 845 case TILEGX_OPC_ST4: 846 inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); 847 inst_buf[inst_buf_index].output_registers = 0; 848 break; 849 case TILEGX_OPC_CLZ: 850 case TILEGX_OPC_LD: 851 case TILEGX_OPC_LD1U: 852 case TILEGX_OPC_LD1S: 853 case TILEGX_OPC_LD2U: 854 case TILEGX_OPC_LD2S: 855 case TILEGX_OPC_LD4U: 856 case TILEGX_OPC_LD4S: 857 inst_buf[inst_buf_index].input_registers = 1L << op1; 858 inst_buf[inst_buf_index].output_registers = 1L << op0; 859 break; 860 default: 861 printf("unrecoginzed opc: %s\n", opcode->name); 862 SLJIT_ASSERT_STOP(); 863 } 864 865 inst_buf_index++; 866 867 return SLJIT_SUCCESS; 868 } 869 870 static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line) 871 { 872 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 873 FAIL_IF(update_buffer(compiler)); 874 875 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 876 inst_buf[inst_buf_index].opcode = opcode; 877 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 878 inst_buf[inst_buf_index].input_registers = 0; 879 inst_buf[inst_buf_index].output_registers = 0; 880 inst_buf[inst_buf_index].line = line; 881 inst_buf_index++; 882 883 return SLJIT_SUCCESS; 884 } 885 886 static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line) 887 { 888 if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) 889 FAIL_IF(update_buffer(compiler)); 890 891 const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; 892 inst_buf[inst_buf_index].opcode = opcode; 893 inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); 894 inst_buf[inst_buf_index].operand_value[0] = op0; 895 inst_buf[inst_buf_index].input_registers = 1L << op0; 896 inst_buf[inst_buf_index].output_registers = 0; 897 inst_buf[inst_buf_index].line = line; 898 inst_buf_index++; 899 900 return flush_buffer(compiler); 901 } 902 903 static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) 904 { 905 sljit_sw diff; 906 sljit_uw target_addr; 907 sljit_ins *inst; 908 sljit_ins saved_inst; 909 910 if (jump->flags & SLJIT_REWRITABLE_JUMP) 911 return code_ptr; 912 913 if (jump->flags & JUMP_ADDR) 914 target_addr = jump->u.target; 915 else { 916 SLJIT_ASSERT(jump->flags & JUMP_LABEL); 917 target_addr = (sljit_uw)(code + jump->u.label->size); 918 } 919 920 inst = (sljit_ins *)jump->addr; 921 if (jump->flags & IS_COND) 922 inst--; 923 924 diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3; 925 if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) { 926 jump->flags |= PATCH_B; 927 928 if (!(jump->flags & IS_COND)) { 929 if (jump->flags & IS_JAL) { 930 jump->flags &= ~(PATCH_B); 931 jump->flags |= PATCH_J; 932 inst[0] = JAL_X1; 933 934 #ifdef TILEGX_JIT_DEBUG 935 printf("[runtime relocate]%04d:\t", __LINE__); 936 print_insn_tilegx(inst); 937 #endif 938 } else { 939 inst[0] = BEQZ_X1 | SRCA_X1(ZERO); 940 941 #ifdef TILEGX_JIT_DEBUG 942 printf("[runtime relocate]%04d:\t", __LINE__); 943 print_insn_tilegx(inst); 944 #endif 945 } 946 947 return inst; 948 } 949 950 inst[0] = inst[0] ^ (0x7L << 55); 951 952 #ifdef TILEGX_JIT_DEBUG 953 printf("[runtime relocate]%04d:\t", __LINE__); 954 print_insn_tilegx(inst); 955 #endif 956 jump->addr -= sizeof(sljit_ins); 957 return inst; 958 } 959 960 if (jump->flags & IS_COND) { 961 if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { 962 jump->flags |= PATCH_J; 963 inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2); 964 inst[1] = J_X1; 965 return inst + 1; 966 } 967 968 return code_ptr; 969 } 970 971 if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { 972 jump->flags |= PATCH_J; 973 974 if (jump->flags & IS_JAL) { 975 inst[0] = JAL_X1; 976 977 #ifdef TILEGX_JIT_DEBUG 978 printf("[runtime relocate]%04d:\t", __LINE__); 979 print_insn_tilegx(inst); 980 #endif 981 982 } else { 983 inst[0] = J_X1; 984 985 #ifdef TILEGX_JIT_DEBUG 986 printf("[runtime relocate]%04d:\t", __LINE__); 987 print_insn_tilegx(inst); 988 #endif 989 } 990 991 return inst; 992 } 993 994 return code_ptr; 995 } 996 997 SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler) 998 { 999 struct sljit_memory_fragment *buf; 1000 sljit_ins *code; 1001 sljit_ins *code_ptr; 1002 sljit_ins *buf_ptr; 1003 sljit_ins *buf_end; 1004 sljit_uw word_count; 1005 sljit_uw addr; 1006 1007 struct sljit_label *label; 1008 struct sljit_jump *jump; 1009 struct sljit_const *const_; 1010 1011 CHECK_ERROR_PTR(); 1012 check_sljit_generate_code(compiler); 1013 reverse_buf(compiler); 1014 1015 code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); 1016 PTR_FAIL_WITH_EXEC_IF(code); 1017 buf = compiler->buf; 1018 1019 code_ptr = code; 1020 word_count = 0; 1021 label = compiler->labels; 1022 jump = compiler->jumps; 1023 const_ = compiler->consts; 1024 do { 1025 buf_ptr = (sljit_ins *)buf->memory; 1026 buf_end = buf_ptr + (buf->used_size >> 3); 1027 do { 1028 *code_ptr = *buf_ptr++; 1029 SLJIT_ASSERT(!label || label->size >= word_count); 1030 SLJIT_ASSERT(!jump || jump->addr >= word_count); 1031 SLJIT_ASSERT(!const_ || const_->addr >= word_count); 1032 /* These structures are ordered by their address. */ 1033 if (label && label->size == word_count) { 1034 /* Just recording the address. */ 1035 label->addr = (sljit_uw) code_ptr; 1036 label->size = code_ptr - code; 1037 label = label->next; 1038 } 1039 1040 if (jump && jump->addr == word_count) { 1041 if (jump->flags & IS_JAL) 1042 jump->addr = (sljit_uw)(code_ptr - 4); 1043 else 1044 jump->addr = (sljit_uw)(code_ptr - 3); 1045 1046 code_ptr = detect_jump_type(jump, code_ptr, code); 1047 jump = jump->next; 1048 } 1049 1050 if (const_ && const_->addr == word_count) { 1051 /* Just recording the address. */ 1052 const_->addr = (sljit_uw) code_ptr; 1053 const_ = const_->next; 1054 } 1055 1056 code_ptr++; 1057 word_count++; 1058 } while (buf_ptr < buf_end); 1059 1060 buf = buf->next; 1061 } while (buf); 1062 1063 if (label && label->size == word_count) { 1064 label->addr = (sljit_uw) code_ptr; 1065 label->size = code_ptr - code; 1066 label = label->next; 1067 } 1068 1069 SLJIT_ASSERT(!label); 1070 SLJIT_ASSERT(!jump); 1071 SLJIT_ASSERT(!const_); 1072 SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); 1073 1074 jump = compiler->jumps; 1075 while (jump) { 1076 do { 1077 addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; 1078 buf_ptr = (sljit_ins *)jump->addr; 1079 1080 if (jump->flags & PATCH_B) { 1081 addr = (sljit_sw)(addr - (jump->addr)) >> 3; 1082 SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN); 1083 buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr); 1084 1085 #ifdef TILEGX_JIT_DEBUG 1086 printf("[runtime relocate]%04d:\t", __LINE__); 1087 print_insn_tilegx(buf_ptr); 1088 #endif 1089 break; 1090 } 1091 1092 if (jump->flags & PATCH_J) { 1093 SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)); 1094 addr = (sljit_sw)(addr - (jump->addr)) >> 3; 1095 buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr); 1096 1097 #ifdef TILEGX_JIT_DEBUG 1098 printf("[runtime relocate]%04d:\t", __LINE__); 1099 print_insn_tilegx(buf_ptr); 1100 #endif 1101 break; 1102 } 1103 1104 SLJIT_ASSERT(!(jump->flags & IS_JAL)); 1105 1106 /* Set the fields of immediate loads. */ 1107 buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43); 1108 buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43); 1109 buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43); 1110 } while (0); 1111 1112 jump = jump->next; 1113 } 1114 1115 compiler->error = SLJIT_ERR_COMPILED; 1116 compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); 1117 SLJIT_CACHE_FLUSH(code, code_ptr); 1118 return code; 1119 } 1120 1121 static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) 1122 { 1123 1124 if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN) 1125 return ADDLI(dst_ar, ZERO, imm); 1126 1127 if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) { 1128 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16)); 1129 return SHL16INSLI(dst_ar, dst_ar, imm); 1130 } 1131 1132 if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) { 1133 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); 1134 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1135 return SHL16INSLI(dst_ar, dst_ar, imm); 1136 } 1137 1138 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48)); 1139 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32)); 1140 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1141 return SHL16INSLI(dst_ar, dst_ar, imm); 1142 } 1143 1144 static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush) 1145 { 1146 /* Should *not* be optimized as load_immediate, as pcre relocation 1147 mechanism will match this fixed 4-instruction pattern. */ 1148 if (flush) { 1149 FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32)); 1150 FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16)); 1151 return SHL16INSLI_SOLO(dst_ar, dst_ar, imm); 1152 } 1153 1154 FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); 1155 FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); 1156 return SHL16INSLI(dst_ar, dst_ar, imm); 1157 } 1158 1159 static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush) 1160 { 1161 /* Should *not* be optimized as load_immediate, as pcre relocation 1162 mechanism will match this fixed 4-instruction pattern. */ 1163 if (flush) { 1164 FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48)); 1165 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); 1166 FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); 1167 return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm); 1168 } 1169 1170 FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48)); 1171 FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); 1172 FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); 1173 return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); 1174 } 1175 1176 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, 1177 sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, 1178 sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) 1179 { 1180 sljit_ins base; 1181 sljit_ins bundle = 0; 1182 1183 CHECK_ERROR(); 1184 check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1185 1186 compiler->options = options; 1187 compiler->scratches = scratches; 1188 compiler->saveds = saveds; 1189 compiler->fscratches = fscratches; 1190 compiler->fsaveds = fsaveds; 1191 #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 1192 compiler->logical_local_size = local_size; 1193 #endif 1194 1195 local_size += (saveds + 1) * sizeof(sljit_sw); 1196 local_size = (local_size + 7) & ~7; 1197 compiler->local_size = local_size; 1198 1199 if (local_size <= SIMM_16BIT_MAX) { 1200 /* Frequent case. */ 1201 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size)); 1202 base = SLJIT_LOCALS_REG_mapped; 1203 } else { 1204 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); 1205 FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO)); 1206 FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); 1207 base = TMP_REG2_mapped; 1208 local_size = 0; 1209 } 1210 1211 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); 1212 FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8)); 1213 1214 if (saveds >= 1) 1215 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8)); 1216 1217 if (saveds >= 2) 1218 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8)); 1219 1220 if (saveds >= 3) 1221 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8)); 1222 1223 if (saveds >= 4) 1224 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8)); 1225 1226 if (saveds >= 5) 1227 FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8)); 1228 1229 if (args >= 1) 1230 FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO)); 1231 1232 if (args >= 2) 1233 FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO)); 1234 1235 if (args >= 3) 1236 FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO)); 1237 1238 return SLJIT_SUCCESS; 1239 } 1240 1241 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, 1242 sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, 1243 sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) 1244 { 1245 CHECK_ERROR_VOID(); 1246 check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1247 1248 compiler->options = options; 1249 compiler->scratches = scratches; 1250 compiler->saveds = saveds; 1251 compiler->fscratches = fscratches; 1252 compiler->fsaveds = fsaveds; 1253 #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 1254 compiler->logical_local_size = local_size; 1255 #endif 1256 1257 local_size += (saveds + 1) * sizeof(sljit_sw); 1258 compiler->local_size = (local_size + 7) & ~7; 1259 } 1260 1261 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) 1262 { 1263 sljit_si local_size; 1264 sljit_ins base; 1265 int addr_initialized = 0; 1266 1267 CHECK_ERROR(); 1268 check_sljit_emit_return(compiler, op, src, srcw); 1269 1270 FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); 1271 1272 local_size = compiler->local_size; 1273 if (local_size <= SIMM_16BIT_MAX) 1274 base = SLJIT_LOCALS_REG_mapped; 1275 else { 1276 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); 1277 FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); 1278 base = TMP_REG1_mapped; 1279 local_size = 0; 1280 } 1281 1282 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); 1283 FAIL_IF(LD(RA, ADDR_TMP_mapped)); 1284 1285 if (compiler->saveds >= 5) { 1286 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48)); 1287 addr_initialized = 1; 1288 1289 FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8)); 1290 } 1291 1292 if (compiler->saveds >= 4) { 1293 if (addr_initialized == 0) { 1294 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40)); 1295 addr_initialized = 1; 1296 } 1297 1298 FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8)); 1299 } 1300 1301 if (compiler->saveds >= 3) { 1302 if (addr_initialized == 0) { 1303 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32)); 1304 addr_initialized = 1; 1305 } 1306 1307 FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8)); 1308 } 1309 1310 if (compiler->saveds >= 2) { 1311 if (addr_initialized == 0) { 1312 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24)); 1313 addr_initialized = 1; 1314 } 1315 1316 FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8)); 1317 } 1318 1319 if (compiler->saveds >= 1) { 1320 if (addr_initialized == 0) { 1321 FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16)); 1322 /* addr_initialized = 1; no need to initialize as it's the last one. */ 1323 } 1324 1325 FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8)); 1326 } 1327 1328 if (compiler->local_size <= SIMM_16BIT_MAX) 1329 FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size)); 1330 else 1331 FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO)); 1332 1333 return JR(RA); 1334 } 1335 1336 /* reg_ar is an absoulute register! */ 1337 1338 /* Can perform an operation using at most 1 instruction. */ 1339 static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) 1340 { 1341 SLJIT_ASSERT(arg & SLJIT_MEM); 1342 1343 if ((!(flags & WRITE_BACK) || !(arg & REG_MASK)) 1344 && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1345 /* Works for both absoulte and relative addresses. */ 1346 if (SLJIT_UNLIKELY(flags & ARG_TEST)) 1347 return 1; 1348 1349 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw)); 1350 1351 if (flags & LOAD_DATA) 1352 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); 1353 else 1354 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); 1355 1356 return -1; 1357 } 1358 1359 return 0; 1360 } 1361 1362 /* See getput_arg below. 1363 Note: can_cache is called only for binary operators. Those 1364 operators always uses word arguments without write back. */ 1365 static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) 1366 { 1367 SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); 1368 1369 /* Simple operation except for updates. */ 1370 if (arg & OFFS_REG_MASK) { 1371 argw &= 0x3; 1372 next_argw &= 0x3; 1373 if (argw && argw == next_argw 1374 && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) 1375 return 1; 1376 return 0; 1377 } 1378 1379 if (arg == next_arg) { 1380 if (((next_argw - argw) <= SIMM_16BIT_MAX 1381 && (next_argw - argw) >= SIMM_16BIT_MIN)) 1382 return 1; 1383 1384 return 0; 1385 } 1386 1387 return 0; 1388 } 1389 1390 /* Emit the necessary instructions. See can_cache above. */ 1391 static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) 1392 { 1393 sljit_si tmp_ar, base; 1394 1395 SLJIT_ASSERT(arg & SLJIT_MEM); 1396 if (!(next_arg & SLJIT_MEM)) { 1397 next_arg = 0; 1398 next_argw = 0; 1399 } 1400 1401 if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) 1402 tmp_ar = reg_ar; 1403 else 1404 tmp_ar = TMP_REG1_mapped; 1405 1406 base = arg & REG_MASK; 1407 1408 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { 1409 argw &= 0x3; 1410 1411 if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) { 1412 SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar); 1413 FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); 1414 reg_ar = TMP_REG1_mapped; 1415 } 1416 1417 /* Using the cache. */ 1418 if (argw == compiler->cache_argw) { 1419 if (!(flags & WRITE_BACK)) { 1420 if (arg == compiler->cache_arg) { 1421 if (flags & LOAD_DATA) 1422 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1423 else 1424 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1425 } 1426 1427 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { 1428 if (arg == next_arg && argw == (next_argw & 0x3)) { 1429 compiler->cache_arg = arg; 1430 compiler->cache_argw = argw; 1431 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped)); 1432 if (flags & LOAD_DATA) 1433 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1434 else 1435 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1436 } 1437 1438 FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped)); 1439 if (flags & LOAD_DATA) 1440 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1441 else 1442 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1443 } 1444 } else { 1445 if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { 1446 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1447 if (flags & LOAD_DATA) 1448 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1449 else 1450 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1451 } 1452 } 1453 } 1454 1455 if (SLJIT_UNLIKELY(argw)) { 1456 compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); 1457 compiler->cache_argw = argw; 1458 FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw)); 1459 } 1460 1461 if (!(flags & WRITE_BACK)) { 1462 if (arg == next_arg && argw == (next_argw & 0x3)) { 1463 compiler->cache_arg = arg; 1464 compiler->cache_argw = argw; 1465 FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); 1466 tmp_ar = TMP_REG3_mapped; 1467 } else 1468 FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); 1469 1470 if (flags & LOAD_DATA) 1471 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1472 else 1473 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1474 } 1475 1476 FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); 1477 1478 if (flags & LOAD_DATA) 1479 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1480 else 1481 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1482 } 1483 1484 if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { 1485 /* Update only applies if a base register exists. */ 1486 if (reg_ar == reg_map[base]) { 1487 SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar); 1488 if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1489 FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw)); 1490 if (flags & LOAD_DATA) 1491 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); 1492 else 1493 FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); 1494 1495 if (argw) 1496 return ADDLI(reg_map[base], reg_map[base], argw); 1497 1498 return SLJIT_SUCCESS; 1499 } 1500 1501 FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); 1502 reg_ar = TMP_REG1_mapped; 1503 } 1504 1505 if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { 1506 if (argw) 1507 FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw)); 1508 } else { 1509 if (compiler->cache_arg == SLJIT_MEM 1510 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1511 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1512 if (argw != compiler->cache_argw) { 1513 FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1514 compiler->cache_argw = argw; 1515 } 1516 1517 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1518 } else { 1519 compiler->cache_arg = SLJIT_MEM; 1520 compiler->cache_argw = argw; 1521 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); 1522 FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); 1523 } 1524 } 1525 1526 if (flags & LOAD_DATA) 1527 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); 1528 else 1529 return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); 1530 } 1531 1532 if (compiler->cache_arg == arg 1533 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1534 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1535 if (argw != compiler->cache_argw) { 1536 FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1537 compiler->cache_argw = argw; 1538 } 1539 1540 if (flags & LOAD_DATA) 1541 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1542 else 1543 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1544 } 1545 1546 if (compiler->cache_arg == SLJIT_MEM 1547 && argw - compiler->cache_argw <= SIMM_16BIT_MAX 1548 && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { 1549 if (argw != compiler->cache_argw) 1550 FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); 1551 } else { 1552 compiler->cache_arg = SLJIT_MEM; 1553 FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); 1554 } 1555 1556 compiler->cache_argw = argw; 1557 1558 if (!base) { 1559 if (flags & LOAD_DATA) 1560 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1561 else 1562 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1563 } 1564 1565 if (arg == next_arg 1566 && next_argw - argw <= SIMM_16BIT_MAX 1567 && next_argw - argw >= SIMM_16BIT_MIN) { 1568 compiler->cache_arg = arg; 1569 FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base])); 1570 if (flags & LOAD_DATA) 1571 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); 1572 else 1573 return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); 1574 } 1575 1576 FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base])); 1577 1578 if (flags & LOAD_DATA) 1579 return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); 1580 else 1581 return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); 1582 } 1583 1584 static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) 1585 { 1586 if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) 1587 return compiler->error; 1588 1589 compiler->cache_arg = 0; 1590 compiler->cache_argw = 0; 1591 return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); 1592 } 1593 1594 static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) 1595 { 1596 if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) 1597 return compiler->error; 1598 return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); 1599 } 1600 1601 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) 1602 { 1603 CHECK_ERROR(); 1604 check_sljit_emit_fast_enter(compiler, dst, dstw); 1605 ADJUST_LOCAL_OFFSET(dst, dstw); 1606 1607 /* For UNUSED dst. Uncommon, but possible. */ 1608 if (dst == SLJIT_UNUSED) 1609 return SLJIT_SUCCESS; 1610 1611 if (FAST_IS_REG(dst)) 1612 return ADD(reg_map[dst], RA, ZERO); 1613 1614 /* Memory. */ 1615 return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw); 1616 } 1617 1618 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) 1619 { 1620 CHECK_ERROR(); 1621 check_sljit_emit_fast_return(compiler, src, srcw); 1622 ADJUST_LOCAL_OFFSET(src, srcw); 1623 1624 if (FAST_IS_REG(src)) 1625 FAIL_IF(ADD(RA, reg_map[src], ZERO)); 1626 1627 else if (src & SLJIT_MEM) 1628 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw)); 1629 1630 else if (src & SLJIT_IMM) 1631 FAIL_IF(load_immediate(compiler, RA, srcw)); 1632 1633 return JR(RA); 1634 } 1635 1636 static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2) 1637 { 1638 sljit_si overflow_ra = 0; 1639 1640 switch (GET_OPCODE(op)) { 1641 case SLJIT_MOV: 1642 case SLJIT_MOV_P: 1643 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1644 if (dst != src2) 1645 return ADD(reg_map[dst], reg_map[src2], ZERO); 1646 return SLJIT_SUCCESS; 1647 1648 case SLJIT_MOV_UI: 1649 case SLJIT_MOV_SI: 1650 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1651 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1652 if (op == SLJIT_MOV_SI) 1653 return BFEXTS(reg_map[dst], reg_map[src2], 0, 31); 1654 1655 return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); 1656 } else if (dst != src2) 1657 SLJIT_ASSERT_STOP(); 1658 1659 return SLJIT_SUCCESS; 1660 1661 case SLJIT_MOV_UB: 1662 case SLJIT_MOV_SB: 1663 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1664 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1665 if (op == SLJIT_MOV_SB) 1666 return BFEXTS(reg_map[dst], reg_map[src2], 0, 7); 1667 1668 return BFEXTU(reg_map[dst], reg_map[src2], 0, 7); 1669 } else if (dst != src2) 1670 SLJIT_ASSERT_STOP(); 1671 1672 return SLJIT_SUCCESS; 1673 1674 case SLJIT_MOV_UH: 1675 case SLJIT_MOV_SH: 1676 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1677 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { 1678 if (op == SLJIT_MOV_SH) 1679 return BFEXTS(reg_map[dst], reg_map[src2], 0, 15); 1680 1681 return BFEXTU(reg_map[dst], reg_map[src2], 0, 15); 1682 } else if (dst != src2) 1683 SLJIT_ASSERT_STOP(); 1684 1685 return SLJIT_SUCCESS; 1686 1687 case SLJIT_NOT: 1688 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1689 if (op & SLJIT_SET_E) 1690 FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2])); 1691 if (CHECK_FLAGS(SLJIT_SET_E)) 1692 FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2])); 1693 1694 return SLJIT_SUCCESS; 1695 1696 case SLJIT_CLZ: 1697 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); 1698 if (op & SLJIT_SET_E) 1699 FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2])); 1700 if (CHECK_FLAGS(SLJIT_SET_E)) 1701 FAIL_IF(CLZ(reg_map[dst], reg_map[src2])); 1702 1703 return SLJIT_SUCCESS; 1704 1705 case SLJIT_ADD: 1706 if (flags & SRC2_IMM) { 1707 if (op & SLJIT_SET_O) { 1708 FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63)); 1709 if (src2 < 0) 1710 FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); 1711 } 1712 1713 if (op & SLJIT_SET_E) 1714 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2)); 1715 1716 if (op & SLJIT_SET_C) { 1717 if (src2 >= 0) 1718 FAIL_IF(ORI(ULESS_FLAG ,reg_map[src1], src2)); 1719 else { 1720 FAIL_IF(ADDLI(ULESS_FLAG ,ZERO, src2)); 1721 FAIL_IF(OR(ULESS_FLAG,reg_map[src1],ULESS_FLAG)); 1722 } 1723 } 1724 1725 /* dst may be the same as src1 or src2. */ 1726 if (CHECK_FLAGS(SLJIT_SET_E)) 1727 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); 1728 1729 if (op & SLJIT_SET_O) { 1730 FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63)); 1731 1732 if (src2 < 0) 1733 FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1)); 1734 } 1735 } else { 1736 if (op & SLJIT_SET_O) { 1737 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1738 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); 1739 1740 if (src1 != dst) 1741 overflow_ra = reg_map[src1]; 1742 else if (src2 != dst) 1743 overflow_ra = reg_map[src2]; 1744 else { 1745 /* Rare ocasion. */ 1746 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1747 overflow_ra = TMP_EREG2; 1748 } 1749 } 1750 1751 if (op & SLJIT_SET_E) 1752 FAIL_IF(ADD(EQUAL_FLAG ,reg_map[src1], reg_map[src2])); 1753 1754 if (op & SLJIT_SET_C) 1755 FAIL_IF(OR(ULESS_FLAG,reg_map[src1], reg_map[src2])); 1756 1757 /* dst may be the same as src1 or src2. */ 1758 if (CHECK_FLAGS(SLJIT_SET_E)) 1759 FAIL_IF(ADD(reg_map[dst],reg_map[src1], reg_map[src2])); 1760 1761 if (op & SLJIT_SET_O) { 1762 FAIL_IF(XOR(OVERFLOW_FLAG,reg_map[dst], overflow_ra)); 1763 FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); 1764 } 1765 } 1766 1767 /* a + b >= a | b (otherwise, the carry should be set to 1). */ 1768 if (op & SLJIT_SET_C) 1769 FAIL_IF(CMPLTU(ULESS_FLAG ,reg_map[dst] ,ULESS_FLAG)); 1770 1771 if (op & SLJIT_SET_O) 1772 return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); 1773 1774 return SLJIT_SUCCESS; 1775 1776 case SLJIT_ADDC: 1777 if (flags & SRC2_IMM) { 1778 if (op & SLJIT_SET_C) { 1779 if (src2 >= 0) 1780 FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2)); 1781 else { 1782 FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2)); 1783 FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1)); 1784 } 1785 } 1786 1787 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); 1788 1789 } else { 1790 if (op & SLJIT_SET_C) 1791 FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1792 1793 /* dst may be the same as src1 or src2. */ 1794 FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2])); 1795 } 1796 1797 if (op & SLJIT_SET_C) 1798 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1)); 1799 1800 FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG)); 1801 1802 if (!(op & SLJIT_SET_C)) 1803 return SLJIT_SUCCESS; 1804 1805 /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */ 1806 FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1)); 1807 FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG)); 1808 /* Set carry flag. */ 1809 return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1); 1810 1811 case SLJIT_SUB: 1812 if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) { 1813 FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); 1814 src2 = TMP_REG2; 1815 flags &= ~SRC2_IMM; 1816 } 1817 1818 if (flags & SRC2_IMM) { 1819 if (op & SLJIT_SET_O) { 1820 FAIL_IF(SHRUI(TMP_EREG1,reg_map[src1], 63)); 1821 1822 if (src2 < 0) 1823 FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); 1824 1825 if (src1 != dst) 1826 overflow_ra = reg_map[src1]; 1827 else { 1828 /* Rare ocasion. */ 1829 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1830 1831 overflow_ra = TMP_EREG2; 1832 } 1833 } 1834 1835 if (op & SLJIT_SET_E) 1836 FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2)); 1837 1838 if (op & SLJIT_SET_C) { 1839 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); 1840 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped)); 1841 } 1842 1843 /* dst may be the same as src1 or src2. */ 1844 if (CHECK_FLAGS(SLJIT_SET_E)) 1845 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); 1846 1847 } else { 1848 1849 if (op & SLJIT_SET_O) { 1850 FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); 1851 FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); 1852 1853 if (src1 != dst) 1854 overflow_ra = reg_map[src1]; 1855 else { 1856 /* Rare ocasion. */ 1857 FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); 1858 overflow_ra = TMP_EREG2; 1859 } 1860 } 1861 1862 if (op & SLJIT_SET_E) 1863 FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2])); 1864 1865 if (op & (SLJIT_SET_U | SLJIT_SET_C)) 1866 FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2])); 1867 1868 if (op & SLJIT_SET_U) 1869 FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1])); 1870 1871 if (op & SLJIT_SET_S) { 1872 FAIL_IF(CMPLTS(LESS_FLAG ,reg_map[src1] ,reg_map[src2])); 1873 FAIL_IF(CMPLTS(GREATER_FLAG ,reg_map[src2] ,reg_map[src1])); 1874 } 1875 1876 /* dst may be the same as src1 or src2. */ 1877 if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) 1878 FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); 1879 } 1880 1881 if (op & SLJIT_SET_O) { 1882 FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra)); 1883 FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); 1884 return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); 1885 } 1886 1887 return SLJIT_SUCCESS; 1888 1889 case SLJIT_SUBC: 1890 if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) { 1891 FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); 1892 src2 = TMP_REG2; 1893 flags &= ~SRC2_IMM; 1894 } 1895 1896 if (flags & SRC2_IMM) { 1897 if (op & SLJIT_SET_C) { 1898 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2)); 1899 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped)); 1900 } 1901 1902 /* dst may be the same as src1 or src2. */ 1903 FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); 1904 1905 } else { 1906 if (op & SLJIT_SET_C) 1907 FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2])); 1908 /* dst may be the same as src1 or src2. */ 1909 FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); 1910 } 1911 1912 if (op & SLJIT_SET_C) 1913 FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG)); 1914 1915 FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG)); 1916 1917 if (op & SLJIT_SET_C) 1918 FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO)); 1919 1920 return SLJIT_SUCCESS; 1921 1922 #define EMIT_LOGICAL(op_imm, op_norm) \ 1923 if (flags & SRC2_IMM) { \ 1924 FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \ 1925 if (op & SLJIT_SET_E) \ 1926 FAIL_IF(push_3_buffer( \ 1927 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1928 ADDR_TMP_mapped, __LINE__)); \ 1929 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1930 FAIL_IF(push_3_buffer( \ 1931 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1932 ADDR_TMP_mapped, __LINE__)); \ 1933 } else { \ 1934 if (op & SLJIT_SET_E) \ 1935 FAIL_IF(push_3_buffer( \ 1936 compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ 1937 reg_map[src2], __LINE__)); \ 1938 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1939 FAIL_IF(push_3_buffer( \ 1940 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1941 reg_map[src2], __LINE__)); \ 1942 } 1943 1944 case SLJIT_AND: 1945 EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND); 1946 return SLJIT_SUCCESS; 1947 1948 case SLJIT_OR: 1949 EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR); 1950 return SLJIT_SUCCESS; 1951 1952 case SLJIT_XOR: 1953 EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR); 1954 return SLJIT_SUCCESS; 1955 1956 #define EMIT_SHIFT(op_imm, op_norm) \ 1957 if (flags & SRC2_IMM) { \ 1958 if (op & SLJIT_SET_E) \ 1959 FAIL_IF(push_3_buffer( \ 1960 compiler, op_imm, EQUAL_FLAG, reg_map[src1], \ 1961 src2 & 0x3F, __LINE__)); \ 1962 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1963 FAIL_IF(push_3_buffer( \ 1964 compiler, op_imm, reg_map[dst], reg_map[src1], \ 1965 src2 & 0x3F, __LINE__)); \ 1966 } else { \ 1967 if (op & SLJIT_SET_E) \ 1968 FAIL_IF(push_3_buffer( \ 1969 compiler, op_imm, reg_map[dst], reg_map[src1], \ 1970 src2 & 0x3F, __LINE__)); \ 1971 if (CHECK_FLAGS(SLJIT_SET_E)) \ 1972 FAIL_IF(push_3_buffer( \ 1973 compiler, op_norm, reg_map[dst], reg_map[src1], \ 1974 reg_map[src2], __LINE__)); \ 1975 } 1976 1977 case SLJIT_SHL: 1978 EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL); 1979 return SLJIT_SUCCESS; 1980 1981 case SLJIT_LSHR: 1982 EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU); 1983 return SLJIT_SUCCESS; 1984 1985 case SLJIT_ASHR: 1986 EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS); 1987 return SLJIT_SUCCESS; 1988 } 1989 1990 SLJIT_ASSERT_STOP(); 1991 return SLJIT_SUCCESS; 1992 } 1993 1994 static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) 1995 { 1996 /* arg1 goes to TMP_REG1 or src reg. 1997 arg2 goes to TMP_REG2, imm or src reg. 1998 TMP_REG3 can be used for caching. 1999 result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ 2000 sljit_si dst_r = TMP_REG2; 2001 sljit_si src1_r; 2002 sljit_sw src2_r = 0; 2003 sljit_si sugg_src2_r = TMP_REG2; 2004 2005 if (!(flags & ALT_KEEP_CACHE)) { 2006 compiler->cache_arg = 0; 2007 compiler->cache_argw = 0; 2008 } 2009 2010 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { 2011 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) 2012 return SLJIT_SUCCESS; 2013 if (GET_FLAGS(op)) 2014 flags |= UNUSED_DEST; 2015 } else if (FAST_IS_REG(dst)) { 2016 dst_r = dst; 2017 flags |= REG_DEST; 2018 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) 2019 sugg_src2_r = dst_r; 2020 } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw)) 2021 flags |= SLOW_DEST; 2022 2023 if (flags & IMM_OP) { 2024 if ((src2 & SLJIT_IMM) && src2w) { 2025 if ((!(flags & LOGICAL_OP) 2026 && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN)) 2027 || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) { 2028 flags |= SRC2_IMM; 2029 src2_r = src2w; 2030 } 2031 } 2032 2033 if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { 2034 if ((!(flags & LOGICAL_OP) 2035 && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN)) 2036 || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) { 2037 flags |= SRC2_IMM; 2038 src2_r = src1w; 2039 2040 /* And swap arguments. */ 2041 src1 = src2; 2042 src1w = src2w; 2043 src2 = SLJIT_IMM; 2044 /* src2w = src2_r unneeded. */ 2045 } 2046 } 2047 } 2048 2049 /* Source 1. */ 2050 if (FAST_IS_REG(src1)) { 2051 src1_r = src1; 2052 flags |= REG1_SOURCE; 2053 } else if (src1 & SLJIT_IMM) { 2054 if (src1w) { 2055 FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w)); 2056 src1_r = TMP_REG1; 2057 } else 2058 src1_r = 0; 2059 } else { 2060 if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w)) 2061 FAIL_IF(compiler->error); 2062 else 2063 flags |= SLOW_SRC1; 2064 src1_r = TMP_REG1; 2065 } 2066 2067 /* Source 2. */ 2068 if (FAST_IS_REG(src2)) { 2069 src2_r = src2; 2070 flags |= REG2_SOURCE; 2071 if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) 2072 dst_r = src2_r; 2073 } else if (src2 & SLJIT_IMM) { 2074 if (!(flags & SRC2_IMM)) { 2075 if (src2w) { 2076 FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w)); 2077 src2_r = sugg_src2_r; 2078 } else { 2079 src2_r = 0; 2080 if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) 2081 dst_r = 0; 2082 } 2083 } 2084 } else { 2085 if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w)) 2086 FAIL_IF(compiler->error); 2087 else 2088 flags |= SLOW_SRC2; 2089 src2_r = sugg_src2_r; 2090 } 2091 2092 if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { 2093 SLJIT_ASSERT(src2_r == TMP_REG2); 2094 if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { 2095 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w)); 2096 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); 2097 } else { 2098 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w)); 2099 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw)); 2100 } 2101 } else if (flags & SLOW_SRC1) 2102 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); 2103 else if (flags & SLOW_SRC2) 2104 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw)); 2105 2106 FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); 2107 2108 if (dst & SLJIT_MEM) { 2109 if (!(flags & SLOW_DEST)) { 2110 getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw); 2111 return compiler->error; 2112 } 2113 2114 return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0); 2115 } 2116 2117 return SLJIT_SUCCESS; 2118 } 2119 2120 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type) 2121 { 2122 sljit_si sugg_dst_ar, dst_ar; 2123 sljit_si flags = GET_ALL_FLAGS(op); 2124 2125 CHECK_ERROR(); 2126 check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); 2127 ADJUST_LOCAL_OFFSET(dst, dstw); 2128 2129 if (dst == SLJIT_UNUSED) 2130 return SLJIT_SUCCESS; 2131 2132 op = GET_OPCODE(op); 2133 sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2]; 2134 2135 compiler->cache_arg = 0; 2136 compiler->cache_argw = 0; 2137 if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { 2138 ADJUST_LOCAL_OFFSET(src, srcw); 2139 FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); 2140 src = TMP_REG1; 2141 srcw = 0; 2142 } 2143 2144 switch (type) { 2145 case SLJIT_C_EQUAL: 2146 case SLJIT_C_NOT_EQUAL: 2147 FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1)); 2148 dst_ar = sugg_dst_ar; 2149 break; 2150 case SLJIT_C_LESS: 2151 case SLJIT_C_GREATER_EQUAL: 2152 case SLJIT_C_FLOAT_LESS: 2153 case SLJIT_C_FLOAT_GREATER_EQUAL: 2154 dst_ar = ULESS_FLAG; 2155 break; 2156 case SLJIT_C_GREATER: 2157 case SLJIT_C_LESS_EQUAL: 2158 case SLJIT_C_FLOAT_GREATER: 2159 case SLJIT_C_FLOAT_LESS_EQUAL: 2160 dst_ar = UGREATER_FLAG; 2161 break; 2162 case SLJIT_C_SIG_LESS: 2163 case SLJIT_C_SIG_GREATER_EQUAL: 2164 dst_ar = LESS_FLAG; 2165 break; 2166 case SLJIT_C_SIG_GREATER: 2167 case SLJIT_C_SIG_LESS_EQUAL: 2168 dst_ar = GREATER_FLAG; 2169 break; 2170 case SLJIT_C_OVERFLOW: 2171 case SLJIT_C_NOT_OVERFLOW: 2172 dst_ar = OVERFLOW_FLAG; 2173 break; 2174 case SLJIT_C_MUL_OVERFLOW: 2175 case SLJIT_C_MUL_NOT_OVERFLOW: 2176 FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1)); 2177 dst_ar = sugg_dst_ar; 2178 type ^= 0x1; /* Flip type bit for the XORI below. */ 2179 break; 2180 case SLJIT_C_FLOAT_EQUAL: 2181 case SLJIT_C_FLOAT_NOT_EQUAL: 2182 dst_ar = EQUAL_FLAG; 2183 break; 2184 2185 default: 2186 SLJIT_ASSERT_STOP(); 2187 dst_ar = sugg_dst_ar; 2188 break; 2189 } 2190 2191 if (type & 0x1) { 2192 FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1)); 2193 dst_ar = sugg_dst_ar; 2194 } 2195 2196 if (op >= SLJIT_ADD) { 2197 if (TMP_REG2_mapped != dst_ar) 2198 FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO)); 2199 return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); 2200 } 2201 2202 if (dst & SLJIT_MEM) 2203 return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw); 2204 2205 if (sugg_dst_ar != dst_ar) 2206 return ADD(sugg_dst_ar, dst_ar, ZERO); 2207 2208 return SLJIT_SUCCESS; 2209 } 2210 2211 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { 2212 CHECK_ERROR(); 2213 check_sljit_emit_op0(compiler, op); 2214 2215 op = GET_OPCODE(op); 2216 switch (op) { 2217 case SLJIT_NOP: 2218 return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__); 2219 2220 case SLJIT_BREAKPOINT: 2221 return PI(BPT); 2222 2223 case SLJIT_UMUL: 2224 case SLJIT_SMUL: 2225 case SLJIT_UDIV: 2226 case SLJIT_SDIV: 2227 SLJIT_ASSERT_STOP(); 2228 } 2229 2230 return SLJIT_SUCCESS; 2231 } 2232 2233 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) 2234 { 2235 CHECK_ERROR(); 2236 check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); 2237 ADJUST_LOCAL_OFFSET(dst, dstw); 2238 ADJUST_LOCAL_OFFSET(src, srcw); 2239 2240 switch (GET_OPCODE(op)) { 2241 case SLJIT_MOV: 2242 case SLJIT_MOV_P: 2243 return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2244 2245 case SLJIT_MOV_UI: 2246 return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2247 2248 case SLJIT_MOV_SI: 2249 return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); 2250 2251 case SLJIT_MOV_UB: 2252 return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw); 2253 2254 case SLJIT_MOV_SB: 2255 return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw); 2256 2257 case SLJIT_MOV_UH: 2258 return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw); 2259 2260 case SLJIT_MOV_SH: 2261 return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw); 2262 2263 case SLJIT_MOVU: 2264 case SLJIT_MOVU_P: 2265 return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2266 2267 case SLJIT_MOVU_UI: 2268 return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2269 2270 case SLJIT_MOVU_SI: 2271 return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 2272 2273 case SLJIT_MOVU_UB: 2274 return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw); 2275 2276 case SLJIT_MOVU_SB: 2277 return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw); 2278 2279 case SLJIT_MOVU_UH: 2280 return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw); 2281 2282 case SLJIT_MOVU_SH: 2283 return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw); 2284 2285 case SLJIT_NOT: 2286 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); 2287 2288 case SLJIT_NEG: 2289 return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); 2290 2291 case SLJIT_CLZ: 2292 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); 2293 } 2294 2295 return SLJIT_SUCCESS; 2296 } 2297 2298 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) 2299 { 2300 CHECK_ERROR(); 2301 check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); 2302 ADJUST_LOCAL_OFFSET(dst, dstw); 2303 ADJUST_LOCAL_OFFSET(src1, src1w); 2304 ADJUST_LOCAL_OFFSET(src2, src2w); 2305 2306 switch (GET_OPCODE(op)) { 2307 case SLJIT_ADD: 2308 case SLJIT_ADDC: 2309 return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2310 2311 case SLJIT_SUB: 2312 case SLJIT_SUBC: 2313 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2314 2315 case SLJIT_MUL: 2316 return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); 2317 2318 case SLJIT_AND: 2319 case SLJIT_OR: 2320 case SLJIT_XOR: 2321 return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2322 2323 case SLJIT_SHL: 2324 case SLJIT_LSHR: 2325 case SLJIT_ASHR: 2326 if (src2 & SLJIT_IMM) 2327 src2w &= 0x3f; 2328 if (op & SLJIT_INT_OP) 2329 src2w &= 0x1f; 2330 2331 return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); 2332 } 2333 2334 return SLJIT_SUCCESS; 2335 } 2336 2337 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler) 2338 { 2339 struct sljit_label *label; 2340 2341 flush_buffer(compiler); 2342 2343 CHECK_ERROR_PTR(); 2344 check_sljit_emit_label(compiler); 2345 2346 if (compiler->last_label && compiler->last_label->size == compiler->size) 2347 return compiler->last_label; 2348 2349 label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label)); 2350 PTR_FAIL_IF(!label); 2351 set_label(label, compiler); 2352 return label; 2353 } 2354 2355 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) 2356 { 2357 sljit_si src_r = TMP_REG2; 2358 struct sljit_jump *jump = NULL; 2359 2360 flush_buffer(compiler); 2361 2362 CHECK_ERROR(); 2363 check_sljit_emit_ijump(compiler, type, src, srcw); 2364 ADJUST_LOCAL_OFFSET(src, srcw); 2365 2366 if (FAST_IS_REG(src)) { 2367 if (reg_map[src] != 0) 2368 src_r = src; 2369 else 2370 FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO)); 2371 } 2372 2373 if (type >= SLJIT_CALL0) { 2374 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); 2375 if (src & (SLJIT_IMM | SLJIT_MEM)) { 2376 if (src & SLJIT_IMM) 2377 FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1)); 2378 else { 2379 SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM)); 2380 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); 2381 } 2382 2383 FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2384 2385 FAIL_IF(ADDI_SOLO(54, 54, -16)); 2386 2387 FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG])); 2388 2389 return ADDI_SOLO(54, 54, 16); 2390 } 2391 2392 /* Register input. */ 2393 if (type >= SLJIT_CALL1) 2394 FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2395 2396 FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO)); 2397 2398 FAIL_IF(ADDI_SOLO(54, 54, -16)); 2399 2400 FAIL_IF(JALR_SOLO(reg_map[src_r])); 2401 2402 return ADDI_SOLO(54, 54, 16); 2403 } 2404 2405 if (src & SLJIT_IMM) { 2406 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2407 FAIL_IF(!jump); 2408 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); 2409 jump->u.target = srcw; 2410 FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); 2411 2412 if (type >= SLJIT_FAST_CALL) { 2413 FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO)); 2414 jump->addr = compiler->size; 2415 FAIL_IF(JR_SOLO(reg_map[src_r])); 2416 } else { 2417 jump->addr = compiler->size; 2418 FAIL_IF(JR_SOLO(reg_map[src_r])); 2419 } 2420 2421 return SLJIT_SUCCESS; 2422 2423 } else if (src & SLJIT_MEM) 2424 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); 2425 2426 FAIL_IF(JR_SOLO(reg_map[src_r])); 2427 2428 if (jump) 2429 jump->addr = compiler->size; 2430 2431 return SLJIT_SUCCESS; 2432 } 2433 2434 #define BR_Z(src) \ 2435 inst = BEQZ_X1 | SRCA_X1(src); \ 2436 flags = IS_COND; 2437 2438 #define BR_NZ(src) \ 2439 inst = BNEZ_X1 | SRCA_X1(src); \ 2440 flags = IS_COND; 2441 2442 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) 2443 { 2444 struct sljit_jump *jump; 2445 sljit_ins inst; 2446 sljit_si flags = 0; 2447 2448 flush_buffer(compiler); 2449 2450 CHECK_ERROR_PTR(); 2451 check_sljit_emit_jump(compiler, type); 2452 2453 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2454 PTR_FAIL_IF(!jump); 2455 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 2456 type &= 0xff; 2457 2458 switch (type) { 2459 case SLJIT_C_EQUAL: 2460 case SLJIT_C_FLOAT_NOT_EQUAL: 2461 BR_NZ(EQUAL_FLAG); 2462 break; 2463 case SLJIT_C_NOT_EQUAL: 2464 case SLJIT_C_FLOAT_EQUAL: 2465 BR_Z(EQUAL_FLAG); 2466 break; 2467 case SLJIT_C_LESS: 2468 case SLJIT_C_FLOAT_LESS: 2469 BR_Z(ULESS_FLAG); 2470 break; 2471 case SLJIT_C_GREATER_EQUAL: 2472 case SLJIT_C_FLOAT_GREATER_EQUAL: 2473 BR_NZ(ULESS_FLAG); 2474 break; 2475 case SLJIT_C_GREATER: 2476 case SLJIT_C_FLOAT_GREATER: 2477 BR_Z(UGREATER_FLAG); 2478 break; 2479 case SLJIT_C_LESS_EQUAL: 2480 case SLJIT_C_FLOAT_LESS_EQUAL: 2481 BR_NZ(UGREATER_FLAG); 2482 break; 2483 case SLJIT_C_SIG_LESS: 2484 BR_Z(LESS_FLAG); 2485 break; 2486 case SLJIT_C_SIG_GREATER_EQUAL: 2487 BR_NZ(LESS_FLAG); 2488 break; 2489 case SLJIT_C_SIG_GREATER: 2490 BR_Z(GREATER_FLAG); 2491 break; 2492 case SLJIT_C_SIG_LESS_EQUAL: 2493 BR_NZ(GREATER_FLAG); 2494 break; 2495 case SLJIT_C_OVERFLOW: 2496 case SLJIT_C_MUL_OVERFLOW: 2497 BR_Z(OVERFLOW_FLAG); 2498 break; 2499 case SLJIT_C_NOT_OVERFLOW: 2500 case SLJIT_C_MUL_NOT_OVERFLOW: 2501 BR_NZ(OVERFLOW_FLAG); 2502 break; 2503 default: 2504 /* Not conditional branch. */ 2505 inst = 0; 2506 break; 2507 } 2508 2509 jump->flags |= flags; 2510 2511 if (inst) { 2512 inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6)); 2513 PTR_FAIL_IF(PI(inst)); 2514 } 2515 2516 PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); 2517 if (type <= SLJIT_JUMP) { 2518 jump->addr = compiler->size; 2519 PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped)); 2520 } else { 2521 SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); 2522 /* Cannot be optimized out if type is >= CALL0. */ 2523 jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0); 2524 PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); 2525 jump->addr = compiler->size; 2526 PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped)); 2527 } 2528 2529 return jump; 2530 } 2531 2532 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) 2533 { 2534 return 0; 2535 } 2536 2537 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) 2538 { 2539 SLJIT_ASSERT_STOP(); 2540 } 2541 2542 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) 2543 { 2544 SLJIT_ASSERT_STOP(); 2545 } 2546 2547 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) 2548 { 2549 struct sljit_const *const_; 2550 sljit_si reg; 2551 2552 flush_buffer(compiler); 2553 2554 CHECK_ERROR_PTR(); 2555 check_sljit_emit_const(compiler, dst, dstw, init_value); 2556 ADJUST_LOCAL_OFFSET(dst, dstw); 2557 2558 const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const)); 2559 PTR_FAIL_IF(!const_); 2560 set_const(const_, compiler); 2561 2562 reg = FAST_IS_REG(dst) ? dst : TMP_REG2; 2563 2564 PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1)); 2565 2566 if (dst & SLJIT_MEM) 2567 PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); 2568 return const_; 2569 } 2570 2571 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) 2572 { 2573 sljit_ins *inst = (sljit_ins *)addr; 2574 2575 inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43); 2576 inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43); 2577 inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43); 2578 SLJIT_CACHE_FLUSH(inst, inst + 3); 2579 } 2580 2581 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) 2582 { 2583 sljit_ins *inst = (sljit_ins *)addr; 2584 2585 inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43); 2586 inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43); 2587 inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43); 2588 inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43); 2589 SLJIT_CACHE_FLUSH(inst, inst + 4); 2590 } 2591