/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	return "ARMv7" SLJIT_CPUINFO;
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	return "ARMv5" SLJIT_CPUINFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 5)

#define TMP_FREG1	(0)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* In ARM instruction words.
   Cache lines are usually 32 byte aligned. */
#define CONST_POOL_ALIGNMENT	8
#define CONST_POOL_EMPTY	0xffffffff

#define ALIGN_INSTRUCTION(ptr) \
	(sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
#define MAX_DIFFERENCE(max_diff) \
	(((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
	0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15
};

#define RM(rm) (reg_map[rm])
#define RD(rd) (reg_map[rd] << 12)
#define RN(rn) (reg_map[rn] << 16)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

/* The instruction includes the AL condition.
   INST_NAME - CONDITIONAL removes this flag. */
#define COND_MASK	0xf0000000
#define CONDITIONAL	0xe0000000
#define PUSH_POOL	0xff000000

/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
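/* Note: the *_DP values below are the 4-bit ALU opcode field of an ARM data
   processing instruction (bits 21-24). EMIT_DATA_PROCESS_INS (defined later)
   shifts them into place; e.g. ADD_DP (0x4) with the AL condition becomes
   0xe0800000, the base encoding of "add". */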
#define ADC_DP		0x5
#define ADD_DP		0x4
#define AND_DP		0x0
#define B		0xea000000
#define BIC_DP		0xe
#define BL		0xeb000000
#define BLX		0xe12fff30
#define BX		0xe12fff10
#define CLZ		0xe16f0f10
#define CMP_DP		0xa
#define BKPT		0xe1200070
#define EOR_DP		0x1
#define MOV_DP		0xd
#define MUL		0xe0000090
#define MVN_DP		0xf
#define NOP		0xe1a00000
#define ORR_DP		0xc
#define PUSH		0xe92d0000
#define POP		0xe8bd0000
#define RSB_DP		0x3
#define RSC_DP		0x7
#define SBC_DP		0x6
#define SMULL		0xe0c00090
#define SUB_DP		0x2
#define UMULL		0xe0800090
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Arm v7 specific instructions. */
#define MOVW		0xe3000000
#define MOVT		0xe3400000
#define SXTB		0xe6af0070
#define SXTH		0xe6bf0070
#define UXTB		0xe6ef0070
#define UXTH		0xe6ff0070
#endif

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)

static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_uw* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point to the address after the constant pool. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
	inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}

static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
{
	sljit_uw* ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));

	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst;
	return SLJIT_SUCCESS;
}
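/* Literal pool bookkeeping in a nutshell: a pc-relative ldr has a 12-bit
   byte offset, so MAX_DIFFERENCE(4092) forces a pool flush before a pending
   literal would drift out of reach. While an instruction sits in the buffer,
   its low 12 bits hold the *index* of its literal (inst | cpool_index);
   patch_pc_relative_loads() below rewrites that index into the final
   pc-relative offset once the pool address is known. */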
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
	sljit_uw* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = cpool_ptr - compiler->cpool;
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}

static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
	sljit_uw* ptr;
	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
		FAIL_IF(push_cpool(compiler));

	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | compiler->cpool_fill;

	compiler->cpool[compiler->cpool_fill] = literal;
	compiler->cpool_unique[compiler->cpool_fill] = 1;
	compiler->cpool_fill++;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
{
	/* Make room for at least two instructions (it does not matter whether the first one has a literal). */
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
		return push_cpool(compiler);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must immediately follow the previous instruction (so that it can be converted to a bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	return push_inst(compiler, BLX | RM(TMP_REG1));
}
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set the unused flag for all literals in the constant pool.
	   Unused literals can belong to branches, which can be encoded as B or BL;
	   we can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
			diff = const_pool - last_pc_patch;
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			if ((sljit_s32)const_pool[ind] < 0) {
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				diff = (diff + ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
			}
			else
				*last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}

/* On some rare occasions we may need future patches. The probability is close to 0 in practice. */
struct future_patch {
	struct future_patch* next;
	sljit_s32 index;
	sljit_s32 value;
};

static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_s32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = (sljit_s32)cpool_start_address[cpool_current_index];
	else {
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = (sljit_s32)cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				value = curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	if (value >= 0) {
		if ((sljit_uw)value > cpool_current_index) {
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = value;
			curr_patch->value = cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}

#else

static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
{
	sljit_uw* ptr;

	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
}

#endif
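/* emit_imm() splits a 32-bit value into a movw/movt pair. The imm4 field of
   both encodings occupies bits 16-19, hence the ((imm << 4) & 0xf0000) and
   ((imm >> 12) & 0xf0000) shuffles. For example, imm == 0x12345678 produces
   movw reg, #0x5678 followed by movt reg, #0x1234. */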
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
{
	sljit_sw diff;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	if (jump->flags & IS_BL)
		code_ptr--;

	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2));
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (jump->flags & IS_BL) {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	}
	else {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
			jump->flags |= PATCH_B;
		}
	}
#else
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		code_ptr -= 2;
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif
	return 0;
}
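/* B and BL carry a signed 24-bit word offset relative to pc (which reads as
   the branch address plus 8); that is exactly the +/-32MB byte range tested
   above (diff <= 0x01ffffff && diff >= -0x02000000). When the target fits,
   the longer absolute-address sequence is collapsed into a single branch. */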
static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_s32 flush)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw *ptr = (sljit_uw*)addr;
	sljit_uw *inst = (sljit_uw*)ptr[0];
	sljit_uw mov_pc = ptr[1];
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush) {
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush) {
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		if (*inst != mov_pc) {
			inst[0] = mov_pc;
			if (!bl) {
				if (flush) {
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush) {
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}
		*ptr = new_addr;
	}
#else
	sljit_uw *inst = (sljit_uw*)addr;
	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
	if (flush) {
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif
}

static sljit_uw get_imm(sljit_uw imm);

static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_s32 flush)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw *ptr = (sljit_uw*)addr;
	sljit_uw *inst = (sljit_uw*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	src2 = get_imm(new_constant);
	if (src2) {
		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
		if (flush) {
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	src2 = get_imm(~new_constant);
	if (src2) {
		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
		if (flush) {
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	if (*inst != ldr_literal) {
		*inst = ldr_literal;
		if (flush) {
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}
	*ptr = new_constant;
#else
	sljit_uw *inst = (sljit_uw*)addr;
	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
	if (flush) {
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif
}
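/* sljit_generate_code() below is the second pass: it copies the buffered
   words into executable memory, resolves labels, and patches jumps and
   constants. PUSH_POOL words (0xff000000 | fill count) written by
   push_cpool() are markers, not instructions: when the pass meets one, the
   literal values that follow it in the buffer are aligned, compressed by
   patch_pc_relative_loads(), and emitted as a constant pool. On ARMv5 the
   size estimate also reserves two extra words per recorded patch
   (compiler->patches << 1); these appear to hold the out-of-line address
   word pairs used by rewritable jumps and constants. */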
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_uw *code;
	sljit_uw *code_ptr;
	sljit_uw *buf_ptr;
	sljit_uw *buf_end;
	sljit_uw size;
	sljit_uw word_count;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw cpool_size;
	sljit_uw cpool_skip_alignment;
	sljit_uw cpool_current_index;
	sljit_uw *cpool_start_address;
	sljit_uw *last_pc_patch;
	struct future_patch *first_patch;
#endif

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	size = compiler->size + (compiler->patches << 1);
	if (compiler->cpool_fill > 0)
		size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
#else
	size = compiler->size;
#endif
	code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	cpool_size = 0;
	cpool_skip_alignment = 0;
	cpool_current_index = 0;
	cpool_start_address = NULL;
	first_patch = NULL;
	last_pc_patch = code;
#endif

	code_ptr = code;
	word_count = 0;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	if (label && label->size == 0) {
		label->addr = (sljit_uw)code;
		label->size = 0;
		label = label->next;
	}

	do {
		buf_ptr = (sljit_uw*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			word_count++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			if (cpool_size > 0) {
				if (cpool_skip_alignment > 0) {
					buf_ptr++;
					cpool_skip_alignment--;
				}
				else {
					if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
						SLJIT_FREE_EXEC(code);
						compiler->error = SLJIT_ERR_ALLOC_FAILED;
						return NULL;
					}
					buf_ptr++;
					if (++cpool_current_index >= cpool_size) {
						SLJIT_ASSERT(!first_patch);
						cpool_size = 0;
						if (label && label->size == word_count) {
							/* Points after the current instruction. */
							label->addr = (sljit_uw)code_ptr;
							label->size = code_ptr - code;
							label = label->next;
						}
					}
				}
			}
			else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
#endif
				*code_ptr = *buf_ptr++;
				/* These structures are ordered by their address. */
				SLJIT_ASSERT(!label || label->size >= word_count);
				SLJIT_ASSERT(!jump || jump->addr >= word_count);
				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
				if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
					if (detect_jump_type(jump, code_ptr, code))
						code_ptr--;
					jump->addr = (sljit_uw)code_ptr;
#else
					jump->addr = (sljit_uw)(code_ptr - 2);
					if (detect_jump_type(jump, code_ptr, code))
						code_ptr -= 2;
#endif
					jump = jump->next;
				}
				if (label && label->size == word_count) {
					/* code_ptr can be affected above. */
					label->addr = (sljit_uw)(code_ptr + 1);
					label->size = (code_ptr + 1) - code;
					label = label->next;
				}
				if (const_ && const_->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
					const_->addr = (sljit_uw)code_ptr;
#else
					const_->addr = (sljit_uw)(code_ptr - 1);
#endif
					const_ = const_->next;
				}
				code_ptr++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			}
			else {
				/* Fortunately, no need to shift. */
				cpool_size = *buf_ptr++ & ~PUSH_POOL;
				SLJIT_ASSERT(cpool_size > 0);
				cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
				cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
				if (cpool_current_index > 0) {
					/* Unconditional branch. */
					*code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
					code_ptr = cpool_start_address + cpool_current_index;
				}
				cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
				cpool_current_index = 0;
				last_pc_patch = code_ptr;
			}
#endif
		} while (buf_ptr < buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	SLJIT_ASSERT(cpool_size == 0);
	if (compiler->cpool_fill > 0) {
		cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
		cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
		if (cpool_current_index > 0)
			code_ptr = cpool_start_address + cpool_current_index;

		buf_ptr = compiler->cpool;
		buf_end = buf_ptr + compiler->cpool_fill;
		cpool_current_index = 0;
		while (buf_ptr < buf_end) {
			if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
				SLJIT_FREE_EXEC(code);
				compiler->error = SLJIT_ERR_ALLOC_FAILED;
				return NULL;
			}
			buf_ptr++;
			cpool_current_index++;
		}
		SLJIT_ASSERT(!first_patch);
	}
#endif

	jump = compiler->jumps;
	while (jump) {
		buf_ptr = (sljit_uw*)jump->addr;

		if (jump->flags & PATCH_B) {
			if (!(jump->flags & JUMP_ADDR)) {
				SLJIT_ASSERT(jump->flags & JUMP_LABEL);
				SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
				*buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
			}
			else {
				SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
				*buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
			}
		}
		else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			jump->addr = (sljit_uw)code_ptr;
			code_ptr[0] = (sljit_uw)buf_ptr;
			code_ptr[1] = *buf_ptr;
			inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
			code_ptr += 2;
#else
			inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
		}
		else {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			if (jump->flags & IS_BL)
				buf_ptr--;
			if (*buf_ptr & (1 << 23))
				buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
			else
				buf_ptr += 1;
			*buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
#else
			inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
		}
		jump = jump->next;
	}

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	const_ = compiler->consts;
	while (const_) {
		buf_ptr = (sljit_uw*)const_->addr;
		const_->addr = (sljit_uw)code_ptr;

		code_ptr[0] = (sljit_uw)buf_ptr;
		code_ptr[1] = *buf_ptr;
		if (*buf_ptr & (1 << 23))
			buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
		else
			buf_ptr += 1;
		/* Set the value again (can be a simple constant). */
		inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0);
		code_ptr += 2;

		const_ = const_->next;
	}
#endif

	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	return code;
}
/* --------------------------------------------------------------------- */
/*  Entry, exit                                                           */
/* --------------------------------------------------------------------- */

/* emit_op inp_flags.
   WRITE_BACK must be the first, since it is a flag. */
#define WRITE_BACK	0x01
#define ALLOW_IMM	0x02
#define ALLOW_INV_IMM	0x04
#define ALLOW_ANY_IMM	(ALLOW_IMM | ALLOW_INV_IMM)
#define ARG_TEST	0x08

/* Creates an index in the data_transfer_insts array. */
#define WORD_DATA	0x00
#define BYTE_DATA	0x10
#define HALF_DATA	0x20
#define SIGNED_DATA	0x40
#define LOAD_DATA	0x80

/* Condition: AL. */
#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
	(0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))

static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size, i, tmp;
	sljit_uw push;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	/* Push saved and temporary registers:
	   stmdb sp!, {..., lr} */
	push = PUSH | (1 << 14);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		push |= 1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		push |= 1 << reg_map[i];

	FAIL_IF(push_inst(compiler, push));

	/* Stack must be aligned to 8 bytes: */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	local_size = ((size + local_size + 7) & ~7) - size;
	compiler->local_size = local_size;
	if (local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	if (args >= 1)
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0))));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1))));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2))));

	return SLJIT_SUCCESS;
}
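/* Frame layout note: both sljit_emit_enter() and sljit_set_context() round
   local_size so that the pushed-register area plus the locals stays a
   multiple of 8. E.g. with five pushed registers including lr (size == 20)
   and a requested local_size of 100: ((20 + 100 + 7) & ~7) - 20 == 100,
   giving a 120-byte, 8-byte aligned frame. */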
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((size + local_size + 7) & ~7) - size;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp;
	sljit_uw pop;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	if (compiler->local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));

	/* Pop saved and temporary registers:
	   ldmia sp!, {..., pc} */
	pop = POP | (1 << 15);

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		pop |= 1 << reg_map[i];

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		pop |= 1 << reg_map[i];

	return push_inst(compiler, pop);
}

/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   It contains 16 entries, but not all of them are different. */
static sljit_sw data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u N */ 0x00000000 /* not allowed */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
	(data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) >> 4] & 0x04000000)
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
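/* Worked example: EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, SLJIT_R0,
   SLJIT_R1, 4) picks data_transfer_insts[8] (0xe5100000, ldr), sets the U
   bit (1 << 23), and merges rd = r0, rn = r1 (per reg_map) and the immediate
   4, yielding 0xe5910004, i.e. ldr r0, [r1, #4]. */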
/* flags: */
/* Arguments are swapped. */
#define ARGS_SWAPPED	0x01
/* Inverted immediate. */
#define INV_IMM		0x02
/* Source and destination are registers. */
#define REG_DEST	0x04
#define REG_SOURCE	0x08
/* One instruction is enough. */
#define FAST_DEST	0x10
/* Multiple instructions are required. */
#define SLOW_DEST	0x20
/* SET_FLAGS must be (1 << 20) as it is also the value of the S bit (can be used for optimization). */
#define SET_FLAGS	(1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of the I bit (can be used for optimization). */
#define SRC2_IMM	(1 << 25)

#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
	return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)))

#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
	return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))

#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
	SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
	if (compiler->shift_imm != 0x20) { \
		SLJIT_ASSERT(src1 == TMP_REG1); \
		SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
		if (compiler->shift_imm != 0) \
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \
	} \
	return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1])));
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
	sljit_sw mul_inst;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (dst != src2) {
			if (src2 & SRC2_IMM) {
				if (flags & INV_IMM)
					EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
			}
			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			if (op == SLJIT_MOV_U8)
				return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | reg_map[dst]));
#else
			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
#endif
		}
		else if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			if (flags & INV_IMM)
				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | reg_map[dst]));
#else
			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
#endif
		}
		else if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			if (flags & INV_IMM)
				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
		}
		return SLJIT_SUCCESS;
	case SLJIT_NOT:
		if (src2 & SRC2_IMM) {
			if (flags & INV_IMM)
				EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
		}
		EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));

	case SLJIT_CLZ:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
		if (flags & SET_FLAGS)
			EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
		return SLJIT_SUCCESS;

	case SLJIT_ADD:
		SLJIT_ASSERT(!(flags & INV_IMM));
		EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);

	case SLJIT_ADDC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);

	case SLJIT_SUB:
		SLJIT_ASSERT(!(flags & INV_IMM));
		if (!(flags & ARGS_SWAPPED))
			EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
		EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);

	case SLJIT_SUBC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		if (!(flags & ARGS_SWAPPED))
			EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
		EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));
		if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
			mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
		else
			mul_inst = MUL | (reg_map[dst] << 16);

		if (dst != src2)
			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
		else if (dst != src1)
			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
		else {
			/* Rm and Rd must not be the same register. */
			SLJIT_ASSERT(dst != TMP_REG1);
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
		}

		if (!(op & SLJIT_SET_O))
			return SLJIT_SUCCESS;

		/* We need to use TMP_REG3. */
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
		/* cmp TMP_REG3, dst, asr #31. */
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0));

	case SLJIT_AND:
		if (!(flags & INV_IMM))
			EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP);
		EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP);

	case SLJIT_OR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP);

	case SLJIT_XOR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP);

	case SLJIT_SHL:
		EMIT_SHIFT_INS_AND_RETURN(0);

	case SLJIT_LSHR:
		EMIT_SHIFT_INS_AND_RETURN(1);

	case SLJIT_ASHR:
		EMIT_SHIFT_INS_AND_RETURN(2);
	}
	SLJIT_ASSERT_STOP();
	return SLJIT_SUCCESS;
}

#undef EMIT_DATA_PROCESS_INS_AND_RETURN
#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN
#undef EMIT_SHIFT_INS_AND_RETURN

/* Tests whether the immediate can be stored in the 12 bit imm field.
   Returns 0 if it is not possible. */
static sljit_uw get_imm(sljit_uw imm)
{
	sljit_s32 rol;

	if (imm <= 0xff)
		return SRC2_IMM | imm;

	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rol = 8;
	}
	else {
		imm = (imm << 24) | (imm >> 8);
		rol = 0;
	}

	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rol += 4;
	}

	if (!(imm & 0xf0000000)) {
		imm <<= 4;
		rol += 2;
	}

	if (!(imm & 0xc0000000)) {
		imm <<= 2;
		rol += 1;
	}

	if (!(imm & 0x00ffffff))
		return SRC2_IMM | (imm >> 24) | (rol << 8);
	else
		return 0;
}
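/* An ARM data processing immediate is an 8-bit value rotated right by twice
   the 4-bit rotate field. E.g. get_imm(0xff000000) yields
   SRC2_IMM | 0xff | (4 << 8): 0xff rotated right by 8 positions. A value
   such as 0x101 spans more than 8 significant bits in any rotation window,
   so get_imm() returns 0 for it. */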
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw imm2;
	sljit_s32 rol;

	/* Step 1: Search for a zero byte (8 contiguous zero bits). */
	mask = 0xff000000;
	rol = 8;
	while(1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}
		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* rol by 8. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					return 0;
			}
			break;
		}
	}

	/* The low 8 bits must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	if (!(imm & 0xff000000)) {
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	}
	else if (imm & 0xc0000000) {
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}
	else {
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
	FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
	return 1;
}
#endif
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	if (!(imm & ~0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif

	/* Create the immediate with one instruction. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));

	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* Create the immediate with two instructions. */
	FAIL_IF(generate_int(compiler, reg, imm, 1));
	FAIL_IF(generate_int(compiler, reg, ~imm, 0));

	/* Load integer. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
#else
	return emit_imm(compiler, reg, imm);
#endif
}
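/* load_immediate() therefore tries, in order: a 16-bit movw (ARMv7 only); a
   single rotated-immediate mov; a single mvn of the inverted value; then, on
   ARMv5, a two-instruction mov+orr / mvn+bic pair via generate_int(), and
   finally a pc-relative literal load. On ARMv7 the fallback is a movw/movt
   pair instead of a literal pool entry. */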
/* Helper function: set dst to reg + value using at most one instruction, without setting the flags. */
static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
{
	if (value >= 0) {
		value = get_imm(value);
		if (value)
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value));
	}
	else {
		value = get_imm(-value);
		if (value)
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value));
	}
	return SLJIT_ERR_UNSUPPORTED;
}

/* Can perform an operation using at most 1 instruction. */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;

	if (arg & SLJIT_IMM) {
		imm = get_imm(argw);
		if (imm) {
			if (inp_flags & ARG_TEST)
				return 1;
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
			return -1;
		}
		imm = get_imm(~argw);
		if (imm) {
			if (inp_flags & ARG_TEST)
				return 1;
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
			return -1;
		}
		return 0;
	}

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads/stores. */
	if (!(arg & REG_MASK))
		return 0;

	if (arg & OFFS_REG_MASK) {
		if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags))
			return 0;

		if (inp_flags & ARG_TEST)
			return 1;
		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
			RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
		return -1;
	}

	if (IS_TYPE1_TRANSFER(inp_flags)) {
		if (argw >= 0 && argw <= 0xfff) {
			if (inp_flags & ARG_TEST)
				return 1;
			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
			return -1;
		}
		if (argw < 0 && argw >= -0xfff) {
			if (inp_flags & ARG_TEST)
				return 1;
			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
			return -1;
		}
	}
	else {
		if (argw >= 0 && argw <= 0xff) {
			if (inp_flags & ARG_TEST)
				return 1;
			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
			return -1;
		}
		if (argw < 0 && argw >= -0xff) {
			if (inp_flags & ARG_TEST)
				return 1;
			argw = -argw;
			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
			return -1;
		}
	}

	return 0;
}

/* See getput_arg below.
   Note: can_cache is called only for binary operators. Those
   operators always use word arguments without write back. */
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	/* Immediate caching is not supported as it would be an operation on constant arguments. */
	if (arg & SLJIT_IMM)
		return 0;

	/* Always a simple operation. */
	if (arg & OFFS_REG_MASK)
		return 0;

	if (!(arg & REG_MASK)) {
		/* Immediate access. */
		if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
			return 1;
		return 0;
	}

	if (argw <= 0xfffff && argw >= -0xfffff)
		return 0;

	if (argw == next_argw && (next_arg & SLJIT_MEM))
		return 1;

	if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
		return 1;

	return 0;
}
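/* Caching scheme: when can_cache() approves, getput_arg() materializes an
   address (or a large constant offset) in TMP_REG3 and records it in
   compiler->cache_arg / cache_argw; a following access to a nearby address
   can then be performed with a single load/store relative to TMP_REG3
   instead of rebuilding the address from scratch. */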
#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \
	if (max_delta & 0xf00) \
		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \
	else \
		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm))));

#define TEST_WRITE_BACK() \
	if (inp_flags & WRITE_BACK) { \
		tmp_r = arg & REG_MASK; \
		if (reg == tmp_r) { \
			/* This can only happen for stores */ \
			/* since ldr reg, [reg, ...]! has no meaning. */ \
			SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
			reg = TMP_REG3; \
		} \
	}

/* Emit the necessary instructions. See can_cache above. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 tmp_r;
	sljit_sw max_delta;
	sljit_sw sign;
	sljit_uw imm;

	if (arg & SLJIT_IMM) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		return load_immediate(compiler, reg, argw);
	}

	SLJIT_ASSERT(arg & SLJIT_MEM);

	tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
	max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff;

	if ((arg & REG_MASK) == SLJIT_UNUSED) {
		/* Write back is not used. */
		imm = (sljit_uw)(argw - compiler->cache_argw);
		if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
			if (imm <= (sljit_uw)max_delta) {
				sign = 1;
				argw = argw - compiler->cache_argw;
			}
			else {
				sign = 0;
				argw = compiler->cache_argw - argw;
			}

			GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw);
			return SLJIT_SUCCESS;
		}

		/* With write back, we can create some sophisticated loads, but
		   it is hard to decide whether we should convert downward (0s) or upward (1s). */
		imm = (sljit_uw)(argw - next_argw);
		if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
			SLJIT_ASSERT(inp_flags & LOAD_DATA);

			compiler->cache_arg = SLJIT_IMM;
			compiler->cache_argw = argw;
			tmp_r = TMP_REG3;
		}

		FAIL_IF(load_immediate(compiler, tmp_r, argw));
		GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0);
		return SLJIT_SUCCESS;
	}

	if (arg & OFFS_REG_MASK) {
		SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
		if (inp_flags & WRITE_BACK)
			tmp_r = arg & REG_MASK;
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
		return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
	}

	imm = (sljit_uw)(argw - compiler->cache_argw);
	if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) {
		SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
		GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm);
		return SLJIT_SUCCESS;
	}
	if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) {
		SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
		imm = (sljit_uw)-(sljit_sw)imm;
		GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm);
		return SLJIT_SUCCESS;
	}

	imm = get_imm(argw & ~max_delta);
	if (imm) {
		TEST_WRITE_BACK();
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
		GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
		return SLJIT_SUCCESS;
	}

	imm = get_imm(-argw & ~max_delta);
	if (imm) {
		argw = -argw;
		TEST_WRITE_BACK();
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
		GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
		return SLJIT_SUCCESS;
	}

	if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
		TEST_WRITE_BACK();
		return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
	}

	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;

		TEST_WRITE_BACK();
		return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
	}

	imm = (sljit_uw)(argw - next_argw);
	if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));

		compiler->cache_arg = arg;
		compiler->cache_argw = argw;

		GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0);
		return SLJIT_SUCCESS;
	}

	if ((arg & REG_MASK) == tmp_r) {
		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;
		tmp_r = TMP_REG3;
	}

	FAIL_IF(load_immediate(compiler, tmp_r, argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
}
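/* emit_op_mem() below is the one-shot wrapper: it tries the single
   instruction form first and resets the cache before taking the slow path.
   emit_op_mem2() keeps the cache alive and passes the next argument pair,
   which lets two consecutive accesses share their address computation. */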
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	if (getput_arg_fast(compiler, flags, reg, arg, argw))
		return compiler->error;
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
}

static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
{
	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
		return compiler->error;
	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}

static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */

	/* We prefer registers and simple constants. */
	sljit_s32 dst_r;
	sljit_s32 src1_r;
	sljit_s32 src2_r = 0;
	sljit_s32 sugg_src2_r = TMP_REG2;
	sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* Destination check. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		dst_r = TMP_REG2;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	}
	else {
		SLJIT_ASSERT(dst & SLJIT_MEM);
		if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
			flags |= FAST_DEST;
			dst_r = TMP_REG2;
		}
		else {
			flags |= SLOW_DEST;
			dst_r = 0;
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_r = src1;
	else if (FAST_IS_REG(src2)) {
		flags |= ARGS_SWAPPED;
		src1_r = src2;
		src2 = src1;
		src2w = src1w;
	}
	else do { /* do { } while(0) is used because of breaks. */
		src1_r = 0;
		if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
			/* The second check will generate a hit. */
			src2_r = get_imm(src1w);
			if (src2_r) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}
			if (inp_flags & ALLOW_INV_IMM) {
				src2_r = get_imm(~src1w);
				if (src2_r) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}
			if (GET_OPCODE(op) == SLJIT_ADD) {
				src2_r = get_imm(-src1w);
				if (src2_r) {
					/* Note: ARGS_SWAPPED is intentionally not applied! */
					src1 = src2;
					src1w = src2w;
					op = SLJIT_SUB | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1_r = TMP_REG1;
		}
	} while (0);

	/* Source 2. */
	if (src2_r == 0) {
		if (FAST_IS_REG(src2)) {
			src2_r = src2;
			flags |= REG_SOURCE;
			if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
				dst_r = src2_r;
		}
		else do { /* do { } while(0) is used because of breaks. */
			if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) {
				src2_r = get_imm(src2w);
				if (src2_r)
					break;
				if (inp_flags & ALLOW_INV_IMM) {
					src2_r = get_imm(~src2w);
					if (src2_r) {
						flags |= INV_IMM;
						break;
					}
				}
				if (GET_OPCODE(op) == SLJIT_ADD) {
					src2_r = get_imm(-src2w);
					if (src2_r) {
						op = SLJIT_SUB | GET_ALL_FLAGS(op);
						flags &= ~ARGS_SWAPPED;
						break;
					}
				}
				if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) {
					src2_r = get_imm(-src2w);
					if (src2_r) {
						op = SLJIT_ADD | GET_ALL_FLAGS(op);
						flags &= ~ARGS_SWAPPED;
						break;
					}
				}
			}

			/* src2_r is 0. */
			if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
				FAIL_IF(compiler->error);
				src2_r = sugg_src2_r;
			}
		} while (0);
	}

	/* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero.
	   If they are zero, they must not be registers. */
	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
			flags |= ARGS_SWAPPED;
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
		src1_r = TMP_REG1;
		src2_r = TMP_REG2;
	}
	else if (src1_r == 0 && src2_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
		src1_r = TMP_REG1;
	}
	else if (src1_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1_r = TMP_REG1;
	}
	else if (src2_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
		src2_r = sugg_src2_r;
	}

	if (dst_r == 0)
		dst_r = TMP_REG2;

	if (src1_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
		src1_r = TMP_REG1;
	}

	if (src2_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
		src2_r = sugg_src2_r;
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	if (flags & (FAST_DEST | SLOW_DEST)) {
		if (flags & FAST_DEST)
			FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
		else
			FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
	}
	return SLJIT_SUCCESS;
}
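/* Per the ARM EABI, __aeabi_uidivmod/__aeabi_idivmod return the quotient in
   r0 and the remainder in r1, and a call may clobber the other
   argument/scratch registers. The stores and loads around the call in
   sljit_emit_op0 below keep the caller's r1/r2 alive when they hold live
   scratch values. */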
	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
			flags |= ARGS_SWAPPED;
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
		src1_r = TMP_REG1;
		src2_r = TMP_REG2;
	}
	else if (src1_r == 0 && src2_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
		src1_r = TMP_REG1;
	}
	else if (src1_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1_r = TMP_REG1;
	}
	else if (src2_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
		src2_r = sugg_src2_r;
	}

	if (dst_r == 0)
		dst_r = TMP_REG2;

	if (src1_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
		src1_r = TMP_REG1;
	}

	if (src2_r == 0) {
		FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
		src2_r = sugg_src2_r;
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	if (flags & (FAST_DEST | SLOW_DEST)) {
		if (flags & FAST_DEST)
			FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
		else
			FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
	}
	return SLJIT_SUCCESS;
}

#ifdef __cplusplus
extern "C" {
#endif

#if defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif
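/* Background note (an assumption based on the ARM run-time ABI, not spelled
   out in this file): __aeabi_uidivmod/__aeabi_idivmod return the quotient
   in r0 and the remainder in r1, which is why SLJIT_DIVMOD_* below can
   leave both results in SLJIT_R0/SLJIT_R1 without extra moves. A hedged,
   hypothetical reference model of that contract: */
#if 0
static void divmod_reference(unsigned int num, unsigned int den,
	unsigned int *quot, unsigned int *rem)
{
	*quot = num / den; /* r0 on return */
	*rem = num % den; /* r1 on return */
}
#endif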
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| (reg_map[SLJIT_R1] << 16)
			| (reg_map[SLJIT_R0] << 12)
			| (reg_map[SLJIT_R0] << 8)
			| reg_map[SLJIT_R1]);
#else
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1))));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| (reg_map[SLJIT_R1] << 16)
			| (reg_map[SLJIT_R0] << 12)
			| (reg_map[SLJIT_R0] << 8)
			| reg_map[TMP_REG1]);
#endif
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping);

		if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) {
			FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
			FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */));
		}
		else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3))
			FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */));

#if defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) {
			FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */));
			FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */));
		}
		else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3))
			return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */);
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
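/* Usage sketch (assumes "compiler" was created elsewhere): a 64-bit
   multiply of SLJIT_R0 by SLJIT_R1. Per the UMULL/SMULL operand packing
   above (RdHi in bits 16-19, RdLo in bits 12-15), the low word of the
   product ends up in SLJIT_R0 and the high word in SLJIT_R1. */
#if 0
	FAIL_IF(sljit_emit_op0(compiler, SLJIT_LMUL_UW));
#endif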
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV_P:
		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_U8:
		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOV_S8:
		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOV_U16:
		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOV_S16:
		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	case SLJIT_MOVU:
	case SLJIT_MOVU_U32:
	case SLJIT_MOVU_S32:
	case SLJIT_MOVU_P:
		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOVU_U8:
		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOVU_S8:
		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOVU_U16:
		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOVU_S16:
		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	case SLJIT_NOT:
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_NEG:
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);

	case SLJIT_CLZ:
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
	case SLJIT_SUB:
	case SLJIT_SUBC:
	case SLJIT_OR:
	case SLJIT_XOR:
		return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_LSHR:
	case SLJIT_ASHR:
		if (src2 & SLJIT_IMM) {
			compiler->shift_imm = src2w & 0x1f;
			return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
		}
		else {
			compiler->shift_imm = 0x20;
			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
		}
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg << 1;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_uw*)instruction);
}
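/* Usage sketch: on this target sljit_emit_op_custom expects exactly one
   32-bit ARM instruction word. Pushing a raw NOP could look like this
   ("raw_nop" is a hypothetical local; the encoding matches the NOP define
   above): */
#if 0
	sljit_uw raw_nop = 0xe1a00000; /* mov r0, r0 */
	FAIL_IF(sljit_emit_op_custom(compiler, &raw_nop, sizeof(raw_nop)));
#endif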
/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)

/* 0 - no fpu
   1 - vfp */
static sljit_s32 arm_fpu_type = -1;

static void init_compiler(void)
{
	if (arm_fpu_type != -1)
		return;

	/* TODO: Only the OS can help to determine the correct fpu type. */
	arm_fpu_type = 1;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#else
	if (arm_fpu_type == -1)
		init_compiler();
	return arm_fpu_type;
#endif
}

#else

#define arm_fpu_type 1

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
	/* Always available. */
	return 1;
}

#endif

#define FPU_LOAD (1 << 20)
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
	((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs))
#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16))

static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_sw tmp;
	sljit_uw imm;
	sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));
	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
		arg = SLJIT_MEM | TMP_REG1;
		argw = 0;
	}

	/* Fast loads and stores. */
	if ((arg & REG_MASK)) {
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
	}

	if (compiler->cache_arg == arg) {
		tmp = argw - compiler->cache_argw;
		if (!(tmp & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2));
		if (!(-tmp & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
		}
	}

	if (arg & REG_MASK) {
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
		}
		imm = get_imm(argw & ~0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm(-argw & ~0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
	}

	compiler->cache_arg = arg;
	compiler->cache_argw = argw;
	if (arg & REG_MASK) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
}
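/* Note on the 0x3fc masks above: VLDR/VSTR encode an 8-bit immediate that
   the hardware scales by 4, so only byte offsets in [-1020, 1020] that are
   multiples of 4 fit the instruction; everything else goes through a temp
   register. A hedged, hypothetical helper expressing the same fast-path
   test: */
#if 0
static int fits_vfp_offset(sljit_sw argw)
{
	/* Equivalent to !(argw & ~0x3fc) || !(-argw & ~0x3fc). */
	return argw >= -0x3fc && argw <= 0x3fc && !(argw & 0x3);
}
#endif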
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0)));
	return push_inst(compiler, VMRS);
}
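/* The VMRS emitted above is "vmrs APSR_nzcv, FPSCR": it copies the VFP
   comparison flags into the integer status register, so the ordinary
   condition codes from get_cc() below apply after a float compare. A usage
   sketch (assumes compiler and operands were prepared elsewhere): */
#if 0
	/* ... float compare emitted here ... */
	struct sljit_jump *less_jump = sljit_emit_jump(compiler, SLJIT_LESS_F64);
#endif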
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
			else
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
		op ^= SLJIT_F32_OP;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;
	}

	if (dst_r == TMP_FREG1)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}

#undef FPU_LOAD
#undef EMIT_FPU_DATA_TRANSFER
#undef EMIT_FPU_OPERATION
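/* Usage sketch (assumes setup elsewhere): FR0 = FR0 + FR1 in double
   precision. Memory operands would be staged through TMP_FREG1/TMP_FREG2
   by the emit_fop_mem calls above. */
#if 0
	FAIL_IF(sljit_emit_fop2(compiler, SLJIT_ADD_F64,
		SLJIT_FR0, 0, SLJIT_FR0, 0, SLJIT_FR1, 0));
#endif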
/* --------------------------------------------------------------------- */
/*  Other instructions                                                   */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));

	/* Memory. */
	if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
		return compiler->error;
	/* TMP_REG3 is used for caching. */
	FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
	else if (src & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
			FAIL_IF(compiler->error);
		else {
			compiler->cache_arg = 0;
			compiler->cache_argw = 0;
			FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
		}
	}
	else if (src & SLJIT_IMM)
		FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
	return push_inst(compiler, BLX | RM(TMP_REG3));
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

static sljit_uw get_cc(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_MUL_NOT_OVERFLOW:
	case SLJIT_EQUAL_F64:
		return 0x00000000;

	case SLJIT_NOT_EQUAL:
	case SLJIT_MUL_OVERFLOW:
	case SLJIT_NOT_EQUAL_F64:
		return 0x10000000;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x30000000;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x20000000;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x80000000;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x90000000;

	case SLJIT_SIG_LESS:
		return 0xb0000000;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0xa0000000;

	case SLJIT_SIG_GREATER:
		return 0xc0000000;

	case SLJIT_SIG_LESS_EQUAL:
		return 0xd0000000;

	case SLJIT_OVERFLOW:
	case SLJIT_UNORDERED_F64:
		return 0x60000000;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_ORDERED_F64:
		return 0x70000000;

	default:
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
		return 0xe0000000;
	}
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}
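/* Usage sketch (assumes a prior compare set the flags; "loop" and "back"
   are hypothetical locals): a conditional backward branch built from the
   primitives below. get_cc() folds the SLJIT condition into the
   instruction's top nibble. */
#if 0
	struct sljit_label *loop = sljit_emit_label(compiler);
	/* ... loop body and a compare ... */
	struct sljit_jump *back = sljit_emit_jump(compiler, SLJIT_NOT_EQUAL);
	sljit_set_label(back, loop);
#endif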
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* On ARM, we don't need to touch the arguments. */
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));

	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
		jump->addr = compiler->size;
		compiler->patches++;
	}

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(emit_blx(compiler));
	}

	if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
		jump->addr = compiler->size;
#else
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
	jump->addr = compiler->size;
#endif
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* On ARM, we don't need to touch the arguments. */
	if (!(src & SLJIT_IMM)) {
		if (FAST_IS_REG(src))
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));

		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = srcw;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(emit_blx(compiler));
#else
	FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
#endif
	jump->addr = compiler->size;
	return SLJIT_SUCCESS;
}
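/* Usage sketch: an indirect call through a target address already in a
   register takes the single-instruction BLX path above (assumes "compiler"
   exists and the target was moved into SLJIT_R2 beforehand). */
#if 0
	FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL0, SLJIT_R2, 0));
#endif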
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_uw cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	cc = get_cc(type & 0xff);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op < SLJIT_ADD) {
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
		FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
		return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
	if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
		FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
		/* The condition must always be set, even if the ORR/EOR is not executed above. */
		return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
	}

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
		srcw = 0;
	} else if (src & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
		srcw = 0;
	}

	FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
	FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
	if (dst_r == TMP_REG2)
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));

	return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);

	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value));
	compiler->patches++;
#else
	PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
#endif
	set_const(const_, compiler);

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
	inline_set_jump_addr(addr, new_addr, 1);
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	inline_set_const(addr, new_constant, 1);
}
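/* Usage sketch: sljit_emit_const creates a patchable constant load that
   sljit_set_const above rewrites in the generated code (on ARMv5 via the
   literal pool, on ARMv7 via the MOVW/MOVT pair). Assumes code generation
   finished before patching; "c" is a hypothetical local and
   sljit_get_const_addr comes from the public sljitLir.h API. */
#if 0
	struct sljit_const *c = sljit_emit_const(compiler, SLJIT_R0, 0, 123);
	/* ... sljit_generate_code(compiler) ... */
	sljit_set_const(sljit_get_const_addr(c), 456);
#endif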