1 /* 2 * Stack-less Just-In-Time compiler 3 * 4 * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without modification, are 7 * permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this list of 10 * conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 * of conditions and the following disclaimer in the documentation and/or other materials 14 * provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* x86 32-bit arch dependent functions. */ 28 29 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) 30 { 31 sljit_u8 *inst; 32 33 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); 34 FAIL_IF(!inst); 35 INC_SIZE(1 + sizeof(sljit_sw)); 36 *inst++ = opcode; 37 sljit_unaligned_store_sw(inst, imm); 38 return SLJIT_SUCCESS; 39 } 40 41 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset) 42 { 43 if (type == SLJIT_JUMP) { 44 *code_ptr++ = JMP_i32; 45 jump->addr++; 46 } 47 else if (type >= SLJIT_FAST_CALL) { 48 *code_ptr++ = CALL_i32; 49 jump->addr++; 50 } 51 else { 52 *code_ptr++ = GROUP_0F; 53 *code_ptr++ = get_jump_code(type); 54 jump->addr += 2; 55 } 56 57 if (jump->flags & JUMP_LABEL) 58 jump->flags |= PATCH_MW; 59 else 60 sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset); 61 code_ptr += 4; 62 63 return code_ptr; 64 } 65 66 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, 67 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, 68 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 69 { 70 sljit_s32 args, size; 71 sljit_u8 *inst; 72 73 CHECK_ERROR(); 74 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); 75 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); 76 77 args = get_arg_count(arg_types); 78 compiler->args = args; 79 80 /* [esp+0] for saving temporaries and function calls. */ 81 compiler->stack_tmp_size = 2 * sizeof(sljit_sw); 82 83 #if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 84 if (scratches > 3) 85 compiler->stack_tmp_size = 3 * sizeof(sljit_sw); 86 #endif 87 88 compiler->saveds_offset = compiler->stack_tmp_size; 89 if (scratches > 3) 90 compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); 91 92 compiler->locals_offset = compiler->saveds_offset; 93 94 if (saveds > 3) 95 compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); 96 97 if (options & SLJIT_F64_ALIGNMENT) 98 compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); 99 100 size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); 101 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 102 size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); 103 #else 104 size += (args > 0 ? (2 + args * 3) : 0); 105 #endif 106 inst = (sljit_u8*)ensure_buf(compiler, 1 + size); 107 FAIL_IF(!inst); 108 109 INC_SIZE(size); 110 PUSH_REG(reg_map[TMP_REG1]); 111 #if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 112 if (args > 0) { 113 *inst++ = MOV_r_rm; 114 *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; 115 } 116 #endif 117 if (saveds > 2 || scratches > 9) 118 PUSH_REG(reg_map[SLJIT_S2]); 119 if (saveds > 1 || scratches > 10) 120 PUSH_REG(reg_map[SLJIT_S1]); 121 if (saveds > 0 || scratches > 11) 122 PUSH_REG(reg_map[SLJIT_S0]); 123 124 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 125 if (args > 0) { 126 inst[0] = MOV_r_rm; 127 inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; 128 inst += 2; 129 } 130 if (args > 1) { 131 inst[0] = MOV_r_rm; 132 inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; 133 inst += 2; 134 } 135 if (args > 2) { 136 inst[0] = MOV_r_rm; 137 inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; 138 inst[2] = 0x24; 139 inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ 140 } 141 #else 142 if (args > 0) { 143 inst[0] = MOV_r_rm; 144 inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; 145 inst[2] = sizeof(sljit_sw) * 2; 146 inst += 3; 147 } 148 if (args > 1) { 149 inst[0] = MOV_r_rm; 150 inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; 151 inst[2] = sizeof(sljit_sw) * 3; 152 inst += 3; 153 } 154 if (args > 2) { 155 inst[0] = MOV_r_rm; 156 inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; 157 inst[2] = sizeof(sljit_sw) * 4; 158 } 159 #endif 160 161 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); 162 163 #if defined(__APPLE__) 164 /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ 165 saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); 166 local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; 167 #else 168 if (options & SLJIT_F64_ALIGNMENT) 169 local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); 170 else 171 local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); 172 #endif 173 174 compiler->local_size = local_size; 175 176 #ifdef _WIN32 177 if (local_size > 0) { 178 if (local_size <= 4 * 4096) { 179 if (local_size > 4096) 180 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); 181 if (local_size > 2 * 4096) 182 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); 183 if (local_size > 3 * 4096) 184 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); 185 } 186 else { 187 EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); 188 EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12); 189 190 SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); 191 192 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096); 193 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), 194 SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); 195 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), 196 SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1)); 197 198 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); 199 FAIL_IF(!inst); 200 201 INC_SIZE(2); 202 inst[0] = JNE_i8; 203 inst[1] = (sljit_s8) -16; 204 } 205 206 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); 207 } 208 #endif 209 210 SLJIT_ASSERT(local_size > 0); 211 212 #if !defined(__APPLE__) 213 if (options & SLJIT_F64_ALIGNMENT) { 214 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0); 215 216 /* Some space might allocated during sljit_grow_stack() above on WIN32. */ 217 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), 218 SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw))); 219 220 #if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 221 if (compiler->local_size > 1024) 222 FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), 223 TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw))); 224 #endif 225 226 inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); 227 FAIL_IF(!inst); 228 229 INC_SIZE(6); 230 inst[0] = GROUP_BINARY_81; 231 inst[1] = MOD_REG | AND | reg_map[SLJIT_SP]; 232 sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1)); 233 234 /* The real local size must be used. */ 235 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0); 236 } 237 #endif 238 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), 239 SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); 240 } 241 242 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, 243 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, 244 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 245 { 246 CHECK_ERROR(); 247 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); 248 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); 249 250 compiler->args = get_arg_count(arg_types); 251 252 /* [esp+0] for saving temporaries and function calls. */ 253 compiler->stack_tmp_size = 2 * sizeof(sljit_sw); 254 255 #if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 256 if (scratches > 3) 257 compiler->stack_tmp_size = 3 * sizeof(sljit_sw); 258 #endif 259 260 compiler->saveds_offset = compiler->stack_tmp_size; 261 if (scratches > 3) 262 compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); 263 264 compiler->locals_offset = compiler->saveds_offset; 265 266 if (saveds > 3) 267 compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); 268 269 if (options & SLJIT_F64_ALIGNMENT) 270 compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); 271 272 #if defined(__APPLE__) 273 saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); 274 compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; 275 #else 276 if (options & SLJIT_F64_ALIGNMENT) 277 compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); 278 else 279 compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); 280 #endif 281 return SLJIT_SUCCESS; 282 } 283 284 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) 285 { 286 sljit_s32 size; 287 sljit_u8 *inst; 288 289 CHECK_ERROR(); 290 CHECK(check_sljit_emit_return(compiler, op, src, srcw)); 291 SLJIT_ASSERT(compiler->args >= 0); 292 293 FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); 294 295 SLJIT_ASSERT(compiler->local_size > 0); 296 297 #if !defined(__APPLE__) 298 if (compiler->options & SLJIT_F64_ALIGNMENT) 299 EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size) 300 else 301 FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), 302 SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); 303 #else 304 FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), 305 SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); 306 #endif 307 308 size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) + 309 (compiler->saveds <= 3 ? compiler->saveds : 3); 310 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 311 if (compiler->args > 2) 312 size += 2; 313 #else 314 if (compiler->args > 0) 315 size += 2; 316 #endif 317 inst = (sljit_u8*)ensure_buf(compiler, 1 + size); 318 FAIL_IF(!inst); 319 320 INC_SIZE(size); 321 322 if (compiler->saveds > 0 || compiler->scratches > 11) 323 POP_REG(reg_map[SLJIT_S0]); 324 if (compiler->saveds > 1 || compiler->scratches > 10) 325 POP_REG(reg_map[SLJIT_S1]); 326 if (compiler->saveds > 2 || compiler->scratches > 9) 327 POP_REG(reg_map[SLJIT_S2]); 328 POP_REG(reg_map[TMP_REG1]); 329 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 330 if (compiler->args > 2) 331 RET_I16(sizeof(sljit_sw)); 332 else 333 RET(); 334 #else 335 RET(); 336 #endif 337 338 return SLJIT_SUCCESS; 339 } 340 341 /* --------------------------------------------------------------------- */ 342 /* Operators */ 343 /* --------------------------------------------------------------------- */ 344 345 /* Size contains the flags as well. */ 346 static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, 347 /* The register or immediate operand. */ 348 sljit_s32 a, sljit_sw imma, 349 /* The general operand (not immediate). */ 350 sljit_s32 b, sljit_sw immb) 351 { 352 sljit_u8 *inst; 353 sljit_u8 *buf_ptr; 354 sljit_s32 flags = size & ~0xf; 355 sljit_s32 inst_size; 356 357 /* Both cannot be switched on. */ 358 SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); 359 /* Size flags not allowed for typed instructions. */ 360 SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); 361 /* Both size flags cannot be switched on. */ 362 SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); 363 /* SSE2 and immediate is not possible. */ 364 SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); 365 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) 366 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) 367 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); 368 369 size &= 0xf; 370 inst_size = size; 371 372 if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) 373 inst_size++; 374 if (flags & EX86_PREF_66) 375 inst_size++; 376 377 /* Calculate size of b. */ 378 inst_size += 1; /* mod r/m byte. */ 379 if (b & SLJIT_MEM) { 380 if ((b & REG_MASK) == SLJIT_UNUSED) 381 inst_size += sizeof(sljit_sw); 382 else if (immb != 0 && !(b & OFFS_REG_MASK)) { 383 /* Immediate operand. */ 384 if (immb <= 127 && immb >= -128) 385 inst_size += sizeof(sljit_s8); 386 else 387 inst_size += sizeof(sljit_sw); 388 } 389 390 if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) 391 b |= TO_OFFS_REG(SLJIT_SP); 392 393 if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) 394 inst_size += 1; /* SIB byte. */ 395 } 396 397 /* Calculate size of a. */ 398 if (a & SLJIT_IMM) { 399 if (flags & EX86_BIN_INS) { 400 if (imma <= 127 && imma >= -128) { 401 inst_size += 1; 402 flags |= EX86_BYTE_ARG; 403 } else 404 inst_size += 4; 405 } 406 else if (flags & EX86_SHIFT_INS) { 407 imma &= 0x1f; 408 if (imma != 1) { 409 inst_size ++; 410 flags |= EX86_BYTE_ARG; 411 } 412 } else if (flags & EX86_BYTE_ARG) 413 inst_size++; 414 else if (flags & EX86_HALF_ARG) 415 inst_size += sizeof(short); 416 else 417 inst_size += sizeof(sljit_sw); 418 } 419 else 420 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); 421 422 inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); 423 PTR_FAIL_IF(!inst); 424 425 /* Encoding the byte. */ 426 INC_SIZE(inst_size); 427 if (flags & EX86_PREF_F2) 428 *inst++ = 0xf2; 429 if (flags & EX86_PREF_F3) 430 *inst++ = 0xf3; 431 if (flags & EX86_PREF_66) 432 *inst++ = 0x66; 433 434 buf_ptr = inst + size; 435 436 /* Encode mod/rm byte. */ 437 if (!(flags & EX86_SHIFT_INS)) { 438 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) 439 *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; 440 441 if (a & SLJIT_IMM) 442 *buf_ptr = 0; 443 else if (!(flags & EX86_SSE2_OP1)) 444 *buf_ptr = reg_map[a] << 3; 445 else 446 *buf_ptr = a << 3; 447 } 448 else { 449 if (a & SLJIT_IMM) { 450 if (imma == 1) 451 *inst = GROUP_SHIFT_1; 452 else 453 *inst = GROUP_SHIFT_N; 454 } else 455 *inst = GROUP_SHIFT_CL; 456 *buf_ptr = 0; 457 } 458 459 if (!(b & SLJIT_MEM)) 460 *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b); 461 else if ((b & REG_MASK) != SLJIT_UNUSED) { 462 if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { 463 if (immb != 0) { 464 if (immb <= 127 && immb >= -128) 465 *buf_ptr |= 0x40; 466 else 467 *buf_ptr |= 0x80; 468 } 469 470 if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) 471 *buf_ptr++ |= reg_map[b & REG_MASK]; 472 else { 473 *buf_ptr++ |= 0x04; 474 *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3); 475 } 476 477 if (immb != 0) { 478 if (immb <= 127 && immb >= -128) 479 *buf_ptr++ = immb; /* 8 bit displacement. */ 480 else { 481 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ 482 buf_ptr += sizeof(sljit_sw); 483 } 484 } 485 } 486 else { 487 *buf_ptr++ |= 0x04; 488 *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6); 489 } 490 } 491 else { 492 *buf_ptr++ |= 0x05; 493 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ 494 buf_ptr += sizeof(sljit_sw); 495 } 496 497 if (a & SLJIT_IMM) { 498 if (flags & EX86_BYTE_ARG) 499 *buf_ptr = imma; 500 else if (flags & EX86_HALF_ARG) 501 sljit_unaligned_store_s16(buf_ptr, imma); 502 else if (!(flags & EX86_SHIFT_INS)) 503 sljit_unaligned_store_sw(buf_ptr, imma); 504 } 505 506 return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); 507 } 508 509 /* --------------------------------------------------------------------- */ 510 /* Call / return instructions */ 511 /* --------------------------------------------------------------------- */ 512 513 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 514 515 static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) 516 { 517 sljit_s32 stack_size = 0; 518 sljit_s32 word_arg_count = 0; 519 520 arg_types >>= SLJIT_DEF_SHIFT; 521 522 while (arg_types) { 523 switch (arg_types & SLJIT_DEF_MASK) { 524 case SLJIT_ARG_TYPE_F32: 525 stack_size += sizeof(sljit_f32); 526 break; 527 case SLJIT_ARG_TYPE_F64: 528 stack_size += sizeof(sljit_f64); 529 break; 530 default: 531 word_arg_count++; 532 if (word_arg_count > 2) 533 stack_size += sizeof(sljit_sw); 534 break; 535 } 536 537 arg_types >>= SLJIT_DEF_SHIFT; 538 } 539 540 if (word_arg_count_ptr) 541 *word_arg_count_ptr = word_arg_count; 542 543 return stack_size; 544 } 545 546 static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, 547 sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) 548 { 549 sljit_u8 *inst; 550 sljit_s32 float_arg_count; 551 552 if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) { 553 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 554 FAIL_IF(!inst); 555 INC_SIZE(1); 556 PUSH_REG(reg_map[SLJIT_R2]); 557 } 558 else if (stack_size > 0) { 559 if (word_arg_count >= 4) 560 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); 561 562 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), 563 SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); 564 565 stack_size = 0; 566 arg_types >>= SLJIT_DEF_SHIFT; 567 word_arg_count = 0; 568 float_arg_count = 0; 569 while (arg_types) { 570 switch (arg_types & SLJIT_DEF_MASK) { 571 case SLJIT_ARG_TYPE_F32: 572 float_arg_count++; 573 FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); 574 stack_size += sizeof(sljit_f32); 575 break; 576 case SLJIT_ARG_TYPE_F64: 577 float_arg_count++; 578 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); 579 stack_size += sizeof(sljit_f64); 580 break; 581 default: 582 word_arg_count++; 583 if (word_arg_count == 3) { 584 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0); 585 stack_size += sizeof(sljit_sw); 586 } 587 else if (word_arg_count == 4) { 588 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0); 589 stack_size += sizeof(sljit_sw); 590 } 591 break; 592 } 593 594 arg_types >>= SLJIT_DEF_SHIFT; 595 } 596 } 597 598 if (word_arg_count > 0) { 599 if (swap_args) { 600 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 601 FAIL_IF(!inst); 602 INC_SIZE(1); 603 604 *inst++ = XCHG_EAX_r | reg_map[SLJIT_R2]; 605 } 606 else { 607 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); 608 FAIL_IF(!inst); 609 INC_SIZE(2); 610 611 *inst++ = MOV_r_rm; 612 *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; 613 } 614 } 615 616 return SLJIT_SUCCESS; 617 } 618 619 #endif 620 621 static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) 622 { 623 sljit_s32 stack_size = 0; 624 sljit_s32 word_arg_count = 0; 625 626 arg_types >>= SLJIT_DEF_SHIFT; 627 628 while (arg_types) { 629 switch (arg_types & SLJIT_DEF_MASK) { 630 case SLJIT_ARG_TYPE_F32: 631 stack_size += sizeof(sljit_f32); 632 break; 633 case SLJIT_ARG_TYPE_F64: 634 stack_size += sizeof(sljit_f64); 635 break; 636 default: 637 word_arg_count++; 638 stack_size += sizeof(sljit_sw); 639 break; 640 } 641 642 arg_types >>= SLJIT_DEF_SHIFT; 643 } 644 645 if (word_arg_count_ptr) 646 *word_arg_count_ptr = word_arg_count; 647 648 if (stack_size <= compiler->stack_tmp_size) 649 return 0; 650 651 #if defined(__APPLE__) 652 return ((stack_size - compiler->stack_tmp_size + 15) & ~15); 653 #else 654 return stack_size - compiler->stack_tmp_size; 655 #endif 656 } 657 658 static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, 659 sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count) 660 { 661 sljit_s32 float_arg_count = 0; 662 663 if (word_arg_count >= 4) 664 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); 665 666 if (stack_size > 0) 667 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), 668 SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); 669 670 stack_size = 0; 671 word_arg_count = 0; 672 arg_types >>= SLJIT_DEF_SHIFT; 673 674 while (arg_types) { 675 switch (arg_types & SLJIT_DEF_MASK) { 676 case SLJIT_ARG_TYPE_F32: 677 float_arg_count++; 678 FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); 679 stack_size += sizeof(sljit_f32); 680 break; 681 case SLJIT_ARG_TYPE_F64: 682 float_arg_count++; 683 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); 684 stack_size += sizeof(sljit_f64); 685 break; 686 default: 687 word_arg_count++; 688 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0); 689 stack_size += sizeof(sljit_sw); 690 break; 691 } 692 693 arg_types >>= SLJIT_DEF_SHIFT; 694 } 695 696 return SLJIT_SUCCESS; 697 } 698 699 static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, 700 sljit_s32 arg_types, sljit_s32 stack_size) 701 { 702 sljit_u8 *inst; 703 sljit_s32 single; 704 705 if (stack_size > 0) 706 FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), 707 SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); 708 709 if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) 710 return SLJIT_SUCCESS; 711 712 single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32); 713 714 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); 715 FAIL_IF(!inst); 716 INC_SIZE(3); 717 inst[0] = single ? FSTPS : FSTPD; 718 inst[1] = (0x03 << 3) | 0x04; 719 inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; 720 721 return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); 722 } 723 724 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, 725 sljit_s32 arg_types) 726 { 727 struct sljit_jump *jump; 728 sljit_s32 stack_size = 0; 729 sljit_s32 word_arg_count; 730 731 CHECK_ERROR_PTR(); 732 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); 733 734 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 735 if ((type & 0xff) == SLJIT_CALL) { 736 stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); 737 PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0)); 738 739 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 740 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 741 compiler->skip_checks = 1; 742 #endif 743 744 jump = sljit_emit_jump(compiler, type); 745 PTR_FAIL_IF(jump == NULL); 746 747 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0)); 748 return jump; 749 } 750 #endif 751 752 stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); 753 PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); 754 755 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 756 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 757 compiler->skip_checks = 1; 758 #endif 759 760 jump = sljit_emit_jump(compiler, type); 761 PTR_FAIL_IF(jump == NULL); 762 763 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size)); 764 return jump; 765 } 766 767 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, 768 sljit_s32 arg_types, 769 sljit_s32 src, sljit_sw srcw) 770 { 771 sljit_s32 stack_size = 0; 772 sljit_s32 word_arg_count; 773 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 774 sljit_s32 swap_args; 775 #endif 776 777 CHECK_ERROR(); 778 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); 779 780 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) 781 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3); 782 783 if ((type & 0xff) == SLJIT_CALL) { 784 stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); 785 swap_args = 0; 786 787 if (word_arg_count > 0) { 788 if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) { 789 swap_args = 1; 790 if (((src & REG_MASK) | 0x2) == SLJIT_R2) 791 src ^= 0x2; 792 if ((OFFS_REG(src) | 0x2) == SLJIT_R2) 793 src ^= TO_OFFS_REG(0x2); 794 } 795 } 796 797 FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); 798 799 compiler->saveds_offset += stack_size; 800 compiler->locals_offset += stack_size; 801 802 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 803 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 804 compiler->skip_checks = 1; 805 #endif 806 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); 807 808 compiler->saveds_offset -= stack_size; 809 compiler->locals_offset -= stack_size; 810 811 return post_call_with_args(compiler, arg_types, 0); 812 } 813 #endif 814 815 stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); 816 FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); 817 818 compiler->saveds_offset += stack_size; 819 compiler->locals_offset += stack_size; 820 821 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 822 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 823 compiler->skip_checks = 1; 824 #endif 825 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); 826 827 compiler->saveds_offset -= stack_size; 828 compiler->locals_offset -= stack_size; 829 830 return post_call_with_args(compiler, arg_types, stack_size); 831 } 832 833 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) 834 { 835 sljit_u8 *inst; 836 837 CHECK_ERROR(); 838 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); 839 ADJUST_LOCAL_OFFSET(dst, dstw); 840 841 CHECK_EXTRA_REGS(dst, dstw, (void)0); 842 843 /* For UNUSED dst. Uncommon, but possible. */ 844 if (dst == SLJIT_UNUSED) 845 dst = TMP_REG1; 846 847 if (FAST_IS_REG(dst)) { 848 /* Unused dest is possible here. */ 849 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 850 FAIL_IF(!inst); 851 852 INC_SIZE(1); 853 POP_REG(reg_map[dst]); 854 return SLJIT_SUCCESS; 855 } 856 857 /* Memory. */ 858 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); 859 FAIL_IF(!inst); 860 *inst++ = POP_rm; 861 return SLJIT_SUCCESS; 862 } 863 864 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) 865 { 866 sljit_u8 *inst; 867 868 CHECK_ERROR(); 869 CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); 870 ADJUST_LOCAL_OFFSET(src, srcw); 871 872 CHECK_EXTRA_REGS(src, srcw, (void)0); 873 874 if (FAST_IS_REG(src)) { 875 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); 876 FAIL_IF(!inst); 877 878 INC_SIZE(1 + 1); 879 PUSH_REG(reg_map[src]); 880 } 881 else { 882 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); 883 FAIL_IF(!inst); 884 *inst++ = GROUP_FF; 885 *inst |= PUSH_rm; 886 887 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 888 FAIL_IF(!inst); 889 INC_SIZE(1); 890 } 891 892 RET(); 893 return SLJIT_SUCCESS; 894 } 895