/**************************************************************************
 *
 * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "pipe/p_config.h"
#include "util/u_cpu_detect.h"

#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__))

#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "util/u_pointer.h"

#include "rtasm_execmem.h"
#include "rtasm_x86sse.h"

#define DISASSEM 0
#define X86_TWOB 0x0f


#define DUMP_SSE 0


void x86_print_reg( struct x86_reg reg )
{
   if (reg.mod != mod_REG)
      debug_printf( "[" );

   switch( reg.file ) {
   case file_REG32:
      switch( reg.idx ) {
      case reg_AX: debug_printf( "EAX" ); break;
      case reg_CX: debug_printf( "ECX" ); break;
      case reg_DX: debug_printf( "EDX" ); break;
      case reg_BX: debug_printf( "EBX" ); break;
      case reg_SP: debug_printf( "ESP" ); break;
      case reg_BP: debug_printf( "EBP" ); break;
      case reg_SI: debug_printf( "ESI" ); break;
      case reg_DI: debug_printf( "EDI" ); break;
      }
      break;
   case file_MMX:
      debug_printf( "MMX%u", reg.idx );
      break;
   case file_XMM:
      debug_printf( "XMM%u", reg.idx );
      break;
   case file_x87:
      debug_printf( "fp%u", reg.idx );
      break;
   }

   if (reg.mod == mod_DISP8 ||
       reg.mod == mod_DISP32)
      debug_printf("+%d", reg.disp);

   if (reg.mod != mod_REG)
      debug_printf( "]" );
}

#if DUMP_SSE

#define DUMP_START() debug_printf( "\n" )
#define DUMP_END() debug_printf( "\n" )

#define DUMP() do {                                     \
   const char *foo = __FUNCTION__;                      \
   while (*foo && *foo != '_')                          \
      foo++;                                            \
   if (*foo)                                            \
      foo++;                                            \
   debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \
} while (0)

#define DUMP_I( I ) do {                                \
   DUMP();                                              \
   debug_printf( "%u", I );                             \
} while( 0 )

#define DUMP_R( R0 ) do {                               \
   DUMP();                                              \
   x86_print_reg( R0 );                                 \
} while( 0 )

#define DUMP_RR( R0, R1 ) do {                          \
   DUMP();                                              \
   x86_print_reg( R0 );                                 \
   debug_printf( ", " );                                \
   x86_print_reg( R1 );                                 \
} while( 0 )

#define DUMP_RI( R0, I ) do {                           \
   DUMP();                                              \
   x86_print_reg( R0 );                                 \
   debug_printf( ", %u", I );                           \
} while( 0 )

#define DUMP_RRI( R0, R1, I ) do {                      \
   DUMP();                                              \
   x86_print_reg( R0 );                                 \
   debug_printf( ", " );                                \
   x86_print_reg( R1 );                                 \
   debug_printf( ", %u", I );                           \
} while( 0 )

#else

#define DUMP_START()
#define DUMP_END()
#define DUMP( )
#define DUMP_I( I )
#define DUMP_R( R0 )
#define DUMP_RR( R0, R1 )
#define DUMP_RI( R0, I )
#define DUMP_RRI( R0, R1, I )

#endif


static void do_realloc( struct x86_function *p )
{
   if (p->store == p->error_overflow) {
      p->csr = p->store;
   }
   else if (p->size == 0) {
      p->size = 1024;
      p->store = rtasm_exec_malloc(p->size);
      p->csr = p->store;
   }
   else {
      uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store );
      unsigned char *tmp = p->store;
      p->size *= 2;
      p->store = rtasm_exec_malloc(p->size);

      if (p->store) {
         memcpy(p->store, tmp, used);
         p->csr = p->store + used;
      }
      else {
         p->csr = p->store;
      }

      rtasm_exec_free(tmp);
   }

   if (p->store == NULL) {
      p->store = p->csr = p->error_overflow;
      p->size = sizeof(p->error_overflow);
   }
}

/* Emit bytes to the instruction stream:
 */
static unsigned char *reserve( struct x86_function *p, int bytes )
{
   if (p->csr + bytes - p->store > (int) p->size)
      do_realloc(p);

   {
      unsigned char *csr = p->csr;
      p->csr += bytes;
      return csr;
   }
}



static void emit_1b( struct x86_function *p, char b0 )
{
   char *csr = (char *)reserve(p, 1);
   *csr = b0;
}

static void emit_1i( struct x86_function *p, int i0 )
{
   int *icsr = (int *)reserve(p, sizeof(i0));
   *icsr = i0;
}

static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *csr = reserve(p, 1);
   *csr++ = b0;
}

static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *csr = reserve(p, 2);
   *csr++ = b0;
   *csr++ = b1;
}

static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *csr = reserve(p, 3);
   *csr++ = b0;
   *csr++ = b1;
   *csr++ = b2;
}


/* Build a modRM byte + possible displacement.  No treatment of SIB
 * indexing.  BZZT - no way to encode an absolute address.
 *
 * This is the "/r" field in the x86 manuals...
 */
static void emit_modrm( struct x86_function *p,
                        struct x86_reg reg,
                        struct x86_reg regmem )
{
   unsigned char val = 0;

   assert(reg.mod == mod_REG);

   /* TODO: support extended x86-64 registers */
   assert(reg.idx < 8);
   assert(regmem.idx < 8);

   val |= regmem.mod << 6;      /* mod field */
   val |= reg.idx << 3;         /* reg field */
   val |= regmem.idx;           /* r/m field */

   emit_1ub(p, val);

   /* Oh-oh we've stumbled into the SIB thing.
    */
   if (regmem.file == file_REG32 &&
       regmem.idx == reg_SP &&
       regmem.mod != mod_REG) {
      emit_1ub(p, 0x24);        /* simplistic! */
   }

   switch (regmem.mod) {
   case mod_REG:
   case mod_INDIRECT:
      break;
   case mod_DISP8:
      emit_1b(p, (char) regmem.disp);
      break;
   case mod_DISP32:
      emit_1i(p, regmem.disp);
      break;
   default:
      assert(0);
      break;
   }
}
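/* Worked example, for illustration only: an x86_mov() of EAX from [EBX + 8]
 * goes through emit_op_modrm()/emit_modrm() and produces
 *
 *    8B 43 08        mov eax, [ebx + 8]
 *
 * where 0x43 packs mod=01 (disp8), reg=000 (EAX) and r/m=011 (EBX), and is
 * followed by the 8-bit displacement.
 */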
/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes.
 */
static void emit_modrm_noreg( struct x86_function *p,
                              unsigned op,
                              struct x86_reg regmem )
{
   struct x86_reg dummy = x86_make_reg(file_REG32, op);
   emit_modrm(p, dummy, regmem);
}

/* Many x86 instructions have two opcodes to cope with the situations
 * where the destination is a register or memory reference
 * respectively.  This function selects the correct opcode based on
 * the arguments presented.
 */
static void emit_op_modrm( struct x86_function *p,
                           unsigned char op_dst_is_reg,
                           unsigned char op_dst_is_mem,
                           struct x86_reg dst,
                           struct x86_reg src )
{
   switch (dst.mod) {
   case mod_REG:
      emit_1ub(p, op_dst_is_reg);
      emit_modrm(p, dst, src);
      break;
   case mod_INDIRECT:
   case mod_DISP32:
   case mod_DISP8:
      assert(src.mod == mod_REG);
      emit_1ub(p, op_dst_is_mem);
      emit_modrm(p, src, dst);
      break;
   default:
      assert(0);
      break;
   }
}




/* Create and manipulate registers and regmem values:
 */
struct x86_reg x86_make_reg( enum x86_reg_file file,
                             enum x86_reg_name idx )
{
   struct x86_reg reg;

   reg.file = file;
   reg.idx = idx;
   reg.mod = mod_REG;
   reg.disp = 0;

   return reg;
}

struct x86_reg x86_make_disp( struct x86_reg reg,
                              int disp )
{
   assert(reg.file == file_REG32);

   if (reg.mod == mod_REG)
      reg.disp = disp;
   else
      reg.disp += disp;

   if (reg.disp == 0 && reg.idx != reg_BP)
      reg.mod = mod_INDIRECT;
   else if (reg.disp <= 127 && reg.disp >= -128)
      reg.mod = mod_DISP8;
   else
      reg.mod = mod_DISP32;

   return reg;
}

struct x86_reg x86_deref( struct x86_reg reg )
{
   return x86_make_disp(reg, 0);
}

struct x86_reg x86_get_base_reg( struct x86_reg reg )
{
   return x86_make_reg( reg.file, reg.idx );
}

int x86_get_label( struct x86_function *p )
{
   return p->csr - p->store;
}



/***********************************************************************
 * x86 instructions
 */


void x64_rexw(struct x86_function *p)
{
   if(x86_target(p) != X86_32)
      emit_1ub(p, 0x48);
}

void x86_jcc( struct x86_function *p,
              enum x86_cc cc,
              int label )
{
   int offset = label - (x86_get_label(p) + 2);
   DUMP_I(cc);

   if (offset < 0) {
      /*assert(p->csr - p->store > -offset);*/
      if (p->csr - p->store <= -offset) {
         /* probably out of memory (using the error_overflow buffer) */
         return;
      }
   }

   if (offset <= 127 && offset >= -128) {
      emit_1ub(p, 0x70 + cc);
      emit_1b(p, (char) offset);
   }
   else {
      offset = label - (x86_get_label(p) + 6);
      emit_2ub(p, 0x0f, 0x80 + cc);
      emit_1i(p, offset);
   }
}

/* Always use a 32bit offset for forward jumps:
 */
int x86_jcc_forward( struct x86_function *p,
                     enum x86_cc cc )
{
   DUMP_I(cc);
   emit_2ub(p, 0x0f, 0x80 + cc);
   emit_1i(p, 0);
   return x86_get_label(p);
}

int x86_jmp_forward( struct x86_function *p)
{
   DUMP();
   emit_1ub(p, 0xe9);
   emit_1i(p, 0);
   return x86_get_label(p);
}

int x86_call_forward( struct x86_function *p)
{
   DUMP();

   emit_1ub(p, 0xe8);
   emit_1i(p, 0);
   return x86_get_label(p);
}
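/* Typical use of the forward-jump emitters (a sketch, not code from this
 * file; it assumes the cc_E condition code from rtasm_x86sse.h): emit the
 * branch with a zero placeholder, keep the label it returns, and patch it
 * once the target is known:
 *
 *    int fixup = x86_jcc_forward(p, cc_E);
 *    ... emit the code that is skipped when the branch is taken ...
 *    x86_fixup_fwd_jump(p, fixup);
 */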
/* Fixup offset from forward jump:
 */
void x86_fixup_fwd_jump( struct x86_function *p,
                         int fixup )
{
   *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup;
}

void x86_jmp( struct x86_function *p, int label)
{
   DUMP_I( label );
   emit_1ub(p, 0xe9);
   emit_1i(p, label - x86_get_label(p) - 4);
}

void x86_call( struct x86_function *p, struct x86_reg reg)
{
   DUMP_R( reg );
   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, 2, reg);
}


void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   assert(dst.file == file_REG32);
   assert(dst.mod == mod_REG);
   emit_1ub(p, 0xb8 + dst.idx);
   emit_1i(p, imm);
}

void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   if(dst.mod == mod_REG)
      x86_mov_reg_imm(p, dst, imm);
   else
   {
      emit_1ub(p, 0xc7);
      emit_modrm_noreg(p, 0, dst);
      emit_1i(p, imm);
   }
}

void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
{
   DUMP_RI( dst, imm );
   emit_1ub(p, 0x66);
   if(dst.mod == mod_REG)
   {
      emit_1ub(p, 0xb8 + dst.idx);
      emit_2ub(p, imm & 0xff, imm >> 8);
   }
   else
   {
      emit_1ub(p, 0xc7);
      emit_modrm_noreg(p, 0, dst);
      emit_2ub(p, imm & 0xff, imm >> 8);
   }
}

void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
{
   DUMP_RI( dst, imm );
   if(dst.mod == mod_REG)
   {
      emit_1ub(p, 0xb0 + dst.idx);
      emit_1ub(p, imm);
   }
   else
   {
      emit_1ub(p, 0xc6);
      emit_modrm_noreg(p, 0, dst);
      emit_1ub(p, imm);
   }
}

/**
 * Immediate group 1 instructions.
 */
static INLINE void
x86_group1_imm( struct x86_function *p,
                unsigned op, struct x86_reg dst, int imm )
{
   assert(dst.file == file_REG32);
   assert(dst.mod == mod_REG);
   if(-0x80 <= imm && imm < 0x80) {
      emit_1ub(p, 0x83);
      emit_modrm_noreg(p, op, dst);
      emit_1b(p, (char)imm);
   }
   else {
      emit_1ub(p, 0x81);
      emit_modrm_noreg(p, op, dst);
      emit_1i(p, imm);
   }
}

void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 0, dst, imm);
}

void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 1, dst, imm);
}

void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 4, dst, imm);
}

void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 5, dst, imm);
}

void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 6, dst, imm);
}

void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   DUMP_RI( dst, imm );
   x86_group1_imm(p, 7, dst, imm);
}


void x86_push( struct x86_function *p,
               struct x86_reg reg )
{
   DUMP_R( reg );
   if (reg.mod == mod_REG)
      emit_1ub(p, 0x50 + reg.idx);
   else
   {
      emit_1ub(p, 0xff);
      emit_modrm_noreg(p, 6, reg);
   }


   p->stack_offset += sizeof(void*);
}

void x86_push_imm32( struct x86_function *p,
                     int imm32 )
{
   DUMP_I( imm32 );
   emit_1ub(p, 0x68);
   emit_1i(p, imm32);

   p->stack_offset += sizeof(void*);
}

void x86_pop( struct x86_function *p,
              struct x86_reg reg )
{
   DUMP_R( reg );
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x58 + reg.idx);
   p->stack_offset -= sizeof(void*);
}

void x86_inc( struct x86_function *p,
              struct x86_reg reg )
{
   DUMP_R( reg );
   if(x86_target(p) == X86_32 && reg.mod == mod_REG)
   {
      emit_1ub(p, 0x40 + reg.idx);
      return;
   }
   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, 0, reg);
}

void x86_dec( struct x86_function *p,
              struct x86_reg reg )
{
   DUMP_R( reg );
   if(x86_target(p) == X86_32 && reg.mod == mod_REG)
   {
      emit_1ub(p, 0x48 + reg.idx);
      return;
   }
   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, 1, reg);
}

void x86_ret( struct x86_function *p )
{
   DUMP();
   assert(p->stack_offset == 0);
   emit_1ub(p, 0xc3);
}

void x86_retw( struct x86_function *p, unsigned short imm )
{
   DUMP();
   emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff);
}

void x86_sahf( struct x86_function *p )
{
   DUMP();
   emit_1ub(p, 0x9e);
}

void x86_mov( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   DUMP_RR( dst, src );
   /* special hack for reading arguments until we support x86-64 registers everywhere */
   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
   {
      uint8_t rex = 0x40;
      if(dst.idx >= 8)
      {
         rex |= 4;
         dst.idx -= 8;
      }
      if(src.idx >= 8)
      {
         rex |= 1;
         src.idx -= 8;
      }
      emit_1ub(p, rex);
   }
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

void x86_mov16( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_1ub(p, 0x66);
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

void x86_mov8( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_op_modrm( p, 0x8a, 0x88, dst, src );
}

void x64_mov64( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   uint8_t rex = 0x48;
   DUMP_RR( dst, src );
   assert(x86_target(p) != X86_32);

   /* special hack for reading arguments until we support x86-64 registers everywhere */
   if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
   {
      if(dst.idx >= 8)
      {
         rex |= 4;
         dst.idx -= 8;
      }
      if(src.idx >= 8)
      {
         rex |= 1;
         src.idx -= 8;
      }
   }
   emit_1ub(p, rex);
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, 0x0f, 0xb6);
   emit_modrm(p, dst, src);
}

void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, 0x0f, 0xb7);
   emit_modrm(p, dst, src);
}

void x86_cmovcc( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src,
                 enum x86_cc cc)
{
   DUMP_RRI( dst, src, cc );
   emit_2ub( p, 0x0f, 0x40 + cc );
   emit_modrm( p, dst, src );
}

void x86_xor( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_op_modrm( p, 0x33, 0x31, dst, src );
}

void x86_cmp( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_op_modrm( p, 0x3b, 0x39, dst, src );
}

void x86_lea( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_1ub(p, 0x8d);
   emit_modrm( p, dst, src );
}

void x86_test( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_1ub(p, 0x85);
   emit_modrm( p, dst, src );
}

void x86_add( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_op_modrm(p, 0x03, 0x01, dst, src );
}

/* Calculate EAX * src, results in EDX:EAX.
 */
void x86_mul( struct x86_function *p,
              struct x86_reg src )
{
   DUMP_R( src );
   emit_1ub(p, 0xf7);
   emit_modrm_noreg(p, 4, src );
}


void x86_imul( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0xAF);
   emit_modrm(p, dst, src);
}


void x86_sub( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_op_modrm(p, 0x2b, 0x29, dst, src );
}

void x86_or( struct x86_function *p,
             struct x86_reg dst,
             struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_op_modrm( p, 0x0b, 0x09, dst, src );
}

void x86_and( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_op_modrm( p, 0x23, 0x21, dst, src );
}

void x86_div( struct x86_function *p,
              struct x86_reg src )
{
   assert(src.file == file_REG32 && src.mod == mod_REG);
   emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
}

void x86_bswap( struct x86_function *p, struct x86_reg reg )
{
   DUMP_R(reg);
   assert(reg.file == file_REG32);
   assert(reg.mod == mod_REG);
   emit_2ub(p, 0x0f, 0xc8 + reg.idx);
}

void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
   DUMP_RI(reg, imm);
   if(imm == 1)
   {
      emit_1ub(p, 0xd1);
      emit_modrm_noreg(p, 5, reg);
   }
   else
   {
      emit_1ub(p, 0xc1);
      emit_modrm_noreg(p, 5, reg);
      emit_1ub(p, imm);
   }
}

void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
   DUMP_RI(reg, imm);
   if(imm == 1)
   {
      emit_1ub(p, 0xd1);
      emit_modrm_noreg(p, 7, reg);
   }
   else
   {
      emit_1ub(p, 0xc1);
      emit_modrm_noreg(p, 7, reg);
      emit_1ub(p, imm);
   }
}

void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
{
   DUMP_RI(reg, imm);
   if(imm == 1)
   {
      emit_1ub(p, 0xd1);
      emit_modrm_noreg(p, 4, reg);
   }
   else
   {
      emit_1ub(p, 0xc1);
      emit_modrm_noreg(p, 4, reg);
      emit_1ub(p, imm);
   }
}


/***********************************************************************
 * SSE instructions
 */

void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr)
{
   DUMP_R( ptr );
   assert(ptr.mod != mod_REG);
   emit_2ub(p, 0x0f, 0x18);
   emit_modrm_noreg(p, 0, ptr);
}

void sse_prefetch0( struct x86_function *p, struct x86_reg ptr)
{
   DUMP_R( ptr );
   assert(ptr.mod != mod_REG);
   emit_2ub(p, 0x0f, 0x18);
   emit_modrm_noreg(p, 1, ptr);
}

void sse_prefetch1( struct x86_function *p, struct x86_reg ptr)
{
   DUMP_R( ptr );
   assert(ptr.mod != mod_REG);
   emit_2ub(p, 0x0f, 0x18);
   emit_modrm_noreg(p, 2, ptr);
}

void sse_movntps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src)
{
   DUMP_RR( dst, src );

   assert(dst.mod != mod_REG);
   assert(src.mod == mod_REG);
   emit_2ub(p, 0x0f, 0x2b);
   emit_modrm(p, src, dst);
}




void sse_movss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, 0xF3, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

void sse_movaps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x28, 0x29, dst, src );
}

void sse_movups( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

void sse_movhps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
}

void sse_movlps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
}

void sse_maxps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_maxss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_divss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
   emit_modrm( p, dst, src );
}

void sse_minps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x5D);
   emit_modrm( p, dst, src );
}

void sse_subps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x5C);
   emit_modrm( p, dst, src );
}

void sse_mulps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_mulss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_addps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_addss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_andnps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x55);
   emit_modrm( p, dst, src );
}

void sse_andps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x54);
   emit_modrm( p, dst, src );
}

void sse_rsqrtps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );
}

void sse_rsqrtss( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );

}

void sse_movhlps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x12);
   emit_modrm( p, dst, src );
}

void sse_movlhps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x16);
   emit_modrm( p, dst, src );
}

void sse_orps( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x56);
   emit_modrm( p, dst, src );
}

void sse_xorps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x57);
   emit_modrm( p, dst, src );
}

void sse_cvtps2pi( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.file == file_MMX &&
          (src.file == file_XMM || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x2d);
   emit_modrm( p, dst, src );
}

void sse2_cvtdq2ps( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x5b);
   emit_modrm( p, dst, src );
}
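/* The shufps immediate selects one source element per 2-bit field:
 * result[0] = dst[imm[1:0]], result[1] = dst[imm[3:2]],
 * result[2] = src[imm[5:4]], result[3] = src[imm[7:6]].
 * For example (illustrative only), sse_shufps(p, reg, reg, 0x1b) with
 * dst == src reverses the four floats in the register.
 */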
/* Shufps can also be used to implement a reduced swizzle when dest ==
 * arg0.
 */
void sse_shufps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src,
                 unsigned char shuf)
{
   DUMP_RRI( dst, src, shuf );
   emit_2ub(p, X86_TWOB, 0xC6);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}

void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub( p, X86_TWOB, 0x15 );
   emit_modrm( p, dst, src );
}

void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub( p, X86_TWOB, 0x14 );
   emit_modrm( p, dst, src );
}

void sse_cmpps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src,
                enum sse_cc cc)
{
   DUMP_RRI( dst, src, cc );
   emit_2ub(p, X86_TWOB, 0xC2);
   emit_modrm(p, dst, src);
   emit_1ub(p, cc);
}

void sse_pmovmskb( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src)
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0xD7);
   emit_modrm(p, dst, src);
}

void sse_movmskps( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src)
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x50);
   emit_modrm(p, dst, src);
}

/***********************************************************************
 * SSE2 instructions
 */

void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   if(dst.mod == mod_REG && dst.file == file_REG32)
   {
      emit_1ub(p, 0x7e);
      emit_modrm(p, src, dst);
   }
   else
   {
      emit_op_modrm(p, 0x6e, 0x7e, dst, src);
   }
}

void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   switch (dst.mod) {
   case mod_REG:
      emit_3ub(p, 0xf3, 0x0f, 0x7e);
      emit_modrm(p, dst, src);
      break;
   case mod_INDIRECT:
   case mod_DISP32:
   case mod_DISP8:
      assert(src.mod == mod_REG);
      emit_3ub(p, 0x66, 0x0f, 0xd6);
      emit_modrm(p, src, dst);
      break;
   default:
      assert(0);
      break;
   }
}

void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0xf3, 0x0f);
   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
}

void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, 0x6f, 0x7f, dst, src);
}

void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0xf2, 0x0f);
   emit_op_modrm(p, 0x10, 0x11, dst, src);
}

void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, 0x10, 0x11, dst, src);
}

void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_2ub(p, 0x66, 0x0f);
   emit_op_modrm(p, 0x28, 0x29, dst, src);
}
/**
 * Perform a reduced swizzle:
 */
void sse2_pshufd( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src,
                  unsigned char shuf)
{
   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, 0x66, X86_TWOB, 0x70);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}

void sse2_pshuflw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src,
                   unsigned char shuf)
{
   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, 0xf2, X86_TWOB, 0x70);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}

void sse2_pshufhw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src,
                   unsigned char shuf)
{
   DUMP_RRI( dst, src, shuf );
   emit_3ub(p, 0xf3, X86_TWOB, 0x70);
   emit_modrm(p, dst, src);
   emit_1ub(p, shuf);
}

void sse2_cvttps2dq( struct x86_function *p,
                     struct x86_reg dst,
                     struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
   emit_modrm( p, dst, src );
}

void sse2_cvtps2dq( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
   emit_modrm( p, dst, src );
}

void sse2_cvtsd2ss( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xf2, 0x0f, 0x5a);
   emit_modrm( p, dst, src );
}

void sse2_cvtpd2ps( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x5a);
   emit_modrm( p, dst, src );
}

void sse2_packssdw( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
   emit_modrm( p, dst, src );
}

void sse2_packsswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x63);
   emit_modrm( p, dst, src );
}

void sse2_packuswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

void sse2_punpcklbw( struct x86_function *p,
                     struct x86_reg dst,
                     struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, X86_TWOB, 0x60);
   emit_modrm( p, dst, src );
}

void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x61);
   emit_modrm( p, dst, src );
}

void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x62);
   emit_modrm( p, dst, src );
}

void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0x66, 0x0f, 0x6c);
   emit_modrm( p, dst, src );
}

void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}

void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}

void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x73);
   emit_modrm_noreg(p, 6, dst);
   emit_1ub(p, imm);
}

void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}

void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}

void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x73);
   emit_modrm_noreg(p, 2, dst);
   emit_1ub(p, imm);
}

void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x71);
   emit_modrm_noreg(p, 4, dst);
   emit_1ub(p, imm);
}

void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm )
{
   DUMP_RI(dst, imm);
   emit_3ub(p, 0x66, 0x0f, 0x72);
   emit_modrm_noreg(p, 4, dst);
   emit_1ub(p, imm);
}

void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR(dst, src);
   emit_3ub(p, 0x66, 0x0f, 0xeb);
   emit_modrm(p, dst, src);
}

void sse2_rcpps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_2ub(p, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}

void sse2_rcpss( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );
   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}

/***********************************************************************
 * x87 instructions
 */
static void note_x87_pop( struct x86_function *p )
{
   p->x87_stack--;
   assert(p->x87_stack >= 0);
}

static void note_x87_push( struct x86_function *p )
{
   p->x87_stack++;
   assert(p->x87_stack <= 7);
}

void x87_assert_stack_empty( struct x86_function *p )
{
   assert (p->x87_stack == 0);
}


void x87_fist( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 2, dst);
}

void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 3, dst);
   note_x87_pop(p);
}

void x87_fild( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_1ub(p, 0xdf);
   emit_modrm_noreg(p, 0, arg);
   note_x87_push(p);
}

void x87_fldz( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xee);
   note_x87_push(p);
}


void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_REG32);
   assert(arg.mod != mod_REG);
   emit_1ub(p, 0xd9);
   emit_modrm_noreg(p, 5, arg);
}

void x87_fld1( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe8);
   note_x87_push(p);
}

void x87_fldl2e( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xea);
   note_x87_push(p);
}

void x87_fldln2( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xed);
   note_x87_push(p);
}

void x87_fwait( struct x86_function *p )
{
   DUMP();
   emit_1ub(p, 0x9b);
}

void x87_fnclex( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xdb, 0xe2);
}

void x87_fclex( struct x86_function *p )
{
   x87_fwait(p);
   x87_fnclex(p);
}

void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xda, 0xc0+arg.idx);
}

void x87_fcmove( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xda, 0xc8+arg.idx);
}

void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xda, 0xd0+arg.idx);
}

void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdb, 0xc0+arg.idx);
}

void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdb, 0xc8+arg.idx);
}

void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdb, 0xd0+arg.idx);
}



/* Two-operand x87 arithmetic.  Either dst or arg must be st(0): the
 * "dst0" opcode pair encodes st(0) <- st(0) op st(i), the "arg0" pair
 * encodes st(i) <- st(i) op st(0), and argmem_noreg is the D8 /n
 * extension used when the argument is a 32-bit float in memory.
 */
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
                          unsigned char dst0ub0,
                          unsigned char dst0ub1,
                          unsigned char arg0ub0,
                          unsigned char arg0ub1,
                          unsigned char argmem_noreg)
{
   assert(dst.file == file_x87);

   if (arg.file == file_x87) {
      if (dst.idx == 0)
         emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
      else if (arg.idx == 0)
         emit_2ub(p, arg0ub0, arg0ub1+dst.idx);   /* encode the st(i) destination, not st(0) */
      else
         assert(0);
   }
   else if (dst.idx == 0) {
      assert(arg.file == file_REG32);
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, argmem_noreg, arg);
   }
   else
      assert(0);
}

void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xc8,
                0xdc, 0xc8,
                1);            /* D8 /1 is FMUL m32fp (was /4, which is FSUB) */
}

void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xe0,
                0xdc, 0xe8,
                4);
}

void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xe8,
                0xdc, 0xe0,
                5);
}

void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xc0,
                0xdc, 0xc0,
                0);
}

void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xf0,
                0xdc, 0xf8,
                6);
}

void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
{
   DUMP_RR( dst, src );
   x87_arith_op(p, dst, src,
                0xd8, 0xf8,
                0xdc, 0xf0,
                7);
}

void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc8+dst.idx);
   note_x87_pop(p);
}
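/* Sketch of how these helpers combine (illustrative, not code from this
 * file); "st0"/"st1" stand for x86_make_reg(file_x87, 0/1) and mem_a..mem_d
 * for 32-bit float memory operands built with x86_make_disp():
 *
 *    x87_fld(p, mem_a);         st0 = a
 *    x87_fmul(p, st0, mem_b);   st0 = a*b
 *    x87_fld(p, mem_c);         st0 = c, st1 = a*b
 *    x87_fmul(p, st0, mem_d);   st0 = c*d
 *    x87_faddp(p, st1);         st1 += st0, pop: sum left in st0
 */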

void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe8+dst.idx);
   note_x87_pop(p);
}

void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe0+dst.idx);
   note_x87_pop(p);
}

void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc0+dst.idx);
   note_x87_pop(p);
}

void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf8+dst.idx);
   note_x87_pop(p);
}

void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf0+dst.idx);
   note_x87_pop(p);
}

void x87_ftst( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe4);
}

void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe0+arg.idx);
}

void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe8+arg.idx);
   note_x87_pop(p);
}

void x87_fucompp( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xda, 0xe9);
   note_x87_pop(p);             /* pop twice */
   note_x87_pop(p);             /* pop twice */
}

void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   assert(arg.file == file_x87);
   emit_2ub(p, 0xd9, 0xc8+arg.idx);
}

void x87_fabs( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe1);
}

void x87_fchs( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xe0);
}

void x87_fcos( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xff);
}


void x87_fprndint( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfc);
}

void x87_fscale( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfd);
}

void x87_fsin( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfe);
}

void x87_fsincos( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfb);
}

void x87_fsqrt( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xfa);
}

void x87_fxtract( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf4);
}

/* st0 = (2^st0)-1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf0);
}

/* st1 = st1 * log2(st0);
 * pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf1);
   note_x87_pop(p);
}
/* st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   DUMP();
   emit_2ub(p, 0xd9, 0xf9);
   note_x87_pop(p);
}


void x87_fld( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   if (arg.file == file_x87)
      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 0, arg);
   }
   note_x87_push(p);
}

void x87_fst( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 2, dst);
   }
}

void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 3, dst);
   }
   note_x87_pop(p);
}

void x87_fpop( struct x86_function *p )
{
   x87_fstp( p, x86_make_reg( file_x87, 0 ));
}


void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 2, dst);
   }
}


void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 3, dst);
   }
   note_x87_pop(p);
}

void x87_fcomi( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_2ub(p, 0xdb, 0xf0+arg.idx);
}

void x87_fcomip( struct x86_function *p, struct x86_reg arg )
{
   DUMP_R( arg );
   emit_2ub(p, 0xdf, 0xf0+arg.idx);    /* FCOMIP is DF F0+i; DB F0+i is the non-popping FCOMI */
   note_x87_pop(p);
}


void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_REG32);

   if (dst.idx == reg_AX &&
       dst.mod == mod_REG)
      emit_2ub(p, 0xdf, 0xe0);
   else {
      emit_1ub(p, 0xdd);
      emit_modrm_noreg(p, 7, dst);
   }
}
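/* A common way to branch on an x87 comparison with the helpers above (a
 * sketch, assuming the cc_* names from rtasm_x86sse.h): copy the FPU status
 * word into AX, move its flag bits into EFLAGS, then use an ordinary jcc.
 *
 *    x87_fcomp(p, x86_make_reg(file_x87, 1));
 *    x87_fnstsw(p, x86_make_reg(file_REG32, reg_AX));
 *    x86_sahf(p);
 *    x86_jcc(p, cc_AE, label);
 */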

void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
{
   DUMP_R( dst );
   assert(dst.file == file_REG32);

   emit_1ub(p, 0x9b);           /* WAIT -- needed? */
   emit_1ub(p, 0xd9);
   emit_modrm_noreg(p, 7, dst);
}




/***********************************************************************
 * MMX instructions
 */

void mmx_emms( struct x86_function *p )
{
   DUMP();
   assert(p->need_emms);
   emit_2ub(p, 0x0f, 0x77);
   p->need_emms = 0;
}

void mmx_packssdw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x6b);
   emit_modrm( p, dst, src );
}

void mmx_packuswb( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   DUMP_RR( dst, src );
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

void mmx_movd( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

void mmx_movq( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   DUMP_RR( dst, src );
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}


/***********************************************************************
 * Helper functions
 */


void x86_cdecl_caller_push_regs( struct x86_function *p )
{
   x86_push(p, x86_make_reg(file_REG32, reg_AX));
   x86_push(p, x86_make_reg(file_REG32, reg_CX));
   x86_push(p, x86_make_reg(file_REG32, reg_DX));
}

void x86_cdecl_caller_pop_regs( struct x86_function *p )
{
   x86_pop(p, x86_make_reg(file_REG32, reg_DX));
   x86_pop(p, x86_make_reg(file_REG32, reg_CX));
   x86_pop(p, x86_make_reg(file_REG32, reg_AX));
}


struct x86_reg x86_fn_arg( struct x86_function *p,
                           unsigned arg )
{
   switch(x86_target(p))
   {
   case X86_64_WIN64_ABI:
      /* Microsoft uses a different calling convention than the rest of the world */
      switch(arg)
      {
      case 1:
         return x86_make_reg(file_REG32, reg_CX);
      case 2:
         return x86_make_reg(file_REG32, reg_DX);
      case 3:
         return x86_make_reg(file_REG32, reg_R8);
      case 4:
         return x86_make_reg(file_REG32, reg_R9);
      default:
         /* Win64 allocates stack slots as if it pushed the first 4 arguments too */
         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                              p->stack_offset + arg * 8);
      }
   case X86_64_STD_ABI:
      switch(arg)
      {
      case 1:
         return x86_make_reg(file_REG32, reg_DI);
      case 2:
         return x86_make_reg(file_REG32, reg_SI);
      case 3:
         return x86_make_reg(file_REG32, reg_DX);
      case 4:
         return x86_make_reg(file_REG32, reg_CX);
      case 5:
         return x86_make_reg(file_REG32, reg_R8);
      case 6:
         return x86_make_reg(file_REG32, reg_R9);
      default:
         return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                              p->stack_offset + (arg - 6) * 8);       /* ??? */
      }
   case X86_32:
      return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                           p->stack_offset + arg * 4);        /* ??? */
   default:
      assert(0 && "Unexpected x86 target ABI in x86_fn_arg");
      return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */
   }
}

static void x86_init_func_common( struct x86_function *p )
{
   util_cpu_detect();
   p->caps = 0;
   if(util_cpu_caps.has_mmx)
      p->caps |= X86_MMX;
   if(util_cpu_caps.has_mmx2)
      p->caps |= X86_MMX2;
   if(util_cpu_caps.has_sse)
      p->caps |= X86_SSE;
   if(util_cpu_caps.has_sse2)
      p->caps |= X86_SSE2;
   if(util_cpu_caps.has_sse3)
      p->caps |= X86_SSE3;
   if(util_cpu_caps.has_sse4_1)
      p->caps |= X86_SSE4_1;
   p->csr = p->store;
   DUMP_START();
}

void x86_init_func( struct x86_function *p )
{
   p->size = 0;
   p->store = NULL;
   x86_init_func_common(p);
}

void x86_init_func_size( struct x86_function *p, unsigned code_size )
{
   p->size = code_size;
   p->store = rtasm_exec_malloc(code_size);
   if (p->store == NULL) {
      p->store = p->error_overflow;
   }
   x86_init_func_common(p);
}

void x86_release_func( struct x86_function *p )
{
   if (p->store && p->store != p->error_overflow)
      rtasm_exec_free(p->store);

   p->store = NULL;
   p->csr = NULL;
   p->size = 0;
}


static INLINE x86_func
voidptr_to_x86_func(void *v)
{
   union {
      void *v;
      x86_func f;
   } u;
   assert(sizeof(u.v) == sizeof(u.f));
   u.v = v;
   return u.f;
}


x86_func x86_get_func( struct x86_function *p )
{
   DUMP_END();
   if (DISASSEM && p->store)
      debug_printf("disassemble %p %p\n", p->store, p->csr);

   if (p->store == p->error_overflow)
      return voidptr_to_x86_func(NULL);
   else
      return voidptr_to_x86_func(p->store);
}

#else

void x86sse_dummy( void );

void x86sse_dummy( void )
{
}

#endif
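/* End-to-end usage sketch (illustrative only; the exact x86_func signature
 * comes from rtasm_x86sse.h): emit a function that loads its first integer
 * argument into EAX and returns, then fetch the generated code.
 *
 *    struct x86_function f;
 *    x86_init_func(&f);
 *    x86_mov(&f, x86_make_reg(file_REG32, reg_AX), x86_fn_arg(&f, 1));
 *    x86_ret(&f);
 *    ... x86_get_func(&f) returns NULL on out-of-memory, otherwise a
 *    pointer to the emitted code; x86_release_func(&f) frees it ...
 */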