1 /************************************************************************** 2 * 3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included 13 * in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 **************************************************************************/ 24 25 #include "pipe/p_config.h" 26 #include "util/u_cpu_detect.h" 27 28 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 29 30 #include "pipe/p_compiler.h" 31 #include "util/u_debug.h" 32 #include "util/u_pointer.h" 33 34 #include "rtasm_execmem.h" 35 #include "rtasm_x86sse.h" 36 37 #define DISASSEM 0 38 #define X86_TWOB 0x0f 39 40 41 #define DUMP_SSE 0 42 43 44 void x86_print_reg( struct x86_reg reg ) 45 { 46 if (reg.mod != mod_REG) 47 debug_printf( "[" ); 48 49 switch( reg.file ) { 50 case file_REG32: 51 switch( reg.idx ) { 52 case reg_AX: debug_printf( "EAX" ); break; 53 case reg_CX: debug_printf( "ECX" ); break; 54 case reg_DX: debug_printf( "EDX" ); break; 55 case reg_BX: debug_printf( "EBX" ); break; 56 case reg_SP: debug_printf( "ESP" ); break; 57 case reg_BP: debug_printf( "EBP" ); break; 58 case reg_SI: debug_printf( "ESI" ); break; 59 case reg_DI: debug_printf( "EDI" ); break; 60 } 61 break; 62 case file_MMX: 63 debug_printf( "MMX%u", reg.idx ); 64 break; 65 case file_XMM: 66 debug_printf( "XMM%u", reg.idx ); 67 break; 68 case file_x87: 69 debug_printf( "fp%u", reg.idx ); 70 break; 71 } 72 73 if (reg.mod == mod_DISP8 || 74 reg.mod == mod_DISP32) 75 debug_printf("+%d", reg.disp); 76 77 if (reg.mod != mod_REG) 78 debug_printf( "]" ); 79 } 80 81 #if DUMP_SSE 82 83 #define DUMP_START() debug_printf( "\n" ) 84 #define DUMP_END() debug_printf( "\n" ) 85 86 #define DUMP() do { \ 87 const char *foo = __FUNCTION__; \ 88 while (*foo && *foo != '_') \ 89 foo++; \ 90 if (*foo) \ 91 foo++; \ 92 debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \ 93 } while (0) 94 95 #define DUMP_I( I ) do { \ 96 DUMP(); \ 97 debug_printf( "%u", I ); \ 98 } while( 0 ) 99 100 #define DUMP_R( R0 ) do { \ 101 DUMP(); \ 102 x86_print_reg( R0 ); \ 103 } while( 0 ) 104 105 #define DUMP_RR( R0, R1 ) do { \ 106 DUMP(); \ 107 x86_print_reg( R0 ); \ 108 debug_printf( ", " ); \ 109 x86_print_reg( R1 ); \ 110 } while( 0 ) 111 112 #define DUMP_RI( R0, I ) do { \ 113 DUMP(); \ 114 x86_print_reg( R0 ); \ 115 debug_printf( ", %u", I ); \ 116 } while( 0 ) 117 118 #define DUMP_RRI( R0, R1, I ) do { \ 119 DUMP(); \ 120 x86_print_reg( R0 ); \ 121 debug_printf( ", " ); \ 122 x86_print_reg( R1 ); \ 123 debug_printf( ", %u", I ); \ 124 } while( 0 ) 125 126 #else 127 128 #define DUMP_START() 129 #define DUMP_END() 130 #define DUMP( ) 131 #define DUMP_I( I ) 132 #define DUMP_R( R0 ) 133 #define DUMP_RR( R0, R1 ) 134 #define DUMP_RI( R0, I ) 135 #define DUMP_RRI( R0, R1, I ) 136 137 #endif 138 139 140 static void do_realloc( struct x86_function *p ) 141 { 142 if (p->store == p->error_overflow) { 143 p->csr = p->store; 144 } 145 else if (p->size == 0) { 146 p->size = 1024; 147 p->store = rtasm_exec_malloc(p->size); 148 p->csr = p->store; 149 } 150 else { 151 uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); 152 unsigned char *tmp = p->store; 153 p->size *= 2; 154 p->store = rtasm_exec_malloc(p->size); 155 156 if (p->store) { 157 memcpy(p->store, tmp, used); 158 p->csr = p->store + used; 159 } 160 else { 161 p->csr = p->store; 162 } 163 164 rtasm_exec_free(tmp); 165 } 166 167 if (p->store == NULL) { 168 p->store = p->csr = p->error_overflow; 169 p->size = sizeof(p->error_overflow); 170 } 171 } 172 173 /* Emit bytes to the instruction stream: 174 */ 175 static unsigned char *reserve( struct x86_function *p, int bytes ) 176 { 177 if (p->csr + bytes - p->store > (int) p->size) 178 do_realloc(p); 179 180 { 181 unsigned char *csr = p->csr; 182 p->csr += bytes; 183 return csr; 184 } 185 } 186 187 188 189 static void emit_1b( struct x86_function *p, char b0 ) 190 { 191 char *csr = (char *)reserve(p, 1); 192 *csr = b0; 193 } 194 195 static void emit_1i( struct x86_function *p, int i0 ) 196 { 197 int *icsr = (int *)reserve(p, sizeof(i0)); 198 *icsr = i0; 199 } 200 201 static void emit_1ub( struct x86_function *p, unsigned char b0 ) 202 { 203 unsigned char *csr = reserve(p, 1); 204 *csr++ = b0; 205 } 206 207 static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 208 { 209 unsigned char *csr = reserve(p, 2); 210 *csr++ = b0; 211 *csr++ = b1; 212 } 213 214 static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 215 { 216 unsigned char *csr = reserve(p, 3); 217 *csr++ = b0; 218 *csr++ = b1; 219 *csr++ = b2; 220 } 221 222 223 /* Build a modRM byte + possible displacement. No treatment of SIB 224 * indexing. BZZT - no way to encode an absolute address. 225 * 226 * This is the "/r" field in the x86 manuals... 227 */ 228 static void emit_modrm( struct x86_function *p, 229 struct x86_reg reg, 230 struct x86_reg regmem ) 231 { 232 unsigned char val = 0; 233 234 assert(reg.mod == mod_REG); 235 236 /* TODO: support extended x86-64 registers */ 237 assert(reg.idx < 8); 238 assert(regmem.idx < 8); 239 240 val |= regmem.mod << 6; /* mod field */ 241 val |= reg.idx << 3; /* reg field */ 242 val |= regmem.idx; /* r/m field */ 243 244 emit_1ub(p, val); 245 246 /* Oh-oh we've stumbled into the SIB thing. 247 */ 248 if (regmem.file == file_REG32 && 249 regmem.idx == reg_SP && 250 regmem.mod != mod_REG) { 251 emit_1ub(p, 0x24); /* simplistic! */ 252 } 253 254 switch (regmem.mod) { 255 case mod_REG: 256 case mod_INDIRECT: 257 break; 258 case mod_DISP8: 259 emit_1b(p, (char) regmem.disp); 260 break; 261 case mod_DISP32: 262 emit_1i(p, regmem.disp); 263 break; 264 default: 265 assert(0); 266 break; 267 } 268 } 269 270 /* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. 271 */ 272 static void emit_modrm_noreg( struct x86_function *p, 273 unsigned op, 274 struct x86_reg regmem ) 275 { 276 struct x86_reg dummy = x86_make_reg(file_REG32, op); 277 emit_modrm(p, dummy, regmem); 278 } 279 280 /* Many x86 instructions have two opcodes to cope with the situations 281 * where the destination is a register or memory reference 282 * respectively. This function selects the correct opcode based on 283 * the arguments presented. 284 */ 285 static void emit_op_modrm( struct x86_function *p, 286 unsigned char op_dst_is_reg, 287 unsigned char op_dst_is_mem, 288 struct x86_reg dst, 289 struct x86_reg src ) 290 { 291 switch (dst.mod) { 292 case mod_REG: 293 emit_1ub(p, op_dst_is_reg); 294 emit_modrm(p, dst, src); 295 break; 296 case mod_INDIRECT: 297 case mod_DISP32: 298 case mod_DISP8: 299 assert(src.mod == mod_REG); 300 emit_1ub(p, op_dst_is_mem); 301 emit_modrm(p, src, dst); 302 break; 303 default: 304 assert(0); 305 break; 306 } 307 } 308 309 310 311 312 313 314 315 /* Create and manipulate registers and regmem values: 316 */ 317 struct x86_reg x86_make_reg( enum x86_reg_file file, 318 enum x86_reg_name idx ) 319 { 320 struct x86_reg reg; 321 322 reg.file = file; 323 reg.idx = idx; 324 reg.mod = mod_REG; 325 reg.disp = 0; 326 327 return reg; 328 } 329 330 struct x86_reg x86_make_disp( struct x86_reg reg, 331 int disp ) 332 { 333 assert(reg.file == file_REG32); 334 335 if (reg.mod == mod_REG) 336 reg.disp = disp; 337 else 338 reg.disp += disp; 339 340 if (reg.disp == 0 && reg.idx != reg_BP) 341 reg.mod = mod_INDIRECT; 342 else if (reg.disp <= 127 && reg.disp >= -128) 343 reg.mod = mod_DISP8; 344 else 345 reg.mod = mod_DISP32; 346 347 return reg; 348 } 349 350 struct x86_reg x86_deref( struct x86_reg reg ) 351 { 352 return x86_make_disp(reg, 0); 353 } 354 355 struct x86_reg x86_get_base_reg( struct x86_reg reg ) 356 { 357 return x86_make_reg( reg.file, reg.idx ); 358 } 359 360 int x86_get_label( struct x86_function *p ) 361 { 362 return p->csr - p->store; 363 } 364 365 366 367 /*********************************************************************** 368 * x86 instructions 369 */ 370 371 372 void x64_rexw(struct x86_function *p) 373 { 374 if(x86_target(p) != X86_32) 375 emit_1ub(p, 0x48); 376 } 377 378 void x86_jcc( struct x86_function *p, 379 enum x86_cc cc, 380 int label ) 381 { 382 int offset = label - (x86_get_label(p) + 2); 383 DUMP_I(cc); 384 385 if (offset < 0) { 386 /*assert(p->csr - p->store > -offset);*/ 387 if (p->csr - p->store <= -offset) { 388 /* probably out of memory (using the error_overflow buffer) */ 389 return; 390 } 391 } 392 393 if (offset <= 127 && offset >= -128) { 394 emit_1ub(p, 0x70 + cc); 395 emit_1b(p, (char) offset); 396 } 397 else { 398 offset = label - (x86_get_label(p) + 6); 399 emit_2ub(p, 0x0f, 0x80 + cc); 400 emit_1i(p, offset); 401 } 402 } 403 404 /* Always use a 32bit offset for forward jumps: 405 */ 406 int x86_jcc_forward( struct x86_function *p, 407 enum x86_cc cc ) 408 { 409 DUMP_I(cc); 410 emit_2ub(p, 0x0f, 0x80 + cc); 411 emit_1i(p, 0); 412 return x86_get_label(p); 413 } 414 415 int x86_jmp_forward( struct x86_function *p) 416 { 417 DUMP(); 418 emit_1ub(p, 0xe9); 419 emit_1i(p, 0); 420 return x86_get_label(p); 421 } 422 423 int x86_call_forward( struct x86_function *p) 424 { 425 DUMP(); 426 427 emit_1ub(p, 0xe8); 428 emit_1i(p, 0); 429 return x86_get_label(p); 430 } 431 432 /* Fixup offset from forward jump: 433 */ 434 void x86_fixup_fwd_jump( struct x86_function *p, 435 int fixup ) 436 { 437 *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; 438 } 439 440 void x86_jmp( struct x86_function *p, int label) 441 { 442 DUMP_I( label ); 443 emit_1ub(p, 0xe9); 444 emit_1i(p, label - x86_get_label(p) - 4); 445 } 446 447 void x86_call( struct x86_function *p, struct x86_reg reg) 448 { 449 DUMP_R( reg ); 450 emit_1ub(p, 0xff); 451 emit_modrm_noreg(p, 2, reg); 452 } 453 454 455 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) 456 { 457 DUMP_RI( dst, imm ); 458 assert(dst.file == file_REG32); 459 assert(dst.mod == mod_REG); 460 emit_1ub(p, 0xb8 + dst.idx); 461 emit_1i(p, imm); 462 } 463 464 void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm ) 465 { 466 DUMP_RI( dst, imm ); 467 if(dst.mod == mod_REG) 468 x86_mov_reg_imm(p, dst, imm); 469 else 470 { 471 emit_1ub(p, 0xc7); 472 emit_modrm_noreg(p, 0, dst); 473 emit_1i(p, imm); 474 } 475 } 476 477 void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm ) 478 { 479 DUMP_RI( dst, imm ); 480 emit_1ub(p, 0x66); 481 if(dst.mod == mod_REG) 482 { 483 emit_1ub(p, 0xb8 + dst.idx); 484 emit_2ub(p, imm & 0xff, imm >> 8); 485 } 486 else 487 { 488 emit_1ub(p, 0xc7); 489 emit_modrm_noreg(p, 0, dst); 490 emit_2ub(p, imm & 0xff, imm >> 8); 491 } 492 } 493 494 void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm ) 495 { 496 DUMP_RI( dst, imm ); 497 if(dst.mod == mod_REG) 498 { 499 emit_1ub(p, 0xb0 + dst.idx); 500 emit_1ub(p, imm); 501 } 502 else 503 { 504 emit_1ub(p, 0xc6); 505 emit_modrm_noreg(p, 0, dst); 506 emit_1ub(p, imm); 507 } 508 } 509 510 /** 511 * Immediate group 1 instructions. 512 */ 513 static inline void 514 x86_group1_imm( struct x86_function *p, 515 unsigned op, struct x86_reg dst, int imm ) 516 { 517 assert(dst.file == file_REG32); 518 assert(dst.mod == mod_REG); 519 if(-0x80 <= imm && imm < 0x80) { 520 emit_1ub(p, 0x83); 521 emit_modrm_noreg(p, op, dst); 522 emit_1b(p, (char)imm); 523 } 524 else { 525 emit_1ub(p, 0x81); 526 emit_modrm_noreg(p, op, dst); 527 emit_1i(p, imm); 528 } 529 } 530 531 void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) 532 { 533 DUMP_RI( dst, imm ); 534 x86_group1_imm(p, 0, dst, imm); 535 } 536 537 void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) 538 { 539 DUMP_RI( dst, imm ); 540 x86_group1_imm(p, 1, dst, imm); 541 } 542 543 void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) 544 { 545 DUMP_RI( dst, imm ); 546 x86_group1_imm(p, 4, dst, imm); 547 } 548 549 void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) 550 { 551 DUMP_RI( dst, imm ); 552 x86_group1_imm(p, 5, dst, imm); 553 } 554 555 void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) 556 { 557 DUMP_RI( dst, imm ); 558 x86_group1_imm(p, 6, dst, imm); 559 } 560 561 void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) 562 { 563 DUMP_RI( dst, imm ); 564 x86_group1_imm(p, 7, dst, imm); 565 } 566 567 568 void x86_push( struct x86_function *p, 569 struct x86_reg reg ) 570 { 571 DUMP_R( reg ); 572 if (reg.mod == mod_REG) 573 emit_1ub(p, 0x50 + reg.idx); 574 else 575 { 576 emit_1ub(p, 0xff); 577 emit_modrm_noreg(p, 6, reg); 578 } 579 580 581 p->stack_offset += sizeof(void*); 582 } 583 584 void x86_push_imm32( struct x86_function *p, 585 int imm32 ) 586 { 587 DUMP_I( imm32 ); 588 emit_1ub(p, 0x68); 589 emit_1i(p, imm32); 590 591 p->stack_offset += sizeof(void*); 592 } 593 594 595 void x86_pop( struct x86_function *p, 596 struct x86_reg reg ) 597 { 598 DUMP_R( reg ); 599 assert(reg.mod == mod_REG); 600 emit_1ub(p, 0x58 + reg.idx); 601 p->stack_offset -= sizeof(void*); 602 } 603 604 void x86_inc( struct x86_function *p, 605 struct x86_reg reg ) 606 { 607 DUMP_R( reg ); 608 if(x86_target(p) == X86_32 && reg.mod == mod_REG) 609 { 610 emit_1ub(p, 0x40 + reg.idx); 611 return; 612 } 613 emit_1ub(p, 0xff); 614 emit_modrm_noreg(p, 0, reg); 615 } 616 617 void x86_dec( struct x86_function *p, 618 struct x86_reg reg ) 619 { 620 DUMP_R( reg ); 621 if(x86_target(p) == X86_32 && reg.mod == mod_REG) 622 { 623 emit_1ub(p, 0x48 + reg.idx); 624 return; 625 } 626 emit_1ub(p, 0xff); 627 emit_modrm_noreg(p, 1, reg); 628 } 629 630 void x86_ret( struct x86_function *p ) 631 { 632 DUMP(); 633 assert(p->stack_offset == 0); 634 emit_1ub(p, 0xc3); 635 } 636 637 void x86_retw( struct x86_function *p, unsigned short imm ) 638 { 639 DUMP(); 640 emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); 641 } 642 643 void x86_sahf( struct x86_function *p ) 644 { 645 DUMP(); 646 emit_1ub(p, 0x9e); 647 } 648 649 void x86_mov( struct x86_function *p, 650 struct x86_reg dst, 651 struct x86_reg src ) 652 { 653 DUMP_RR( dst, src ); 654 /* special hack for reading arguments until we support x86-64 registers everywhere */ 655 if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 656 { 657 uint8_t rex = 0x40; 658 if(dst.idx >= 8) 659 { 660 rex |= 4; 661 dst.idx -= 8; 662 } 663 if(src.idx >= 8) 664 { 665 rex |= 1; 666 src.idx -= 8; 667 } 668 emit_1ub(p, rex); 669 } 670 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 671 } 672 673 void x86_mov16( struct x86_function *p, 674 struct x86_reg dst, 675 struct x86_reg src ) 676 { 677 DUMP_RR( dst, src ); 678 emit_1ub(p, 0x66); 679 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 680 } 681 682 void x86_mov8( struct x86_function *p, 683 struct x86_reg dst, 684 struct x86_reg src ) 685 { 686 DUMP_RR( dst, src ); 687 emit_op_modrm( p, 0x8a, 0x88, dst, src ); 688 } 689 690 void x64_mov64( struct x86_function *p, 691 struct x86_reg dst, 692 struct x86_reg src ) 693 { 694 uint8_t rex = 0x48; 695 DUMP_RR( dst, src ); 696 assert(x86_target(p) != X86_32); 697 698 /* special hack for reading arguments until we support x86-64 registers everywhere */ 699 if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 700 { 701 if(dst.idx >= 8) 702 { 703 rex |= 4; 704 dst.idx -= 8; 705 } 706 if(src.idx >= 8) 707 { 708 rex |= 1; 709 src.idx -= 8; 710 } 711 } 712 emit_1ub(p, rex); 713 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 714 } 715 716 void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 717 { 718 DUMP_RR( dst, src ); 719 emit_2ub(p, 0x0f, 0xb6); 720 emit_modrm(p, dst, src); 721 } 722 723 void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 724 { 725 DUMP_RR( dst, src ); 726 emit_2ub(p, 0x0f, 0xb7); 727 emit_modrm(p, dst, src); 728 } 729 730 void x86_cmovcc( struct x86_function *p, 731 struct x86_reg dst, 732 struct x86_reg src, 733 enum x86_cc cc) 734 { 735 DUMP_RRI( dst, src, cc ); 736 emit_2ub( p, 0x0f, 0x40 + cc ); 737 emit_modrm( p, dst, src ); 738 } 739 740 void x86_xor( struct x86_function *p, 741 struct x86_reg dst, 742 struct x86_reg src ) 743 { 744 DUMP_RR( dst, src ); 745 emit_op_modrm( p, 0x33, 0x31, dst, src ); 746 } 747 748 void x86_cmp( struct x86_function *p, 749 struct x86_reg dst, 750 struct x86_reg src ) 751 { 752 DUMP_RR( dst, src ); 753 emit_op_modrm( p, 0x3b, 0x39, dst, src ); 754 } 755 756 void x86_lea( struct x86_function *p, 757 struct x86_reg dst, 758 struct x86_reg src ) 759 { 760 DUMP_RR( dst, src ); 761 emit_1ub(p, 0x8d); 762 emit_modrm( p, dst, src ); 763 } 764 765 void x86_test( struct x86_function *p, 766 struct x86_reg dst, 767 struct x86_reg src ) 768 { 769 DUMP_RR( dst, src ); 770 emit_1ub(p, 0x85); 771 emit_modrm( p, dst, src ); 772 } 773 774 void x86_add( struct x86_function *p, 775 struct x86_reg dst, 776 struct x86_reg src ) 777 { 778 DUMP_RR( dst, src ); 779 emit_op_modrm(p, 0x03, 0x01, dst, src ); 780 } 781 782 /* Calculate EAX * src, results in EDX:EAX. 783 */ 784 void x86_mul( struct x86_function *p, 785 struct x86_reg src ) 786 { 787 DUMP_R( src ); 788 emit_1ub(p, 0xf7); 789 emit_modrm_noreg(p, 4, src ); 790 } 791 792 793 void x86_imul( struct x86_function *p, 794 struct x86_reg dst, 795 struct x86_reg src ) 796 { 797 DUMP_RR( dst, src ); 798 emit_2ub(p, X86_TWOB, 0xAF); 799 emit_modrm(p, dst, src); 800 } 801 802 803 void x86_sub( struct x86_function *p, 804 struct x86_reg dst, 805 struct x86_reg src ) 806 { 807 DUMP_RR( dst, src ); 808 emit_op_modrm(p, 0x2b, 0x29, dst, src ); 809 } 810 811 void x86_or( struct x86_function *p, 812 struct x86_reg dst, 813 struct x86_reg src ) 814 { 815 DUMP_RR( dst, src ); 816 emit_op_modrm( p, 0x0b, 0x09, dst, src ); 817 } 818 819 void x86_and( struct x86_function *p, 820 struct x86_reg dst, 821 struct x86_reg src ) 822 { 823 DUMP_RR( dst, src ); 824 emit_op_modrm( p, 0x23, 0x21, dst, src ); 825 } 826 827 void x86_div( struct x86_function *p, 828 struct x86_reg src ) 829 { 830 assert(src.file == file_REG32 && src.mod == mod_REG); 831 emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); 832 } 833 834 void x86_bswap( struct x86_function *p, struct x86_reg reg ) 835 { 836 DUMP_R(reg); 837 assert(reg.file == file_REG32); 838 assert(reg.mod == mod_REG); 839 emit_2ub(p, 0x0f, 0xc8 + reg.idx); 840 } 841 842 void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 843 { 844 DUMP_RI(reg, imm); 845 if(imm == 1) 846 { 847 emit_1ub(p, 0xd1); 848 emit_modrm_noreg(p, 5, reg); 849 } 850 else 851 { 852 emit_1ub(p, 0xc1); 853 emit_modrm_noreg(p, 5, reg); 854 emit_1ub(p, imm); 855 } 856 } 857 858 void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 859 { 860 DUMP_RI(reg, imm); 861 if(imm == 1) 862 { 863 emit_1ub(p, 0xd1); 864 emit_modrm_noreg(p, 7, reg); 865 } 866 else 867 { 868 emit_1ub(p, 0xc1); 869 emit_modrm_noreg(p, 7, reg); 870 emit_1ub(p, imm); 871 } 872 } 873 874 void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 875 { 876 DUMP_RI(reg, imm); 877 if(imm == 1) 878 { 879 emit_1ub(p, 0xd1); 880 emit_modrm_noreg(p, 4, reg); 881 } 882 else 883 { 884 emit_1ub(p, 0xc1); 885 emit_modrm_noreg(p, 4, reg); 886 emit_1ub(p, imm); 887 } 888 } 889 890 891 /*********************************************************************** 892 * SSE instructions 893 */ 894 895 void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) 896 { 897 DUMP_R( ptr ); 898 assert(ptr.mod != mod_REG); 899 emit_2ub(p, 0x0f, 0x18); 900 emit_modrm_noreg(p, 0, ptr); 901 } 902 903 void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) 904 { 905 DUMP_R( ptr ); 906 assert(ptr.mod != mod_REG); 907 emit_2ub(p, 0x0f, 0x18); 908 emit_modrm_noreg(p, 1, ptr); 909 } 910 911 void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) 912 { 913 DUMP_R( ptr ); 914 assert(ptr.mod != mod_REG); 915 emit_2ub(p, 0x0f, 0x18); 916 emit_modrm_noreg(p, 2, ptr); 917 } 918 919 void sse_movntps( struct x86_function *p, 920 struct x86_reg dst, 921 struct x86_reg src) 922 { 923 DUMP_RR( dst, src ); 924 925 assert(dst.mod != mod_REG); 926 assert(src.mod == mod_REG); 927 emit_2ub(p, 0x0f, 0x2b); 928 emit_modrm(p, src, dst); 929 } 930 931 932 933 934 void sse_movss( struct x86_function *p, 935 struct x86_reg dst, 936 struct x86_reg src ) 937 { 938 DUMP_RR( dst, src ); 939 emit_2ub(p, 0xF3, X86_TWOB); 940 emit_op_modrm( p, 0x10, 0x11, dst, src ); 941 } 942 943 void sse_movaps( struct x86_function *p, 944 struct x86_reg dst, 945 struct x86_reg src ) 946 { 947 DUMP_RR( dst, src ); 948 emit_1ub(p, X86_TWOB); 949 emit_op_modrm( p, 0x28, 0x29, dst, src ); 950 } 951 952 void sse_movups( struct x86_function *p, 953 struct x86_reg dst, 954 struct x86_reg src ) 955 { 956 DUMP_RR( dst, src ); 957 emit_1ub(p, X86_TWOB); 958 emit_op_modrm( p, 0x10, 0x11, dst, src ); 959 } 960 961 void sse_movhps( struct x86_function *p, 962 struct x86_reg dst, 963 struct x86_reg src ) 964 { 965 DUMP_RR( dst, src ); 966 assert(dst.mod != mod_REG || src.mod != mod_REG); 967 emit_1ub(p, X86_TWOB); 968 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ 969 } 970 971 void sse_movlps( struct x86_function *p, 972 struct x86_reg dst, 973 struct x86_reg src ) 974 { 975 DUMP_RR( dst, src ); 976 assert(dst.mod != mod_REG || src.mod != mod_REG); 977 emit_1ub(p, X86_TWOB); 978 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ 979 } 980 981 void sse_maxps( struct x86_function *p, 982 struct x86_reg dst, 983 struct x86_reg src ) 984 { 985 DUMP_RR( dst, src ); 986 emit_2ub(p, X86_TWOB, 0x5F); 987 emit_modrm( p, dst, src ); 988 } 989 990 void sse_maxss( struct x86_function *p, 991 struct x86_reg dst, 992 struct x86_reg src ) 993 { 994 DUMP_RR( dst, src ); 995 emit_3ub(p, 0xF3, X86_TWOB, 0x5F); 996 emit_modrm( p, dst, src ); 997 } 998 999 void sse_divss( struct x86_function *p, 1000 struct x86_reg dst, 1001 struct x86_reg src ) 1002 { 1003 DUMP_RR( dst, src ); 1004 emit_3ub(p, 0xF3, X86_TWOB, 0x5E); 1005 emit_modrm( p, dst, src ); 1006 } 1007 1008 void sse_minps( struct x86_function *p, 1009 struct x86_reg dst, 1010 struct x86_reg src ) 1011 { 1012 DUMP_RR( dst, src ); 1013 emit_2ub(p, X86_TWOB, 0x5D); 1014 emit_modrm( p, dst, src ); 1015 } 1016 1017 void sse_subps( struct x86_function *p, 1018 struct x86_reg dst, 1019 struct x86_reg src ) 1020 { 1021 DUMP_RR( dst, src ); 1022 emit_2ub(p, X86_TWOB, 0x5C); 1023 emit_modrm( p, dst, src ); 1024 } 1025 1026 void sse_mulps( struct x86_function *p, 1027 struct x86_reg dst, 1028 struct x86_reg src ) 1029 { 1030 DUMP_RR( dst, src ); 1031 emit_2ub(p, X86_TWOB, 0x59); 1032 emit_modrm( p, dst, src ); 1033 } 1034 1035 void sse_mulss( struct x86_function *p, 1036 struct x86_reg dst, 1037 struct x86_reg src ) 1038 { 1039 DUMP_RR( dst, src ); 1040 emit_3ub(p, 0xF3, X86_TWOB, 0x59); 1041 emit_modrm( p, dst, src ); 1042 } 1043 1044 void sse_addps( struct x86_function *p, 1045 struct x86_reg dst, 1046 struct x86_reg src ) 1047 { 1048 DUMP_RR( dst, src ); 1049 emit_2ub(p, X86_TWOB, 0x58); 1050 emit_modrm( p, dst, src ); 1051 } 1052 1053 void sse_addss( struct x86_function *p, 1054 struct x86_reg dst, 1055 struct x86_reg src ) 1056 { 1057 DUMP_RR( dst, src ); 1058 emit_3ub(p, 0xF3, X86_TWOB, 0x58); 1059 emit_modrm( p, dst, src ); 1060 } 1061 1062 void sse_andnps( struct x86_function *p, 1063 struct x86_reg dst, 1064 struct x86_reg src ) 1065 { 1066 DUMP_RR( dst, src ); 1067 emit_2ub(p, X86_TWOB, 0x55); 1068 emit_modrm( p, dst, src ); 1069 } 1070 1071 void sse_andps( struct x86_function *p, 1072 struct x86_reg dst, 1073 struct x86_reg src ) 1074 { 1075 DUMP_RR( dst, src ); 1076 emit_2ub(p, X86_TWOB, 0x54); 1077 emit_modrm( p, dst, src ); 1078 } 1079 1080 void sse_rsqrtps( struct x86_function *p, 1081 struct x86_reg dst, 1082 struct x86_reg src ) 1083 { 1084 DUMP_RR( dst, src ); 1085 emit_2ub(p, X86_TWOB, 0x52); 1086 emit_modrm( p, dst, src ); 1087 } 1088 1089 void sse_rsqrtss( struct x86_function *p, 1090 struct x86_reg dst, 1091 struct x86_reg src ) 1092 { 1093 DUMP_RR( dst, src ); 1094 emit_3ub(p, 0xF3, X86_TWOB, 0x52); 1095 emit_modrm( p, dst, src ); 1096 1097 } 1098 1099 void sse_movhlps( struct x86_function *p, 1100 struct x86_reg dst, 1101 struct x86_reg src ) 1102 { 1103 DUMP_RR( dst, src ); 1104 assert(dst.mod == mod_REG && src.mod == mod_REG); 1105 emit_2ub(p, X86_TWOB, 0x12); 1106 emit_modrm( p, dst, src ); 1107 } 1108 1109 void sse_movlhps( struct x86_function *p, 1110 struct x86_reg dst, 1111 struct x86_reg src ) 1112 { 1113 DUMP_RR( dst, src ); 1114 assert(dst.mod == mod_REG && src.mod == mod_REG); 1115 emit_2ub(p, X86_TWOB, 0x16); 1116 emit_modrm( p, dst, src ); 1117 } 1118 1119 void sse_orps( struct x86_function *p, 1120 struct x86_reg dst, 1121 struct x86_reg src ) 1122 { 1123 DUMP_RR( dst, src ); 1124 emit_2ub(p, X86_TWOB, 0x56); 1125 emit_modrm( p, dst, src ); 1126 } 1127 1128 void sse_xorps( struct x86_function *p, 1129 struct x86_reg dst, 1130 struct x86_reg src ) 1131 { 1132 DUMP_RR( dst, src ); 1133 emit_2ub(p, X86_TWOB, 0x57); 1134 emit_modrm( p, dst, src ); 1135 } 1136 1137 void sse_cvtps2pi( struct x86_function *p, 1138 struct x86_reg dst, 1139 struct x86_reg src ) 1140 { 1141 DUMP_RR( dst, src ); 1142 assert(dst.file == file_MMX && 1143 (src.file == file_XMM || src.mod != mod_REG)); 1144 1145 p->need_emms = 1; 1146 1147 emit_2ub(p, X86_TWOB, 0x2d); 1148 emit_modrm( p, dst, src ); 1149 } 1150 1151 void sse2_cvtdq2ps( struct x86_function *p, 1152 struct x86_reg dst, 1153 struct x86_reg src ) 1154 { 1155 DUMP_RR( dst, src ); 1156 emit_2ub(p, X86_TWOB, 0x5b); 1157 emit_modrm( p, dst, src ); 1158 } 1159 1160 1161 /* Shufps can also be used to implement a reduced swizzle when dest == 1162 * arg0. 1163 */ 1164 void sse_shufps( struct x86_function *p, 1165 struct x86_reg dst, 1166 struct x86_reg src, 1167 unsigned char shuf) 1168 { 1169 DUMP_RRI( dst, src, shuf ); 1170 emit_2ub(p, X86_TWOB, 0xC6); 1171 emit_modrm(p, dst, src); 1172 emit_1ub(p, shuf); 1173 } 1174 1175 void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1176 { 1177 DUMP_RR( dst, src ); 1178 emit_2ub( p, X86_TWOB, 0x15 ); 1179 emit_modrm( p, dst, src ); 1180 } 1181 1182 void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1183 { 1184 DUMP_RR( dst, src ); 1185 emit_2ub( p, X86_TWOB, 0x14 ); 1186 emit_modrm( p, dst, src ); 1187 } 1188 1189 void sse_cmpps( struct x86_function *p, 1190 struct x86_reg dst, 1191 struct x86_reg src, 1192 enum sse_cc cc) 1193 { 1194 DUMP_RRI( dst, src, cc ); 1195 emit_2ub(p, X86_TWOB, 0xC2); 1196 emit_modrm(p, dst, src); 1197 emit_1ub(p, cc); 1198 } 1199 1200 void sse_pmovmskb( struct x86_function *p, 1201 struct x86_reg dst, 1202 struct x86_reg src) 1203 { 1204 DUMP_RR( dst, src ); 1205 emit_3ub(p, 0x66, X86_TWOB, 0xD7); 1206 emit_modrm(p, dst, src); 1207 } 1208 1209 void sse_movmskps( struct x86_function *p, 1210 struct x86_reg dst, 1211 struct x86_reg src) 1212 { 1213 DUMP_RR( dst, src ); 1214 emit_2ub(p, X86_TWOB, 0x50); 1215 emit_modrm(p, dst, src); 1216 } 1217 1218 /*********************************************************************** 1219 * SSE2 instructions 1220 */ 1221 1222 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1223 { 1224 DUMP_RR(dst, src); 1225 emit_2ub(p, 0x66, 0x0f); 1226 if(dst.mod == mod_REG && dst.file == file_REG32) 1227 { 1228 emit_1ub(p, 0x7e); 1229 emit_modrm(p, src, dst); 1230 } 1231 else 1232 { 1233 emit_op_modrm(p, 0x6e, 0x7e, dst, src); 1234 } 1235 } 1236 1237 void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1238 { 1239 DUMP_RR(dst, src); 1240 switch (dst.mod) { 1241 case mod_REG: 1242 emit_3ub(p, 0xf3, 0x0f, 0x7e); 1243 emit_modrm(p, dst, src); 1244 break; 1245 case mod_INDIRECT: 1246 case mod_DISP32: 1247 case mod_DISP8: 1248 assert(src.mod == mod_REG); 1249 emit_3ub(p, 0x66, 0x0f, 0xd6); 1250 emit_modrm(p, src, dst); 1251 break; 1252 default: 1253 assert(0); 1254 break; 1255 } 1256 } 1257 1258 void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1259 { 1260 DUMP_RR(dst, src); 1261 emit_2ub(p, 0xf3, 0x0f); 1262 emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1263 } 1264 1265 void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1266 { 1267 DUMP_RR(dst, src); 1268 emit_2ub(p, 0x66, 0x0f); 1269 emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1270 } 1271 1272 void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1273 { 1274 DUMP_RR(dst, src); 1275 emit_2ub(p, 0xf2, 0x0f); 1276 emit_op_modrm(p, 0x10, 0x11, dst, src); 1277 } 1278 1279 void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1280 { 1281 DUMP_RR(dst, src); 1282 emit_2ub(p, 0x66, 0x0f); 1283 emit_op_modrm(p, 0x10, 0x11, dst, src); 1284 } 1285 1286 void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1287 { 1288 DUMP_RR(dst, src); 1289 emit_2ub(p, 0x66, 0x0f); 1290 emit_op_modrm(p, 0x28, 0x29, dst, src); 1291 } 1292 1293 /** 1294 * Perform a reduced swizzle: 1295 */ 1296 void sse2_pshufd( struct x86_function *p, 1297 struct x86_reg dst, 1298 struct x86_reg src, 1299 unsigned char shuf) 1300 { 1301 DUMP_RRI( dst, src, shuf ); 1302 emit_3ub(p, 0x66, X86_TWOB, 0x70); 1303 emit_modrm(p, dst, src); 1304 emit_1ub(p, shuf); 1305 } 1306 1307 void sse2_pshuflw( struct x86_function *p, 1308 struct x86_reg dst, 1309 struct x86_reg src, 1310 unsigned char shuf) 1311 { 1312 DUMP_RRI( dst, src, shuf ); 1313 emit_3ub(p, 0xf2, X86_TWOB, 0x70); 1314 emit_modrm(p, dst, src); 1315 emit_1ub(p, shuf); 1316 } 1317 1318 void sse2_pshufhw( struct x86_function *p, 1319 struct x86_reg dst, 1320 struct x86_reg src, 1321 unsigned char shuf) 1322 { 1323 DUMP_RRI( dst, src, shuf ); 1324 emit_3ub(p, 0xf3, X86_TWOB, 0x70); 1325 emit_modrm(p, dst, src); 1326 emit_1ub(p, shuf); 1327 } 1328 1329 void sse2_cvttps2dq( struct x86_function *p, 1330 struct x86_reg dst, 1331 struct x86_reg src ) 1332 { 1333 DUMP_RR( dst, src ); 1334 emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 1335 emit_modrm( p, dst, src ); 1336 } 1337 1338 void sse2_cvtps2dq( struct x86_function *p, 1339 struct x86_reg dst, 1340 struct x86_reg src ) 1341 { 1342 DUMP_RR( dst, src ); 1343 emit_3ub(p, 0x66, X86_TWOB, 0x5B); 1344 emit_modrm( p, dst, src ); 1345 } 1346 1347 void sse2_cvtsd2ss( struct x86_function *p, 1348 struct x86_reg dst, 1349 struct x86_reg src ) 1350 { 1351 DUMP_RR( dst, src ); 1352 emit_3ub(p, 0xf2, 0x0f, 0x5a); 1353 emit_modrm( p, dst, src ); 1354 } 1355 1356 void sse2_cvtpd2ps( struct x86_function *p, 1357 struct x86_reg dst, 1358 struct x86_reg src ) 1359 { 1360 DUMP_RR( dst, src ); 1361 emit_3ub(p, 0x66, 0x0f, 0x5a); 1362 emit_modrm( p, dst, src ); 1363 } 1364 1365 void sse2_packssdw( struct x86_function *p, 1366 struct x86_reg dst, 1367 struct x86_reg src ) 1368 { 1369 DUMP_RR( dst, src ); 1370 emit_3ub(p, 0x66, X86_TWOB, 0x6B); 1371 emit_modrm( p, dst, src ); 1372 } 1373 1374 void sse2_packsswb( struct x86_function *p, 1375 struct x86_reg dst, 1376 struct x86_reg src ) 1377 { 1378 DUMP_RR( dst, src ); 1379 emit_3ub(p, 0x66, X86_TWOB, 0x63); 1380 emit_modrm( p, dst, src ); 1381 } 1382 1383 void sse2_packuswb( struct x86_function *p, 1384 struct x86_reg dst, 1385 struct x86_reg src ) 1386 { 1387 DUMP_RR( dst, src ); 1388 emit_3ub(p, 0x66, X86_TWOB, 0x67); 1389 emit_modrm( p, dst, src ); 1390 } 1391 1392 void sse2_punpcklbw( struct x86_function *p, 1393 struct x86_reg dst, 1394 struct x86_reg src ) 1395 { 1396 DUMP_RR( dst, src ); 1397 emit_3ub(p, 0x66, X86_TWOB, 0x60); 1398 emit_modrm( p, dst, src ); 1399 } 1400 1401 void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1402 { 1403 DUMP_RR( dst, src ); 1404 emit_3ub(p, 0x66, 0x0f, 0x61); 1405 emit_modrm( p, dst, src ); 1406 } 1407 1408 void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1409 { 1410 DUMP_RR( dst, src ); 1411 emit_3ub(p, 0x66, 0x0f, 0x62); 1412 emit_modrm( p, dst, src ); 1413 } 1414 1415 void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1416 { 1417 DUMP_RR( dst, src ); 1418 emit_3ub(p, 0x66, 0x0f, 0x6c); 1419 emit_modrm( p, dst, src ); 1420 } 1421 1422 void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1423 { 1424 DUMP_RI(dst, imm); 1425 emit_3ub(p, 0x66, 0x0f, 0x71); 1426 emit_modrm_noreg(p, 6, dst); 1427 emit_1ub(p, imm); 1428 } 1429 1430 void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1431 { 1432 DUMP_RI(dst, imm); 1433 emit_3ub(p, 0x66, 0x0f, 0x72); 1434 emit_modrm_noreg(p, 6, dst); 1435 emit_1ub(p, imm); 1436 } 1437 1438 void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1439 { 1440 DUMP_RI(dst, imm); 1441 emit_3ub(p, 0x66, 0x0f, 0x73); 1442 emit_modrm_noreg(p, 6, dst); 1443 emit_1ub(p, imm); 1444 } 1445 1446 void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1447 { 1448 DUMP_RI(dst, imm); 1449 emit_3ub(p, 0x66, 0x0f, 0x71); 1450 emit_modrm_noreg(p, 2, dst); 1451 emit_1ub(p, imm); 1452 } 1453 1454 void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1455 { 1456 DUMP_RI(dst, imm); 1457 emit_3ub(p, 0x66, 0x0f, 0x72); 1458 emit_modrm_noreg(p, 2, dst); 1459 emit_1ub(p, imm); 1460 } 1461 1462 void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1463 { 1464 DUMP_RI(dst, imm); 1465 emit_3ub(p, 0x66, 0x0f, 0x73); 1466 emit_modrm_noreg(p, 2, dst); 1467 emit_1ub(p, imm); 1468 } 1469 1470 void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1471 { 1472 DUMP_RI(dst, imm); 1473 emit_3ub(p, 0x66, 0x0f, 0x71); 1474 emit_modrm_noreg(p, 4, dst); 1475 emit_1ub(p, imm); 1476 } 1477 1478 void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1479 { 1480 DUMP_RI(dst, imm); 1481 emit_3ub(p, 0x66, 0x0f, 0x72); 1482 emit_modrm_noreg(p, 4, dst); 1483 emit_1ub(p, imm); 1484 } 1485 1486 void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1487 { 1488 DUMP_RR(dst, src); 1489 emit_3ub(p, 0x66, 0x0f, 0xeb); 1490 emit_modrm(p, dst, src); 1491 } 1492 1493 void sse2_rcpps( struct x86_function *p, 1494 struct x86_reg dst, 1495 struct x86_reg src ) 1496 { 1497 DUMP_RR( dst, src ); 1498 emit_2ub(p, X86_TWOB, 0x53); 1499 emit_modrm( p, dst, src ); 1500 } 1501 1502 void sse2_rcpss( struct x86_function *p, 1503 struct x86_reg dst, 1504 struct x86_reg src ) 1505 { 1506 DUMP_RR( dst, src ); 1507 emit_3ub(p, 0xF3, X86_TWOB, 0x53); 1508 emit_modrm( p, dst, src ); 1509 } 1510 1511 /*********************************************************************** 1512 * x87 instructions 1513 */ 1514 static void note_x87_pop( struct x86_function *p ) 1515 { 1516 p->x87_stack--; 1517 assert(p->x87_stack >= 0); 1518 } 1519 1520 static void note_x87_push( struct x86_function *p ) 1521 { 1522 p->x87_stack++; 1523 assert(p->x87_stack <= 7); 1524 } 1525 1526 void x87_assert_stack_empty( struct x86_function *p ) 1527 { 1528 assert (p->x87_stack == 0); 1529 } 1530 1531 1532 void x87_fist( struct x86_function *p, struct x86_reg dst ) 1533 { 1534 DUMP_R( dst ); 1535 emit_1ub(p, 0xdb); 1536 emit_modrm_noreg(p, 2, dst); 1537 } 1538 1539 void x87_fistp( struct x86_function *p, struct x86_reg dst ) 1540 { 1541 DUMP_R( dst ); 1542 emit_1ub(p, 0xdb); 1543 emit_modrm_noreg(p, 3, dst); 1544 note_x87_pop(p); 1545 } 1546 1547 void x87_fild( struct x86_function *p, struct x86_reg arg ) 1548 { 1549 DUMP_R( arg ); 1550 emit_1ub(p, 0xdf); 1551 emit_modrm_noreg(p, 0, arg); 1552 note_x87_push(p); 1553 } 1554 1555 void x87_fldz( struct x86_function *p ) 1556 { 1557 DUMP(); 1558 emit_2ub(p, 0xd9, 0xee); 1559 note_x87_push(p); 1560 } 1561 1562 1563 void x87_fldcw( struct x86_function *p, struct x86_reg arg ) 1564 { 1565 DUMP_R( arg ); 1566 assert(arg.file == file_REG32); 1567 assert(arg.mod != mod_REG); 1568 emit_1ub(p, 0xd9); 1569 emit_modrm_noreg(p, 5, arg); 1570 } 1571 1572 void x87_fld1( struct x86_function *p ) 1573 { 1574 DUMP(); 1575 emit_2ub(p, 0xd9, 0xe8); 1576 note_x87_push(p); 1577 } 1578 1579 void x87_fldl2e( struct x86_function *p ) 1580 { 1581 DUMP(); 1582 emit_2ub(p, 0xd9, 0xea); 1583 note_x87_push(p); 1584 } 1585 1586 void x87_fldln2( struct x86_function *p ) 1587 { 1588 DUMP(); 1589 emit_2ub(p, 0xd9, 0xed); 1590 note_x87_push(p); 1591 } 1592 1593 void x87_fwait( struct x86_function *p ) 1594 { 1595 DUMP(); 1596 emit_1ub(p, 0x9b); 1597 } 1598 1599 void x87_fnclex( struct x86_function *p ) 1600 { 1601 DUMP(); 1602 emit_2ub(p, 0xdb, 0xe2); 1603 } 1604 1605 void x87_fclex( struct x86_function *p ) 1606 { 1607 x87_fwait(p); 1608 x87_fnclex(p); 1609 } 1610 1611 void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) 1612 { 1613 DUMP_R( arg ); 1614 assert(arg.file == file_x87); 1615 emit_2ub(p, 0xda, 0xc0+arg.idx); 1616 } 1617 1618 void x87_fcmove( struct x86_function *p, struct x86_reg arg ) 1619 { 1620 DUMP_R( arg ); 1621 assert(arg.file == file_x87); 1622 emit_2ub(p, 0xda, 0xc8+arg.idx); 1623 } 1624 1625 void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) 1626 { 1627 DUMP_R( arg ); 1628 assert(arg.file == file_x87); 1629 emit_2ub(p, 0xda, 0xd0+arg.idx); 1630 } 1631 1632 void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) 1633 { 1634 DUMP_R( arg ); 1635 assert(arg.file == file_x87); 1636 emit_2ub(p, 0xdb, 0xc0+arg.idx); 1637 } 1638 1639 void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) 1640 { 1641 DUMP_R( arg ); 1642 assert(arg.file == file_x87); 1643 emit_2ub(p, 0xdb, 0xc8+arg.idx); 1644 } 1645 1646 void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) 1647 { 1648 DUMP_R( arg ); 1649 assert(arg.file == file_x87); 1650 emit_2ub(p, 0xdb, 0xd0+arg.idx); 1651 } 1652 1653 1654 1655 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, 1656 unsigned char dst0ub0, 1657 unsigned char dst0ub1, 1658 unsigned char arg0ub0, 1659 unsigned char arg0ub1, 1660 unsigned char argmem_noreg) 1661 { 1662 assert(dst.file == file_x87); 1663 1664 if (arg.file == file_x87) { 1665 if (dst.idx == 0) 1666 emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 1667 else if (arg.idx == 0) 1668 emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 1669 else 1670 assert(0); 1671 } 1672 else if (dst.idx == 0) { 1673 assert(arg.file == file_REG32); 1674 emit_1ub(p, 0xd8); 1675 emit_modrm_noreg(p, argmem_noreg, arg); 1676 } 1677 else 1678 assert(0); 1679 } 1680 1681 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1682 { 1683 DUMP_RR( dst, src ); 1684 x87_arith_op(p, dst, src, 1685 0xd8, 0xc8, 1686 0xdc, 0xc8, 1687 4); 1688 } 1689 1690 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1691 { 1692 DUMP_RR( dst, src ); 1693 x87_arith_op(p, dst, src, 1694 0xd8, 0xe0, 1695 0xdc, 0xe8, 1696 4); 1697 } 1698 1699 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1700 { 1701 DUMP_RR( dst, src ); 1702 x87_arith_op(p, dst, src, 1703 0xd8, 0xe8, 1704 0xdc, 0xe0, 1705 5); 1706 } 1707 1708 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1709 { 1710 DUMP_RR( dst, src ); 1711 x87_arith_op(p, dst, src, 1712 0xd8, 0xc0, 1713 0xdc, 0xc0, 1714 0); 1715 } 1716 1717 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1718 { 1719 DUMP_RR( dst, src ); 1720 x87_arith_op(p, dst, src, 1721 0xd8, 0xf0, 1722 0xdc, 0xf8, 1723 6); 1724 } 1725 1726 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1727 { 1728 DUMP_RR( dst, src ); 1729 x87_arith_op(p, dst, src, 1730 0xd8, 0xf8, 1731 0xdc, 0xf0, 1732 7); 1733 } 1734 1735 void x87_fmulp( struct x86_function *p, struct x86_reg dst ) 1736 { 1737 DUMP_R( dst ); 1738 assert(dst.file == file_x87); 1739 assert(dst.idx >= 1); 1740 emit_2ub(p, 0xde, 0xc8+dst.idx); 1741 note_x87_pop(p); 1742 } 1743 1744 void x87_fsubp( struct x86_function *p, struct x86_reg dst ) 1745 { 1746 DUMP_R( dst ); 1747 assert(dst.file == file_x87); 1748 assert(dst.idx >= 1); 1749 emit_2ub(p, 0xde, 0xe8+dst.idx); 1750 note_x87_pop(p); 1751 } 1752 1753 void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 1754 { 1755 DUMP_R( dst ); 1756 assert(dst.file == file_x87); 1757 assert(dst.idx >= 1); 1758 emit_2ub(p, 0xde, 0xe0+dst.idx); 1759 note_x87_pop(p); 1760 } 1761 1762 void x87_faddp( struct x86_function *p, struct x86_reg dst ) 1763 { 1764 DUMP_R( dst ); 1765 assert(dst.file == file_x87); 1766 assert(dst.idx >= 1); 1767 emit_2ub(p, 0xde, 0xc0+dst.idx); 1768 note_x87_pop(p); 1769 } 1770 1771 void x87_fdivp( struct x86_function *p, struct x86_reg dst ) 1772 { 1773 DUMP_R( dst ); 1774 assert(dst.file == file_x87); 1775 assert(dst.idx >= 1); 1776 emit_2ub(p, 0xde, 0xf8+dst.idx); 1777 note_x87_pop(p); 1778 } 1779 1780 void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 1781 { 1782 DUMP_R( dst ); 1783 assert(dst.file == file_x87); 1784 assert(dst.idx >= 1); 1785 emit_2ub(p, 0xde, 0xf0+dst.idx); 1786 note_x87_pop(p); 1787 } 1788 1789 void x87_ftst( struct x86_function *p ) 1790 { 1791 DUMP(); 1792 emit_2ub(p, 0xd9, 0xe4); 1793 } 1794 1795 void x87_fucom( struct x86_function *p, struct x86_reg arg ) 1796 { 1797 DUMP_R( arg ); 1798 assert(arg.file == file_x87); 1799 emit_2ub(p, 0xdd, 0xe0+arg.idx); 1800 } 1801 1802 void x87_fucomp( struct x86_function *p, struct x86_reg arg ) 1803 { 1804 DUMP_R( arg ); 1805 assert(arg.file == file_x87); 1806 emit_2ub(p, 0xdd, 0xe8+arg.idx); 1807 note_x87_pop(p); 1808 } 1809 1810 void x87_fucompp( struct x86_function *p ) 1811 { 1812 DUMP(); 1813 emit_2ub(p, 0xda, 0xe9); 1814 note_x87_pop(p); /* pop twice */ 1815 note_x87_pop(p); /* pop twice */ 1816 } 1817 1818 void x87_fxch( struct x86_function *p, struct x86_reg arg ) 1819 { 1820 DUMP_R( arg ); 1821 assert(arg.file == file_x87); 1822 emit_2ub(p, 0xd9, 0xc8+arg.idx); 1823 } 1824 1825 void x87_fabs( struct x86_function *p ) 1826 { 1827 DUMP(); 1828 emit_2ub(p, 0xd9, 0xe1); 1829 } 1830 1831 void x87_fchs( struct x86_function *p ) 1832 { 1833 DUMP(); 1834 emit_2ub(p, 0xd9, 0xe0); 1835 } 1836 1837 void x87_fcos( struct x86_function *p ) 1838 { 1839 DUMP(); 1840 emit_2ub(p, 0xd9, 0xff); 1841 } 1842 1843 1844 void x87_fprndint( struct x86_function *p ) 1845 { 1846 DUMP(); 1847 emit_2ub(p, 0xd9, 0xfc); 1848 } 1849 1850 void x87_fscale( struct x86_function *p ) 1851 { 1852 DUMP(); 1853 emit_2ub(p, 0xd9, 0xfd); 1854 } 1855 1856 void x87_fsin( struct x86_function *p ) 1857 { 1858 DUMP(); 1859 emit_2ub(p, 0xd9, 0xfe); 1860 } 1861 1862 void x87_fsincos( struct x86_function *p ) 1863 { 1864 DUMP(); 1865 emit_2ub(p, 0xd9, 0xfb); 1866 } 1867 1868 void x87_fsqrt( struct x86_function *p ) 1869 { 1870 DUMP(); 1871 emit_2ub(p, 0xd9, 0xfa); 1872 } 1873 1874 void x87_fxtract( struct x86_function *p ) 1875 { 1876 DUMP(); 1877 emit_2ub(p, 0xd9, 0xf4); 1878 } 1879 1880 /* st0 = (2^st0)-1 1881 * 1882 * Restrictions: -1.0 <= st0 <= 1.0 1883 */ 1884 void x87_f2xm1( struct x86_function *p ) 1885 { 1886 DUMP(); 1887 emit_2ub(p, 0xd9, 0xf0); 1888 } 1889 1890 /* st1 = st1 * log2(st0); 1891 * pop_stack; 1892 */ 1893 void x87_fyl2x( struct x86_function *p ) 1894 { 1895 DUMP(); 1896 emit_2ub(p, 0xd9, 0xf1); 1897 note_x87_pop(p); 1898 } 1899 1900 /* st1 = st1 * log2(st0 + 1.0); 1901 * pop_stack; 1902 * 1903 * A fast operation, with restrictions: -.29 < st0 < .29 1904 */ 1905 void x87_fyl2xp1( struct x86_function *p ) 1906 { 1907 DUMP(); 1908 emit_2ub(p, 0xd9, 0xf9); 1909 note_x87_pop(p); 1910 } 1911 1912 1913 void x87_fld( struct x86_function *p, struct x86_reg arg ) 1914 { 1915 DUMP_R( arg ); 1916 if (arg.file == file_x87) 1917 emit_2ub(p, 0xd9, 0xc0 + arg.idx); 1918 else { 1919 emit_1ub(p, 0xd9); 1920 emit_modrm_noreg(p, 0, arg); 1921 } 1922 note_x87_push(p); 1923 } 1924 1925 void x87_fst( struct x86_function *p, struct x86_reg dst ) 1926 { 1927 DUMP_R( dst ); 1928 if (dst.file == file_x87) 1929 emit_2ub(p, 0xdd, 0xd0 + dst.idx); 1930 else { 1931 emit_1ub(p, 0xd9); 1932 emit_modrm_noreg(p, 2, dst); 1933 } 1934 } 1935 1936 void x87_fstp( struct x86_function *p, struct x86_reg dst ) 1937 { 1938 DUMP_R( dst ); 1939 if (dst.file == file_x87) 1940 emit_2ub(p, 0xdd, 0xd8 + dst.idx); 1941 else { 1942 emit_1ub(p, 0xd9); 1943 emit_modrm_noreg(p, 3, dst); 1944 } 1945 note_x87_pop(p); 1946 } 1947 1948 void x87_fpop( struct x86_function *p ) 1949 { 1950 x87_fstp( p, x86_make_reg( file_x87, 0 )); 1951 } 1952 1953 1954 void x87_fcom( struct x86_function *p, struct x86_reg dst ) 1955 { 1956 DUMP_R( dst ); 1957 if (dst.file == file_x87) 1958 emit_2ub(p, 0xd8, 0xd0 + dst.idx); 1959 else { 1960 emit_1ub(p, 0xd8); 1961 emit_modrm_noreg(p, 2, dst); 1962 } 1963 } 1964 1965 1966 void x87_fcomp( struct x86_function *p, struct x86_reg dst ) 1967 { 1968 DUMP_R( dst ); 1969 if (dst.file == file_x87) 1970 emit_2ub(p, 0xd8, 0xd8 + dst.idx); 1971 else { 1972 emit_1ub(p, 0xd8); 1973 emit_modrm_noreg(p, 3, dst); 1974 } 1975 note_x87_pop(p); 1976 } 1977 1978 void x87_fcomi( struct x86_function *p, struct x86_reg arg ) 1979 { 1980 DUMP_R( arg ); 1981 emit_2ub(p, 0xdb, 0xf0+arg.idx); 1982 } 1983 1984 void x87_fcomip( struct x86_function *p, struct x86_reg arg ) 1985 { 1986 DUMP_R( arg ); 1987 emit_2ub(p, 0xdb, 0xf0+arg.idx); 1988 note_x87_pop(p); 1989 } 1990 1991 1992 void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) 1993 { 1994 DUMP_R( dst ); 1995 assert(dst.file == file_REG32); 1996 1997 if (dst.idx == reg_AX && 1998 dst.mod == mod_REG) 1999 emit_2ub(p, 0xdf, 0xe0); 2000 else { 2001 emit_1ub(p, 0xdd); 2002 emit_modrm_noreg(p, 7, dst); 2003 } 2004 } 2005 2006 2007 void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) 2008 { 2009 DUMP_R( dst ); 2010 assert(dst.file == file_REG32); 2011 2012 emit_1ub(p, 0x9b); /* WAIT -- needed? */ 2013 emit_1ub(p, 0xd9); 2014 emit_modrm_noreg(p, 7, dst); 2015 } 2016 2017 2018 2019 2020 /*********************************************************************** 2021 * MMX instructions 2022 */ 2023 2024 void mmx_emms( struct x86_function *p ) 2025 { 2026 DUMP(); 2027 assert(p->need_emms); 2028 emit_2ub(p, 0x0f, 0x77); 2029 p->need_emms = 0; 2030 } 2031 2032 void mmx_packssdw( struct x86_function *p, 2033 struct x86_reg dst, 2034 struct x86_reg src ) 2035 { 2036 DUMP_RR( dst, src ); 2037 assert(dst.file == file_MMX && 2038 (src.file == file_MMX || src.mod != mod_REG)); 2039 2040 p->need_emms = 1; 2041 2042 emit_2ub(p, X86_TWOB, 0x6b); 2043 emit_modrm( p, dst, src ); 2044 } 2045 2046 void mmx_packuswb( struct x86_function *p, 2047 struct x86_reg dst, 2048 struct x86_reg src ) 2049 { 2050 DUMP_RR( dst, src ); 2051 assert(dst.file == file_MMX && 2052 (src.file == file_MMX || src.mod != mod_REG)); 2053 2054 p->need_emms = 1; 2055 2056 emit_2ub(p, X86_TWOB, 0x67); 2057 emit_modrm( p, dst, src ); 2058 } 2059 2060 void mmx_movd( struct x86_function *p, 2061 struct x86_reg dst, 2062 struct x86_reg src ) 2063 { 2064 DUMP_RR( dst, src ); 2065 p->need_emms = 1; 2066 emit_1ub(p, X86_TWOB); 2067 emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 2068 } 2069 2070 void mmx_movq( struct x86_function *p, 2071 struct x86_reg dst, 2072 struct x86_reg src ) 2073 { 2074 DUMP_RR( dst, src ); 2075 p->need_emms = 1; 2076 emit_1ub(p, X86_TWOB); 2077 emit_op_modrm( p, 0x6f, 0x7f, dst, src ); 2078 } 2079 2080 2081 /*********************************************************************** 2082 * Helper functions 2083 */ 2084 2085 2086 void x86_cdecl_caller_push_regs( struct x86_function *p ) 2087 { 2088 x86_push(p, x86_make_reg(file_REG32, reg_AX)); 2089 x86_push(p, x86_make_reg(file_REG32, reg_CX)); 2090 x86_push(p, x86_make_reg(file_REG32, reg_DX)); 2091 } 2092 2093 void x86_cdecl_caller_pop_regs( struct x86_function *p ) 2094 { 2095 x86_pop(p, x86_make_reg(file_REG32, reg_DX)); 2096 x86_pop(p, x86_make_reg(file_REG32, reg_CX)); 2097 x86_pop(p, x86_make_reg(file_REG32, reg_AX)); 2098 } 2099 2100 2101 struct x86_reg x86_fn_arg( struct x86_function *p, 2102 unsigned arg ) 2103 { 2104 switch(x86_target(p)) 2105 { 2106 case X86_64_WIN64_ABI: 2107 /* Microsoft uses a different calling convention than the rest of the world */ 2108 switch(arg) 2109 { 2110 case 1: 2111 return x86_make_reg(file_REG32, reg_CX); 2112 case 2: 2113 return x86_make_reg(file_REG32, reg_DX); 2114 case 3: 2115 return x86_make_reg(file_REG32, reg_R8); 2116 case 4: 2117 return x86_make_reg(file_REG32, reg_R9); 2118 default: 2119 /* Win64 allocates stack slots as if it pushed the first 4 arguments too */ 2120 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2121 p->stack_offset + arg * 8); 2122 } 2123 case X86_64_STD_ABI: 2124 switch(arg) 2125 { 2126 case 1: 2127 return x86_make_reg(file_REG32, reg_DI); 2128 case 2: 2129 return x86_make_reg(file_REG32, reg_SI); 2130 case 3: 2131 return x86_make_reg(file_REG32, reg_DX); 2132 case 4: 2133 return x86_make_reg(file_REG32, reg_CX); 2134 case 5: 2135 return x86_make_reg(file_REG32, reg_R8); 2136 case 6: 2137 return x86_make_reg(file_REG32, reg_R9); 2138 default: 2139 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2140 p->stack_offset + (arg - 6) * 8); /* ??? */ 2141 } 2142 case X86_32: 2143 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2144 p->stack_offset + arg * 4); /* ??? */ 2145 default: 2146 assert(0 && "Unexpected x86 target ABI in x86_fn_arg"); 2147 return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */ 2148 } 2149 } 2150 2151 static void x86_init_func_common( struct x86_function *p ) 2152 { 2153 util_cpu_detect(); 2154 p->caps = 0; 2155 if(util_cpu_caps.has_mmx) 2156 p->caps |= X86_MMX; 2157 if(util_cpu_caps.has_mmx2) 2158 p->caps |= X86_MMX2; 2159 if(util_cpu_caps.has_sse) 2160 p->caps |= X86_SSE; 2161 if(util_cpu_caps.has_sse2) 2162 p->caps |= X86_SSE2; 2163 if(util_cpu_caps.has_sse3) 2164 p->caps |= X86_SSE3; 2165 if(util_cpu_caps.has_sse4_1) 2166 p->caps |= X86_SSE4_1; 2167 p->csr = p->store; 2168 DUMP_START(); 2169 } 2170 2171 void x86_init_func( struct x86_function *p ) 2172 { 2173 p->size = 0; 2174 p->store = NULL; 2175 x86_init_func_common(p); 2176 } 2177 2178 void x86_init_func_size( struct x86_function *p, unsigned code_size ) 2179 { 2180 p->size = code_size; 2181 p->store = rtasm_exec_malloc(code_size); 2182 if (p->store == NULL) { 2183 p->store = p->error_overflow; 2184 } 2185 x86_init_func_common(p); 2186 } 2187 2188 void x86_release_func( struct x86_function *p ) 2189 { 2190 if (p->store && p->store != p->error_overflow) 2191 rtasm_exec_free(p->store); 2192 2193 p->store = NULL; 2194 p->csr = NULL; 2195 p->size = 0; 2196 } 2197 2198 2199 static inline x86_func 2200 voidptr_to_x86_func(void *v) 2201 { 2202 union { 2203 void *v; 2204 x86_func f; 2205 } u; 2206 STATIC_ASSERT(sizeof(u.v) == sizeof(u.f)); 2207 u.v = v; 2208 return u.f; 2209 } 2210 2211 2212 x86_func x86_get_func( struct x86_function *p ) 2213 { 2214 DUMP_END(); 2215 if (DISASSEM && p->store) 2216 debug_printf("disassemble %p %p\n", p->store, p->csr); 2217 2218 if (p->store == p->error_overflow) 2219 return voidptr_to_x86_func(NULL); 2220 else 2221 return voidptr_to_x86_func(p->store); 2222 } 2223 2224 #else 2225 2226 void x86sse_dummy( void ); 2227 2228 void x86sse_dummy( void ) 2229 { 2230 } 2231 2232 #endif 2233