#ifdef USE_X86_ASM
#if defined(__i386__) || defined(__386__)

#include "main/imports.h"
#include "x86sse.h"

#define DISASSEM 0
#define X86_TWOB 0x0f

/* r/m encoding of EBP.  With mod == 00 this r/m value selects a bare
 * disp32 rather than [ebp], so [ebp] has to be encoded with a zero
 * disp8 instead.
 */
#define X86_RM_EBP 5

#if 0
static unsigned char *cptr( void (*label)() )
{
   return (unsigned char *)(unsigned long)label;
}
#endif


/* Grow the code buffer.  An empty function gets its initial 1024
 * bytes; otherwise the size is doubled and the bytes emitted so far
 * are copied across.
 *
 * The previous store is freed, so a raw pointer obtained from
 * x86_get_label() before this call no longer points into the buffer.
 */
static void do_realloc( struct x86_function *p )
{
   if (p->size == 0) {
      p->size = 1024;
      p->store = _mesa_exec_malloc(p->size);
      assert(p->store);         /* no way to report failure from here */
      p->csr = p->store;
   }
   else {
      unsigned used = p->csr - p->store;
      unsigned char *old = p->store;

      p->size *= 2;
      p->store = _mesa_exec_malloc(p->size);
      assert(p->store);         /* the memcpy below would fault anyway */
      memcpy(p->store, old, used);
      p->csr = p->store + used;
      _mesa_exec_free(old);
   }
}

/* Reserve 'bytes' bytes in the instruction stream and return a pointer
 * to the first of them; the cursor is advanced past the reservation.
 */
static unsigned char *reserve( struct x86_function *p, int bytes )
{
   unsigned char *csr;

   /* Loop rather than grow once: a single doubling may not be enough
    * if the buffer was created very small by x86_init_func_size().
    */
   while (p->csr + bytes - p->store > p->size)
      do_realloc(p);

   csr = p->csr;
   p->csr += bytes;
   return csr;
}


/* Emit one signed byte (used for 8-bit displacements / offsets). */
static void emit_1b( struct x86_function *p, char b0 )
{
   char *csr = (char *)reserve(p, 1);
   *csr = b0;
}

/* Emit one int in host byte order (32-bit immediates / displacements).
 * memcpy is used because the cursor is not necessarily int-aligned.
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *csr = reserve(p, sizeof(i0));
   memcpy(csr, &i0, sizeof(i0));
}

/* Emit one unsigned byte. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *csr = reserve(p, 1);
   csr[0] = b0;
}

/* Emit two unsigned bytes. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *csr = reserve(p, 2);
   csr[0] = b0;
   csr[1] = b1;
}

/* Emit three unsigned bytes. */
static void emit_3ub( struct x86_function *p,
                      unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *csr = reserve(p, 3);
   csr[0] = b0;
   csr[1] = b1;
   csr[2] = b2;
}


/* Build a modRM byte + possible displacement.  No treatment of SIB
 * indexing.  BZZT - no way to encode an absolute address.
 *
 * 'reg' supplies the reg field and must be a plain register; 'regmem'
 * supplies the mod and r/m fields plus any displacement.
 */
static void emit_modrm( struct x86_function *p,
                        struct x86_reg reg,
                        struct x86_reg regmem )
{
   unsigned char val = 0;

   assert(reg.mod == mod_REG);

   val |= regmem.mod << 6;      /* mod field */
   val |= reg.idx << 3;         /* reg field */
   val |= regmem.idx;           /* r/m field */

   emit_1ub(p, val);

   /* Oh-oh we've stumbled into the SIB thing.  A memory operand based
    * on ESP needs a SIB byte; a register-direct ESP operand must not
    * get one, otherwise a stray byte lands in the instruction stream.
    */
   if (regmem.file == file_REG32 &&
       regmem.idx == reg_SP &&
       regmem.mod != mod_REG) {
      emit_1ub(p, 0x24);        /* simplistic! */
   }

   switch (regmem.mod) {
   case mod_REG:
   case mod_INDIRECT:
      break;
   case mod_DISP8:
      emit_1b(p, regmem.disp);
      break;
   case mod_DISP32:
      emit_1i(p, regmem.disp);
      break;
   default:
      assert(0);
      break;
   }
}


/* Emit a modRM byte whose reg field carries an opcode extension ('op',
 * the "/digit" of the instruction) instead of a register operand.
 */
static void emit_modrm_noreg( struct x86_function *p,
                              unsigned op,
                              struct x86_reg regmem )
{
   struct x86_reg dummy = x86_make_reg(file_REG32, op);
   emit_modrm(p, dummy, regmem);
}

/* Many x86 instructions have two opcodes to cope with the situations
 * where the destination is a register or memory reference
 * respectively.  This function selects the correct opcode based on
 * the arguments presented.
 */
static void emit_op_modrm( struct x86_function *p,
                           unsigned char op_dst_is_reg,
                           unsigned char op_dst_is_mem,
                           struct x86_reg dst,
                           struct x86_reg src )
{
   switch (dst.mod) {
   case mod_REG:
      emit_1ub(p, op_dst_is_reg);
      emit_modrm(p, dst, src);
      break;
   case mod_INDIRECT:
   case mod_DISP32:
   case mod_DISP8:
      /* Memory destination: the source must be a register. */
      assert(src.mod == mod_REG);
      emit_1ub(p, op_dst_is_mem);
      emit_modrm(p, src, dst);
      break;
   default:
      assert(0);
      break;
   }
}


/* Create and manipulate registers and regmem values:
 */

/* Build a plain register operand (mod_REG, zero displacement). */
struct x86_reg x86_make_reg( enum x86_reg_file file,
                             enum x86_reg_name idx )
{
   struct x86_reg reg;

   reg.file = file;
   reg.idx = idx;
   reg.mod = mod_REG;
   reg.disp = 0;

   return reg;
}

/* Turn a 32-bit register into a memory reference at [reg + disp].  If
 * 'reg' is already a memory reference, 'disp' is added to its current
 * displacement.  The smallest encoding that can hold the resulting
 * displacement is selected.
 */
struct x86_reg x86_make_disp( struct x86_reg reg,
                              int disp )
{
   assert(reg.file == file_REG32);

   if (reg.mod == mod_REG)
      reg.disp = disp;
   else
      reg.disp += disp;

   /* [ebp] has no mod == 00 encoding (that slot means disp32 only),
    * so EBP with a zero displacement falls through to disp8.
    */
   if (reg.disp == 0 && reg.idx != X86_RM_EBP)
      reg.mod = mod_INDIRECT;
   else if (reg.disp <= 127 && reg.disp >= -128)
      reg.mod = mod_DISP8;
   else
      reg.mod = mod_DISP32;

   return reg;
}

/* Memory reference at [reg] (adds a zero displacement). */
struct x86_reg x86_deref( struct x86_reg reg )
{
   return x86_make_disp(reg, 0);
}

/* Strip any addressing mode, returning the underlying register. */
struct x86_reg x86_get_base_reg( struct x86_reg reg )
{
   return x86_make_reg( reg.file, reg.idx );
}

/* Current emit position.  This is a raw pointer into the code buffer
 * and is invalidated when the buffer is reallocated.
 */
unsigned char *x86_get_label( struct x86_function *p )
{
   return p->csr;
}



/***********************************************************************
 * x86 instructions
 */


/* Conditional jump to 'label'.  Uses the 2-byte short form (0x70+cc,
 * rel8) when the target is in range, otherwise the 6-byte near form
 * (0x0f 0x80+cc, rel32).  Offsets are relative to the end of the
 * emitted instruction.
 */
void x86_jcc( struct x86_function *p,
              enum x86_cc cc,
              unsigned char *label )
{
   int offset = label - (x86_get_label(p) + 2);

   if (offset <= 127 && offset >= -128) {
      emit_1ub(p, 0x70 + cc);
      emit_1b(p, (char) offset);
   }
   else {
      offset = label - (x86_get_label(p) + 6);
      emit_2ub(p, 0x0f, 0x80 + cc);
      emit_1i(p, offset);
   }
}

/* Always use a 32bit offset for forward jumps.  The returned pointer
 * is handed to x86_fixup_fwd_jump() once the target is known.
 */
unsigned char *x86_jcc_forward( struct x86_function *p,
                                enum x86_cc cc )
{
   emit_2ub(p, 0x0f, 0x80 + cc);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Unconditional forward jump (0xe9 rel32) with a placeholder offset. */
unsigned char *x86_jmp_forward( struct x86_function *p )
{
   emit_1ub(p, 0xe9);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Forward call (0xe8 rel32) with a placeholder offset. */
unsigned char *x86_call_forward( struct x86_function *p )
{
   emit_1ub(p, 0xe8);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Fixup offset from forward jump: patch the rel32 that ends at 'fixup'
 * so that it targets the current emit position.
 */
void x86_fixup_fwd_jump( struct x86_function *p,
                         unsigned char *fixup )
{
   int offset = x86_get_label(p) - fixup;

   /* The patch location is not necessarily int-aligned. */
   memcpy(fixup - 4, &offset, sizeof(offset));
}

/* Unconditional near jump (0xe9 rel32) to 'label'.  The label is read
 * after the opcode byte has been emitted, hence the "- 4" for the
 * remaining rel32 field.
 */
void x86_jmp( struct x86_function *p, unsigned char *label )
{
   emit_1ub(p, 0xe9);
   emit_1i(p, label - x86_get_label(p) - 4);
}

#if 0
/* This doesn't work once we start reallocating & copying the
 * generated code on buffer fills, because the call is relative to the
 * current pc.
 */
void x86_call( struct x86_function *p, void (*label)())
{
   emit_1ub(p, 0xe8);
   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
}
#else
/* Indirect call through a register or memory operand (0xff /2). */
void x86_call( struct x86_function *p, struct x86_reg reg )
{
   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, 2, reg);
}
#endif


/* michal:
 * Temporary. As I need immediate operands, and don't want to mess with
 * the codegen, I load the immediate into a general purpose register
 * and use it.
 */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   assert(dst.mod == mod_REG);
   emit_1ub(p, 0xb8 + dst.idx);
   emit_1i(p, imm);
}

/* push reg (0x50+r); tracks the 4 bytes added to the stack. */
void x86_push( struct x86_function *p,
               struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x50 + reg.idx);
   p->stack_offset += 4;
}

/* pop reg (0x58+r); tracks the 4 bytes removed from the stack. */
void x86_pop( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x58 + reg.idx);
   p->stack_offset -= 4;
}

/* inc reg (0x40+r). */
void x86_inc( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x40 + reg.idx);
}

/* dec reg (0x48+r). */
void x86_dec( struct x86_function *p,
              struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x48 + reg.idx);
}

/* ret (0xc3). */
void x86_ret( struct x86_function *p )
{
   emit_1ub(p, 0xc3);
}

/* sahf (0x9e). */
void x86_sahf( struct x86_function *p )
{
   emit_1ub(p, 0x9e);
}

/* mov dst, src (0x8b / 0x89). */
void x86_mov( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

/* xor dst, src (0x33 / 0x31). */
void x86_xor( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x33, 0x31, dst, src );
}

/* cmp dst, src (0x3b / 0x39). */
void x86_cmp( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x3b, 0x39, dst, src );
}

/* lea dst, src (0x8d); dst must be a plain register. */
void x86_lea( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_1ub(p, 0x8d);
   emit_modrm( p, dst, src );
}

/* test (0x85); dst goes in the reg field and must be a plain register. */
void x86_test( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   emit_1ub(p, 0x85);
   emit_modrm( p, dst, src );
}

/* add dst, src (0x03 / 0x01). */
void x86_add( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm(p, 0x03, 0x01, dst, src );
}

/* mul src (0xf7 with an opcode extension in the reg field).  The
 * extension is supplied by passing reg_SP as a fake destination
 * register -- presumably reg_SP encodes as 4, giving F7 /4; confirm
 * against the enum in x86sse.h.
 */
void x86_mul( struct x86_function *p,
              struct x86_reg src )
{
   assert (src.file == file_REG32 && src.mod == mod_REG);
   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
}

/* sub dst, src (0x2b / 0x29). */
void x86_sub( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm(p, 0x2b, 0x29, dst, src );
}

/* or dst, src (0x0b / 0x09). */
void x86_or( struct x86_function *p,
             struct x86_reg dst,
             struct x86_reg src )
{
   emit_op_modrm( p, 0x0b, 0x09, dst, src );
}

/* and dst, src (0x23 / 0x21). */
void x86_and( struct x86_function *p,
              struct x86_reg dst,
              struct x86_reg src )
{
   emit_op_modrm( p, 0x23, 0x21, dst, src );
}



/***********************************************************************
 * SSE instructions
 */


/* movss (F3 0F 10 / 11). */
void sse_movss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, 0xF3, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

/* movaps (0F 28 / 29). */
void sse_movaps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x28, 0x29, dst, src );
}

/* movups (0F 10 / 11). */
void sse_movups( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

/* movhps (0F 16 / 17); one operand must be memory. */
void sse_movhps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
}

/* movlps (0F 12 / 13); one operand must be memory. */
void sse_movlps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
}

/* maxps (0F 5F). */
void sse_maxps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

/* maxss (F3 0F 5F). */
void sse_maxss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

/* divss (F3 0F 5E). */
void sse_divss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
   emit_modrm( p, dst, src );
}

/* minps (0F 5D). */
void sse_minps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5D);
   emit_modrm( p, dst, src );
}

/* subps (0F 5C). */
void sse_subps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5C);
   emit_modrm( p, dst, src );
}

/* mulps (0F 59). */
void sse_mulps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

/* mulss (F3 0F 59). */
void sse_mulss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

/* addps (0F 58). */
void sse_addps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

/* addss (F3 0F 58). */
void sse_addss( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

/* andnps (0F 55). */
void sse_andnps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x55);
   emit_modrm( p, dst, src );
}

/* andps (0F 54). */
void sse_andps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x54);
   emit_modrm( p, dst, src );
}

/* rsqrtps (0F 52). */
void sse_rsqrtps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );
}

/* rsqrtss (F3 0F 52). */
void sse_rsqrtss( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );
}

/* movhlps (0F 12, register-to-register only). */
void sse_movhlps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x12);
   emit_modrm( p, dst, src );
}

/* movlhps (0F 16, register-to-register only). */
void sse_movlhps( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x16);
   emit_modrm( p, dst, src );
}

/* orps (0F 56). */
void sse_orps( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x56);
   emit_modrm( p, dst, src );
}

/* xorps (0F 57). */
void sse_xorps( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x57);
   emit_modrm( p, dst, src );
}

/* cvtps2pi (0F 2D): MMX destination, XMM or memory source.  Writes an
 * MMX register, so the function is flagged as needing emms.
 */
void sse_cvtps2pi( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_XMM || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x2d);
   emit_modrm( p, dst, src );
}


/* Shufps can also be used to implement a reduced swizzle when dest ==
 * arg0.
 */
void sse_shufps( struct x86_function *p,
                 struct x86_reg dest,
                 struct x86_reg arg0,
                 unsigned char shuf )
{
   emit_2ub(p, X86_TWOB, 0xC6);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, shuf);
}

/* cmpps (0F C2 ib); 'cc' is the comparison predicate immediate. */
void sse_cmpps( struct x86_function *p,
                struct x86_reg dest,
                struct x86_reg arg0,
                unsigned char cc )
{
   emit_2ub(p, X86_TWOB, 0xC2);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, cc);
}

/* pmovmskb (66 0F D7). */
void sse_pmovmskb( struct x86_function *p,
                   struct x86_reg dest,
                   struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0xD7);
   emit_modrm(p, dest, src);
}

/***********************************************************************
 * SSE2 instructions
 */

/**
 * Perform a reduced swizzle: pshufd (66 0F 70 ib).
 */
void sse2_pshufd( struct x86_function *p,
                  struct x86_reg dest,
                  struct x86_reg arg0,
                  unsigned char shuf )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x70);
   emit_modrm(p, dest, arg0);
   emit_1ub(p, shuf);
}

/* cvttps2dq (F3 0F 5B). */
void sse2_cvttps2dq( struct x86_function *p,
                     struct x86_reg dst,
                     struct x86_reg src )
{
   emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
   emit_modrm( p, dst, src );
}

/* cvtps2dq (66 0F 5B). */
void sse2_cvtps2dq( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
   emit_modrm( p, dst, src );
}

/* packssdw (66 0F 6B). */
void sse2_packssdw( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
   emit_modrm( p, dst, src );
}

/* packsswb (66 0F 63). */
void sse2_packsswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x63);
   emit_modrm( p, dst, src );
}

/* packuswb (66 0F 67). */
void sse2_packuswb( struct x86_function *p,
                    struct x86_reg dst,
                    struct x86_reg src )
{
   emit_3ub(p, 0x66, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

/* rcpps (0F 53). */
void sse2_rcpps( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}

/* rcpss (F3 0F 53). */
void sse2_rcpss( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
   emit_modrm( p, dst, src );
}

/* movd (66 0F 6E / 7E). */
void sse2_movd( struct x86_function *p,
                struct x86_reg dst,
                struct x86_reg src )
{
   emit_2ub(p, 0x66, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}



/***********************************************************************
 * x87 instructions
 */

/* fist: store st0 as an integer to memory (DB /2). */
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 2, dst);
}

/* fistp: store st0 as an integer to memory and pop (DB /3). */
void x87_fistp( struct x86_function *p, struct x86_reg dst )
{
   emit_1ub(p, 0xdb);
   emit_modrm_noreg(p, 3, dst);
}

/* fild: load an integer from memory (DF /0).
 *
 * NOTE(review): DF /0 is the 16-bit integer form, whereas x87_fist and
 * x87_fistp above use the 32-bit DB forms -- confirm the operand size
 * callers expect before changing this.
 */
void x87_fild( struct x86_function *p, struct x86_reg arg )
{
   emit_1ub(p, 0xdf);
   emit_modrm_noreg(p, 0, arg);
}

/* fldz: push +0.0 (D9 EE). */
void x87_fldz( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xee);
}


/* fldcw: load the FPU control word from memory (D9 /5). */
void x87_fldcw( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_REG32);
   assert(arg.mod != mod_REG);
   emit_1ub(p, 0xd9);
   emit_modrm_noreg(p, 5, arg);
}

/* fld1: push 1.0 (D9 E8). */
void x87_fld1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xe8);
}

/* fldl2e: push log2(e) (D9 EA). */
void x87_fldl2e( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xea);
}

/* fldln2: push ln(2) (D9 ED). */
void x87_fldln2( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xed);
}

/* fwait (9B). */
void x87_fwait( struct x86_function *p )
{
   emit_1ub(p, 0x9b);
}

/* fnclex: clear exceptions without waiting (DB E2). */
void x87_fnclex( struct x86_function *p )
{
   emit_2ub(p, 0xdb, 0xe2);
}

/* fclex: fwait followed by fnclex. */
void x87_fclex( struct x86_function *p )
{
   x87_fwait(p);
   x87_fnclex(p);
}


/* Common encoder for the two-operand x87 arithmetic instructions.
 *
 *   dst0ub0/dst0ub1  - opcode bytes for "op st(0), st(i)"
 *   arg0ub0/arg0ub1  - opcode bytes for "op st(i), st(0)"
 *   argmem_noreg     - opcode extension for "op st(0), mem" (D8 /digit)
 *
 * One of the two operands must be st(0); a memory argument is only
 * allowed when the destination is st(0).
 */
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
                          unsigned char dst0ub0,
                          unsigned char dst0ub1,
                          unsigned char arg0ub0,
                          unsigned char arg0ub1,
                          unsigned char argmem_noreg )
{
   assert(dst.file == file_x87);

   if (arg.file == file_x87) {
      if (dst.idx == 0) {
         emit_2ub(p, dst0ub0, dst0ub1 + arg.idx);
      }
      else if (arg.idx == 0) {
         /* The register encoded in the opcode is the destination
          * st(i); arg.idx is always 0 on this path.
          */
         emit_2ub(p, arg0ub0, arg0ub1 + dst.idx);
      }
      else {
         assert(0);
      }
   }
   else if (dst.idx == 0) {
      assert(arg.file == file_REG32);
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, argmem_noreg, arg);
   }
   else {
      assert(0);
   }
}

/* fmul (D8 C8+i / DC C8+i / D8 /1 for memory). */
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xc8,
                0xdc, 0xc8,
                1);
}

/* fsub (D8 E0+i / DC E8+i / D8 /4 for memory). */
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xe0,
                0xdc, 0xe8,
                4);
}

/* fsubr (D8 E8+i / DC E0+i / D8 /5 for memory). */
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xe8,
                0xdc, 0xe0,
                5);
}

/* fadd (D8 C0+i / DC C0+i / D8 /0 for memory). */
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xc0,
                0xdc, 0xc0,
                0);
}

/* fdiv (D8 F0+i / DC F8+i / D8 /6 for memory). */
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xf0,
                0xdc, 0xf8,
                6);
}

/* fdivr (D8 F8+i / DC F0+i / D8 /7 for memory). */
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
{
   x87_arith_op(p, dst, arg,
                0xd8, 0xf8,
                0xdc, 0xf0,
                7);
}

/* fmulp st(i), st(0) (DE C8+i); i must be >= 1. */
void x87_fmulp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc8+dst.idx);
}

/* fsubp st(i), st(0) (DE E8+i); i must be >= 1. */
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe8+dst.idx);
}

/* fsubrp st(i), st(0) (DE E0+i); i must be >= 1. */
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xe0+dst.idx);
}

/* faddp st(i), st(0) (DE C0+i); i must be >= 1. */
void x87_faddp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xc0+dst.idx);
}

/* fdivp st(i), st(0) (DE F8+i); i must be >= 1. */
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf8+dst.idx);
}

/* fdivrp st(i), st(0) (DE F0+i); i must be >= 1. */
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_x87);
   assert(dst.idx >= 1);
   emit_2ub(p, 0xde, 0xf0+dst.idx);
}

/* fucom st(i) (DD E0+i). */
void x87_fucom( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe0+arg.idx);
}

/* fucomp st(i) (DD E8+i). */
void x87_fucomp( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_x87);
   emit_2ub(p, 0xdd, 0xe8+arg.idx);
}

/* fucompp (DA E9). */
void x87_fucompp( struct x86_function *p )
{
   emit_2ub(p, 0xda, 0xe9);
}

/* fxch st(i) (D9 C8+i). */
void x87_fxch( struct x86_function *p, struct x86_reg arg )
{
   assert(arg.file == file_x87);
   emit_2ub(p, 0xd9, 0xc8+arg.idx);
}

/* fabs (D9 E1). */
void x87_fabs( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xe1);
}

/* fchs (D9 E0). */
void x87_fchs( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xe0);
}

/* fcos (D9 FF). */
void x87_fcos( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xff);
}


/* frndint (D9 FC). */
void x87_fprndint( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfc);
}

/* fscale (D9 FD). */
void x87_fscale( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfd);
}

/* fsin (D9 FE). */
void x87_fsin( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfe);
}

/* fsincos (D9 FB). */
void x87_fsincos( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfb);
}

/* fsqrt (D9 FA). */
void x87_fsqrt( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xfa);
}

/* fxtract (D9 F4). */
void x87_fxtract( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf4);
}

/* st0 = (2^st0)-1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf0);
}

/* st1 = st1 * log2(st0);
 * pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf1);
}

/* st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf9);
}


/* fld: push st(i) (D9 C0+i) or a value from memory (D9 /0). */
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
   if (arg.file == file_x87) {
      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   }
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 0, arg);
   }
}

/* fst: store st0 to st(i) (DD D0+i) or to memory (D9 /2). */
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87) {
      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   }
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 2, dst);
   }
}

/* fstp: store st0 to st(i) (DD D8+i) or to memory (D9 /3), then pop. */
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87) {
      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   }
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 3, dst);
   }
}

/* fcom: compare st0 with st(i) (D8 D0+i) or with memory (D8 /2). */
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87) {
      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   }
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 2, dst);
   }
}

/* fcomp: as fcom, then pop (D8 D8+i / D8 /3). */
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87) {
      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   }
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 3, dst);
   }
}


/* fnstsw: store the FPU status word to AX (DF E0) or to memory
 * (DD /7).
 */
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_REG32);

   if (dst.idx == reg_AX &&
       dst.mod == mod_REG) {
      emit_2ub(p, 0xdf, 0xe0);
   }
   else {
      emit_1ub(p, 0xdd);
      emit_modrm_noreg(p, 7, dst);
   }
}



/***********************************************************************
 * MMX instructions
 */

/* emms (0F 77).  Only valid when an MMX-writing instruction has been
 * emitted since the last emms; clears the need_emms flag.
 */
void mmx_emms( struct x86_function *p )
{
   assert(p->need_emms);
   emit_2ub(p, 0x0f, 0x77);
   p->need_emms = 0;
}

/* packssdw (0F 6B): MMX destination, MMX or memory source. */
void mmx_packssdw( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x6b);
   emit_modrm( p, dst, src );
}

/* packuswb (0F 67): MMX destination, MMX or memory source. */
void mmx_packuswb( struct x86_function *p,
                   struct x86_reg dst,
                   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
          (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

/* movd (0F 6E / 7E). */
void mmx_movd( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

/* movq (0F 6F / 7F). */
void mmx_movq( struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}


/***********************************************************************
 * Helper functions
 */


/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity.
 *
 * NOTE(review): the offset computation was flagged "???" by the
 * original author; the argument numbering base is not established in
 * this file -- confirm against callers.
 */
struct x86_reg x86_fn_arg( struct x86_function *p,
                           unsigned arg )
{
   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
                        p->stack_offset + arg * 4);
}


/* Reset 'p' to an empty function; the buffer is allocated lazily on
 * the first emit.  Only size/store/csr are touched here.
 */
void x86_init_func( struct x86_function *p )
{
   p->size = 0;
   p->store = NULL;
   p->csr = p->store;
}

/* Initialise 'p' with a buffer of 'code_size' bytes.  Returns non-zero
 * on success, zero if the allocation failed.
 */
int x86_init_func_size( struct x86_function *p, unsigned code_size )
{
   p->size = code_size;
   p->store = _mesa_exec_malloc(code_size);
   p->csr = p->store;
   return p->store != NULL;
}

/* Free the code buffer and return 'p' to the empty state. */
void x86_release_func( struct x86_function *p )
{
   _mesa_exec_free(p->store);
   p->store = NULL;
   p->csr = NULL;
   p->size = 0;
}


/* Return the start of the generated code as a callable function
 * pointer.
 */
void (*x86_get_func( struct x86_function *p ))(void)
{
   if (DISASSEM && p->store)
      printf("disassemble %p %p\n", (void *)p->store, (void *)p->csr);
   return (void (*)(void)) (unsigned long) p->store;
}

#else

void x86sse_dummy( void )
{
}

#endif

#else  /* USE_X86_ASM */

int x86sse_c_dummy_var; /* silence warning */

#endif /* USE_X86_ASM */