1 #ifdef USE_X86_ASM 2 #if defined(__i386__) || defined(__386__) 3 4 #include <stdio.h> 5 6 #include "main/imports.h" 7 #include "x86sse.h" 8 9 #define DISASSEM 0 10 #define X86_TWOB 0x0f 11 12 #if 0 13 static unsigned char *cptr( void (*label)() ) 14 { 15 return (unsigned char *)(unsigned long)label; 16 } 17 #endif 18 19 20 static void do_realloc( struct x86_function *p ) 21 { 22 if (p->size == 0) { 23 p->size = 1024; 24 p->store = _mesa_exec_malloc(p->size); 25 p->csr = p->store; 26 } 27 else { 28 unsigned used = p->csr - p->store; 29 unsigned char *tmp = p->store; 30 p->size *= 2; 31 p->store = _mesa_exec_malloc(p->size); 32 memcpy(p->store, tmp, used); 33 p->csr = p->store + used; 34 _mesa_exec_free(tmp); 35 } 36 } 37 38 /* Emit bytes to the instruction stream: 39 */ 40 static unsigned char *reserve( struct x86_function *p, int bytes ) 41 { 42 if (p->csr + bytes - p->store > p->size) 43 do_realloc(p); 44 45 { 46 unsigned char *csr = p->csr; 47 p->csr += bytes; 48 return csr; 49 } 50 } 51 52 53 54 static void emit_1b( struct x86_function *p, char b0 ) 55 { 56 char *csr = (char *)reserve(p, 1); 57 *csr = b0; 58 } 59 60 static void emit_1i( struct x86_function *p, int i0 ) 61 { 62 int *icsr = (int *)reserve(p, sizeof(i0)); 63 *icsr = i0; 64 } 65 66 static void emit_1ub( struct x86_function *p, unsigned char b0 ) 67 { 68 unsigned char *csr = reserve(p, 1); 69 *csr++ = b0; 70 } 71 72 static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 73 { 74 unsigned char *csr = reserve(p, 2); 75 *csr++ = b0; 76 *csr++ = b1; 77 } 78 79 static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 80 { 81 unsigned char *csr = reserve(p, 3); 82 *csr++ = b0; 83 *csr++ = b1; 84 *csr++ = b2; 85 } 86 87 88 /* Build a modRM byte + possible displacement. No treatment of SIB 89 * indexing. BZZT - no way to encode an absolute address. 
 */
static void emit_modrm( struct x86_function *p,
			struct x86_reg reg,
			struct x86_reg regmem )
{
   unsigned char val = 0;

   assert(reg.mod == mod_REG);

   val |= regmem.mod << 6;	/* mod field */
   val |= reg.idx << 3;		/* reg field */
   val |= regmem.idx;		/* r/m field */

   emit_1ub(p, val);

   /* Oh-oh we've stumbled into the SIB thing.
    * r/m == reg_SP (100b) means a SIB byte follows; 0x24 encodes
    * "base = ESP, no index", i.e. a plain [esp] reference.
    */
   if (regmem.file == file_REG32 &&
       regmem.idx == reg_SP) {
      emit_1ub(p, 0x24);		/* simplistic! */
   }

   /* Emit the displacement, sized according to the mod field chosen
    * by x86_make_disp():
    */
   switch (regmem.mod) {
   case mod_REG:
   case mod_INDIRECT:
      break;			/* no displacement */
   case mod_DISP8:
      emit_1b(p, regmem.disp);
      break;
   case mod_DISP32:
      emit_1i(p, regmem.disp);
      break;
   default:
      assert(0);
      break;
   }
}


/* Emit a modrm byte whose "reg" field carries an opcode extension
 * (the /digit notation in the Intel manuals) instead of a register.
 */
static void emit_modrm_noreg( struct x86_function *p,
			      unsigned op,
			      struct x86_reg regmem )
{
   struct x86_reg dummy = x86_make_reg(file_REG32, op);
   emit_modrm(p, dummy, regmem);
}

/* Many x86 instructions have two opcodes to cope with the situations
 * where the destination is a register or memory reference
 * respectively.  This function selects the correct opcode based on
 * the arguments presented.
 */
static void emit_op_modrm( struct x86_function *p,
			   unsigned char op_dst_is_reg,
			   unsigned char op_dst_is_mem,
			   struct x86_reg dst,
			   struct x86_reg src )
{
   switch (dst.mod) {
   case mod_REG:
      emit_1ub(p, op_dst_is_reg);
      emit_modrm(p, dst, src);
      break;
   case mod_INDIRECT:
   case mod_DISP32:
   case mod_DISP8:
      /* Memory destination: operand order in the modrm byte is
       * swapped relative to the register-destination form.
       */
      assert(src.mod == mod_REG);
      emit_1ub(p, op_dst_is_mem);
      emit_modrm(p, src, dst);
      break;
   default:
      assert(0);
      break;
   }
}







/* Create and manipulate registers and regmem values:
 */
struct x86_reg x86_make_reg( enum x86_reg_file file,
			     enum x86_reg_name idx )
{
   struct x86_reg reg;

   reg.file = file;
   reg.idx = idx;
   reg.mod = mod_REG;
   reg.disp = 0;

   return reg;
}

/* Build a memory reference [reg + disp], selecting the smallest
 * displacement encoding (none / 8bit / 32bit) that fits.
 * Displacements accumulate when applied to an already-dereferenced
 * register.
 */
struct x86_reg x86_make_disp( struct x86_reg reg,
			      int disp )
{
   assert(reg.file == file_REG32);

   if (reg.mod == mod_REG)
      reg.disp = disp;
   else
      reg.disp += disp;

   if (reg.disp == 0)
      reg.mod = mod_INDIRECT;
   else if (reg.disp <= 127 && reg.disp >= -128)
      reg.mod = mod_DISP8;
   else
      reg.mod = mod_DISP32;

   return reg;
}

/* Return [reg], ie. a zero-displacement memory reference. */
struct x86_reg x86_deref( struct x86_reg reg )
{
   return x86_make_disp(reg, 0);
}

/* Strip any displacement/indirection, returning the plain register. */
struct x86_reg x86_get_base_reg( struct x86_reg reg )
{
   return x86_make_reg( reg.file, reg.idx );
}

/* Current emit position, usable as a branch target or fixup cookie. */
unsigned char *x86_get_label( struct x86_function *p )
{
   return p->csr;
}



/***********************************************************************
 * x86 instructions
 */


/* Conditional jump to a known label, using the short (2 byte) form
 * when the offset fits in a signed byte, else the near (6 byte) form.
 */
void x86_jcc( struct x86_function *p,
	      enum x86_cc cc,
	      unsigned char *label )
{
   int offset = label - (x86_get_label(p) + 2);

   if (offset <= 127 && offset >= -128) {
      emit_1ub(p, 0x70 + cc);
      emit_1b(p, (char) offset);
   }
   else {
      /* Recompute: the near form is 6 bytes long. */
      offset = label - (x86_get_label(p) + 6);
      emit_2ub(p, 0x0f, 0x80 + cc);
      emit_1i(p, offset);
   }
}

/* Always use a 32bit offset for forward jumps:
 * (offset is patched later via x86_fixup_fwd_jump)
 */
unsigned char *x86_jcc_forward( struct x86_function *p,
				enum x86_cc cc )
{
   emit_2ub(p, 0x0f, 0x80 + cc);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Unconditional forward jump with a placeholder 32bit offset. */
unsigned char *x86_jmp_forward( struct x86_function *p)
{
   emit_1ub(p, 0xe9);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Forward call with a placeholder 32bit offset. */
unsigned char *x86_call_forward( struct x86_function *p)
{
   emit_1ub(p, 0xe8);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Fixup offset from forward jump:
 * 'fixup' is the label returned by one of the _forward emitters,
 * ie. the address just past the 4 byte offset slot.
 */
void x86_fixup_fwd_jump( struct x86_function *p,
			 unsigned char *fixup )
{
   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
}

/* Unconditional jump to a known label. */
void x86_jmp( struct x86_function *p, unsigned char *label)
{
   emit_1ub(p, 0xe9);
   emit_1i(p, label - x86_get_label(p) - 4);
}

#if 0
/* This doesn't work once we start reallocating & copying the
 * generated code on buffer fills, because the call is relative to the
 * current pc.
 */
void x86_call( struct x86_function *p, void (*label)())
{
   emit_1ub(p, 0xe8);
   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
}
#else
/* Indirect call through a register (ff /2), which remains valid even
 * if the code buffer is reallocated.
 */
void x86_call( struct x86_function *p, struct x86_reg reg)
{
   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, 2, reg);
}
#endif


/* michal:
 * Temporary.  As I need immediate operands, and dont want to mess with the codegen,
 * I load the immediate into general purpose register and use it.
 */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   assert(dst.mod == mod_REG);
   emit_1ub(p, 0xb8 + dst.idx);
   emit_1i(p, imm);
}

/* Push a 32bit register; stack_offset is tracked so x86_fn_arg()
 * keeps resolving arguments correctly afterwards.
 */
void x86_push( struct x86_function *p,
	       struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x50 + reg.idx);
   p->stack_offset += 4;
}

void x86_pop( struct x86_function *p,
	      struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x58 + reg.idx);
   p->stack_offset -= 4;
}

void x86_inc( struct x86_function *p,
	      struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x40 + reg.idx);
}

void x86_dec( struct x86_function *p,
	      struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x48 + reg.idx);
}

void x86_ret( struct x86_function *p )
{
   emit_1ub(p, 0xc3);
}

/* Store AH into the low byte of EFLAGS (used after fnstsw %ax). */
void x86_sahf( struct x86_function *p )
{
   emit_1ub(p, 0x9e);
}

void x86_mov( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

void x86_xor( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x33, 0x31, dst, src );
}

void x86_cmp( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x3b, 0x39, dst, src );
}

void x86_lea( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_1ub(p, 0x8d);
   emit_modrm( p, dst, src );
}

void x86_test( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   emit_1ub(p, 0x85);
   emit_modrm( p, dst, src );
}

void x86_add( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm(p, 0x03, 0x01, dst, src );
}

/* Unsigned multiply: edx:eax = eax * src (f7 /4).  Passing reg_SP as
 * the "reg" operand supplies the /4 opcode extension, since ESP
 * encodes as 4 in the modrm reg field.
 */
void x86_mul( struct x86_function *p,
	      struct x86_reg src )
{
   assert (src.file == file_REG32 && src.mod == mod_REG);
   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
}

void x86_sub( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm(p, 0x2b, 0x29, dst, src );
}

void x86_or( struct x86_function *p,
	     struct x86_reg dst,
	     struct x86_reg src )
{
   emit_op_modrm( p, 0x0b, 0x09, dst, src );
}

void x86_and( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x23, 0x21, dst, src );
}



/***********************************************************************
 * SSE instructions
 */


void sse_movss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, 0xF3, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

void sse_movaps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x28, 0x29, dst, src );
}

void sse_movups( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

/* One operand must be memory (the reg,reg encoding is movlhps). */
void sse_movhps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
}

/* One operand must be memory (the reg,reg encoding is movhlps). */
void sse_movlps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
}

void sse_maxps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_maxss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_divss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
   emit_modrm( p, dst, src );
}

void sse_minps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5D);
   emit_modrm( p, dst, src );
}

void sse_subps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5C);
   emit_modrm( p, dst, src );
}

void sse_mulps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_mulss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_addps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_addss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_andnps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x55);
   emit_modrm( p, dst, src );
}

void sse_andps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x54);
   emit_modrm( p, dst, src );
}

void sse_rsqrtps( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );
}

void sse_rsqrtss( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );

}

/* Register-to-register only: high qword of dst = low qword of src. */
void sse_movhlps( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x12);
   emit_modrm( p, dst, src );
}

/* Register-to-register only: low qword of dst = high qword of src. */
void sse_movlhps( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x16);
   emit_modrm( p, dst, src );
}

void sse_orps( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x56);
   emit_modrm( p, dst, src );
}

void sse_xorps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x57);
   emit_modrm( p, dst, src );
}

/* Convert packed floats to packed dwords in an MMX register; the MMX
 * state must be cleared with mmx_emms() before returning.
 */
void sse_cvtps2pi( struct x86_function *p,
		   struct x86_reg dst,
		   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
	  (src.file == file_XMM || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x2d);
   emit_modrm( p, dst, src );
}


/* Shufps can also be used to implement a reduced swizzle when dest ==
 * arg0.
630 */ 631 void sse_shufps( struct x86_function *p, 632 struct x86_reg dest, 633 struct x86_reg arg0, 634 unsigned char shuf) 635 { 636 emit_2ub(p, X86_TWOB, 0xC6); 637 emit_modrm(p, dest, arg0); 638 emit_1ub(p, shuf); 639 } 640 641 void sse_cmpps( struct x86_function *p, 642 struct x86_reg dest, 643 struct x86_reg arg0, 644 unsigned char cc) 645 { 646 emit_2ub(p, X86_TWOB, 0xC2); 647 emit_modrm(p, dest, arg0); 648 emit_1ub(p, cc); 649 } 650 651 void sse_pmovmskb( struct x86_function *p, 652 struct x86_reg dest, 653 struct x86_reg src) 654 { 655 emit_3ub(p, 0x66, X86_TWOB, 0xD7); 656 emit_modrm(p, dest, src); 657 } 658 659 /*********************************************************************** 660 * SSE2 instructions 661 */ 662 663 /** 664 * Perform a reduced swizzle: 665 */ 666 void sse2_pshufd( struct x86_function *p, 667 struct x86_reg dest, 668 struct x86_reg arg0, 669 unsigned char shuf) 670 { 671 emit_3ub(p, 0x66, X86_TWOB, 0x70); 672 emit_modrm(p, dest, arg0); 673 emit_1ub(p, shuf); 674 } 675 676 void sse2_cvttps2dq( struct x86_function *p, 677 struct x86_reg dst, 678 struct x86_reg src ) 679 { 680 emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 681 emit_modrm( p, dst, src ); 682 } 683 684 void sse2_cvtps2dq( struct x86_function *p, 685 struct x86_reg dst, 686 struct x86_reg src ) 687 { 688 emit_3ub(p, 0x66, X86_TWOB, 0x5B); 689 emit_modrm( p, dst, src ); 690 } 691 692 void sse2_packssdw( struct x86_function *p, 693 struct x86_reg dst, 694 struct x86_reg src ) 695 { 696 emit_3ub(p, 0x66, X86_TWOB, 0x6B); 697 emit_modrm( p, dst, src ); 698 } 699 700 void sse2_packsswb( struct x86_function *p, 701 struct x86_reg dst, 702 struct x86_reg src ) 703 { 704 emit_3ub(p, 0x66, X86_TWOB, 0x63); 705 emit_modrm( p, dst, src ); 706 } 707 708 void sse2_packuswb( struct x86_function *p, 709 struct x86_reg dst, 710 struct x86_reg src ) 711 { 712 emit_3ub(p, 0x66, X86_TWOB, 0x67); 713 emit_modrm( p, dst, src ); 714 } 715 716 void sse2_rcpps( struct x86_function *p, 717 struct x86_reg 
dst, 718 struct x86_reg src ) 719 { 720 emit_2ub(p, X86_TWOB, 0x53); 721 emit_modrm( p, dst, src ); 722 } 723 724 void sse2_rcpss( struct x86_function *p, 725 struct x86_reg dst, 726 struct x86_reg src ) 727 { 728 emit_3ub(p, 0xF3, X86_TWOB, 0x53); 729 emit_modrm( p, dst, src ); 730 } 731 732 void sse2_movd( struct x86_function *p, 733 struct x86_reg dst, 734 struct x86_reg src ) 735 { 736 emit_2ub(p, 0x66, X86_TWOB); 737 emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 738 } 739 740 741 742 743 /*********************************************************************** 744 * x87 instructions 745 */ 746 void x87_fist( struct x86_function *p, struct x86_reg dst ) 747 { 748 emit_1ub(p, 0xdb); 749 emit_modrm_noreg(p, 2, dst); 750 } 751 752 void x87_fistp( struct x86_function *p, struct x86_reg dst ) 753 { 754 emit_1ub(p, 0xdb); 755 emit_modrm_noreg(p, 3, dst); 756 } 757 758 void x87_fild( struct x86_function *p, struct x86_reg arg ) 759 { 760 emit_1ub(p, 0xdf); 761 emit_modrm_noreg(p, 0, arg); 762 } 763 764 void x87_fldz( struct x86_function *p ) 765 { 766 emit_2ub(p, 0xd9, 0xee); 767 } 768 769 770 void x87_fldcw( struct x86_function *p, struct x86_reg arg ) 771 { 772 assert(arg.file == file_REG32); 773 assert(arg.mod != mod_REG); 774 emit_1ub(p, 0xd9); 775 emit_modrm_noreg(p, 5, arg); 776 } 777 778 void x87_fld1( struct x86_function *p ) 779 { 780 emit_2ub(p, 0xd9, 0xe8); 781 } 782 783 void x87_fldl2e( struct x86_function *p ) 784 { 785 emit_2ub(p, 0xd9, 0xea); 786 } 787 788 void x87_fldln2( struct x86_function *p ) 789 { 790 emit_2ub(p, 0xd9, 0xed); 791 } 792 793 void x87_fwait( struct x86_function *p ) 794 { 795 emit_1ub(p, 0x9b); 796 } 797 798 void x87_fnclex( struct x86_function *p ) 799 { 800 emit_2ub(p, 0xdb, 0xe2); 801 } 802 803 void x87_fclex( struct x86_function *p ) 804 { 805 x87_fwait(p); 806 x87_fnclex(p); 807 } 808 809 810 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, 811 unsigned char dst0ub0, 812 unsigned char dst0ub1, 
813 unsigned char arg0ub0, 814 unsigned char arg0ub1, 815 unsigned char argmem_noreg) 816 { 817 assert(dst.file == file_x87); 818 819 if (arg.file == file_x87) { 820 if (dst.idx == 0) 821 emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 822 else if (arg.idx == 0) 823 emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 824 else 825 assert(0); 826 } 827 else if (dst.idx == 0) { 828 assert(arg.file == file_REG32); 829 emit_1ub(p, 0xd8); 830 emit_modrm_noreg(p, argmem_noreg, arg); 831 } 832 else 833 assert(0); 834 } 835 836 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 837 { 838 x87_arith_op(p, dst, arg, 839 0xd8, 0xc8, 840 0xdc, 0xc8, 841 4); 842 } 843 844 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 845 { 846 x87_arith_op(p, dst, arg, 847 0xd8, 0xe0, 848 0xdc, 0xe8, 849 4); 850 } 851 852 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 853 { 854 x87_arith_op(p, dst, arg, 855 0xd8, 0xe8, 856 0xdc, 0xe0, 857 5); 858 } 859 860 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 861 { 862 x87_arith_op(p, dst, arg, 863 0xd8, 0xc0, 864 0xdc, 0xc0, 865 0); 866 } 867 868 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 869 { 870 x87_arith_op(p, dst, arg, 871 0xd8, 0xf0, 872 0xdc, 0xf8, 873 6); 874 } 875 876 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 877 { 878 x87_arith_op(p, dst, arg, 879 0xd8, 0xf8, 880 0xdc, 0xf0, 881 7); 882 } 883 884 void x87_fmulp( struct x86_function *p, struct x86_reg dst ) 885 { 886 assert(dst.file == file_x87); 887 assert(dst.idx >= 1); 888 emit_2ub(p, 0xde, 0xc8+dst.idx); 889 } 890 891 void x87_fsubp( struct x86_function *p, struct x86_reg dst ) 892 { 893 assert(dst.file == file_x87); 894 assert(dst.idx >= 1); 895 emit_2ub(p, 0xde, 0xe8+dst.idx); 896 } 897 898 void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 899 { 900 assert(dst.file == file_x87); 901 
assert(dst.idx >= 1); 902 emit_2ub(p, 0xde, 0xe0+dst.idx); 903 } 904 905 void x87_faddp( struct x86_function *p, struct x86_reg dst ) 906 { 907 assert(dst.file == file_x87); 908 assert(dst.idx >= 1); 909 emit_2ub(p, 0xde, 0xc0+dst.idx); 910 } 911 912 void x87_fdivp( struct x86_function *p, struct x86_reg dst ) 913 { 914 assert(dst.file == file_x87); 915 assert(dst.idx >= 1); 916 emit_2ub(p, 0xde, 0xf8+dst.idx); 917 } 918 919 void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 920 { 921 assert(dst.file == file_x87); 922 assert(dst.idx >= 1); 923 emit_2ub(p, 0xde, 0xf0+dst.idx); 924 } 925 926 void x87_fucom( struct x86_function *p, struct x86_reg arg ) 927 { 928 assert(arg.file == file_x87); 929 emit_2ub(p, 0xdd, 0xe0+arg.idx); 930 } 931 932 void x87_fucomp( struct x86_function *p, struct x86_reg arg ) 933 { 934 assert(arg.file == file_x87); 935 emit_2ub(p, 0xdd, 0xe8+arg.idx); 936 } 937 938 void x87_fucompp( struct x86_function *p ) 939 { 940 emit_2ub(p, 0xda, 0xe9); 941 } 942 943 void x87_fxch( struct x86_function *p, struct x86_reg arg ) 944 { 945 assert(arg.file == file_x87); 946 emit_2ub(p, 0xd9, 0xc8+arg.idx); 947 } 948 949 void x87_fabs( struct x86_function *p ) 950 { 951 emit_2ub(p, 0xd9, 0xe1); 952 } 953 954 void x87_fchs( struct x86_function *p ) 955 { 956 emit_2ub(p, 0xd9, 0xe0); 957 } 958 959 void x87_fcos( struct x86_function *p ) 960 { 961 emit_2ub(p, 0xd9, 0xff); 962 } 963 964 965 void x87_fprndint( struct x86_function *p ) 966 { 967 emit_2ub(p, 0xd9, 0xfc); 968 } 969 970 void x87_fscale( struct x86_function *p ) 971 { 972 emit_2ub(p, 0xd9, 0xfd); 973 } 974 975 void x87_fsin( struct x86_function *p ) 976 { 977 emit_2ub(p, 0xd9, 0xfe); 978 } 979 980 void x87_fsincos( struct x86_function *p ) 981 { 982 emit_2ub(p, 0xd9, 0xfb); 983 } 984 985 void x87_fsqrt( struct x86_function *p ) 986 { 987 emit_2ub(p, 0xd9, 0xfa); 988 } 989 990 void x87_fxtract( struct x86_function *p ) 991 { 992 emit_2ub(p, 0xd9, 0xf4); 993 } 994 995 /* st0 = (2^st0)-1 996 * 
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf0);
}

/* st1 = st1 * log2(st0);
 * pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf1);
}

/* st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf9);
}


/* Push a value: from an x87 stack slot (d9 c0+i) or a 32bit float in
 * memory (d9 /0).
 */
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
   if (arg.file == file_x87)
      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 0, arg);
   }
}

/* Store st0: to st(i) (dd d0+i) or to a 32bit float in memory
 * (d9 /2).  No pop.
 */
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 2, dst);
   }
}

/* As x87_fst, but also pop the stack. */
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 3, dst);
   }
}

/* Compare st0 with st(i) (d8 d0+i) or a 32bit float in memory
 * (d8 /2).  No pop.
 */
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 2, dst);
   }
}

/* As x87_fcom, but also pop the stack. */
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 3, dst);
   }
}


/* Store the FPU status word: the fast AX form (df e0) when the
 * destination is the AX register, else to memory (dd /7).
 */
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_REG32);

   if (dst.idx == reg_AX &&
       dst.mod == mod_REG)
      emit_2ub(p, 0xdf, 0xe0);
   else {
      emit_1ub(p, 0xdd);
      emit_modrm_noreg(p, 7, dst);
   }
}




/***********************************************************************
 * MMX instructions
 */

/* Clear MMX state; must be emitted before returning from code that
 * used any MMX instruction (tracked via p->need_emms).
 */
void mmx_emms( struct x86_function *p )
{
   assert(p->need_emms);
   emit_2ub(p, 0x0f, 0x77);
   p->need_emms = 0;
}

void mmx_packssdw( struct x86_function *p,
		   struct x86_reg dst,
		   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
	  (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x6b);
   emit_modrm( p, dst, src );
}

void mmx_packuswb( struct x86_function *p,
		   struct x86_reg dst,
		   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
	  (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

void mmx_movd( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

void mmx_movq( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}


/***********************************************************************
 * Helper functions
 */


/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity:
 */
struct x86_reg x86_fn_arg( struct x86_function *p,
			   unsigned arg )
{
   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
			p->stack_offset + arg * 4);	/* ??? */
}


/* Initialise an empty function; the buffer is allocated lazily by
 * reserve() on the first emit.
 */
void x86_init_func( struct x86_function *p )
{
   p->size = 0;
   p->store = NULL;
   p->csr = p->store;
}

/* Initialise with a fixed-size buffer.  Returns non-zero on success. */
int x86_init_func_size( struct x86_function *p, unsigned code_size )
{
   p->size = code_size;
   p->store = _mesa_exec_malloc(code_size);
   p->csr = p->store;
   return p->store != NULL;
}

void x86_release_func( struct x86_function *p )
{
   _mesa_exec_free(p->store);
   p->store = NULL;
   p->csr = NULL;
   p->size = 0;
}


/* Return the generated code as a callable function pointer. */
void (*x86_get_func( struct x86_function *p ))(void)
{
   if (DISASSEM && p->store)
      printf("disassemble %p %p\n", p->store, p->csr);
   return (void (*)(void)) (unsigned long) p->store;
}

#else

void x86sse_dummy( void )
{
}

#endif

#else  /* USE_X86_ASM */

int x86sse_c_dummy_var;  /* silence warning */

#endif /* USE_X86_ASM */