Home | History | Annotate | Download | only in rtasm
      1 #ifdef USE_X86_ASM
      2 #if defined(__i386__) || defined(__386__)
      3 
      4 #include <stdio.h>
      5 
      6 #include "main/imports.h"
      7 #include "x86sse.h"
      8 
      9 #define DISASSEM 0
     10 #define X86_TWOB 0x0f
     11 
     12 #if 0
     13 static unsigned char *cptr( void (*label)() )
     14 {
     15    return (unsigned char *)(unsigned long)label;
     16 }
     17 #endif
     18 
     19 
     20 static void do_realloc( struct x86_function *p )
     21 {
     22    if (p->size == 0) {
     23       p->size = 1024;
     24       p->store = _mesa_exec_malloc(p->size);
     25       p->csr = p->store;
     26    }
     27    else {
     28       unsigned used = p->csr - p->store;
     29       unsigned char *tmp = p->store;
     30       p->size *= 2;
     31       p->store = _mesa_exec_malloc(p->size);
     32       memcpy(p->store, tmp, used);
     33       p->csr = p->store + used;
     34       _mesa_exec_free(tmp);
     35    }
     36 }
     37 
     38 /* Emit bytes to the instruction stream:
     39  */
     40 static unsigned char *reserve( struct x86_function *p, int bytes )
     41 {
     42    if (p->csr + bytes - p->store > p->size)
     43       do_realloc(p);
     44 
     45    {
     46       unsigned char *csr = p->csr;
     47       p->csr += bytes;
     48       return csr;
     49    }
     50 }
     51 
     52 
     53 
     54 static void emit_1b( struct x86_function *p, char b0 )
     55 {
     56    char *csr = (char *)reserve(p, 1);
     57    *csr = b0;
     58 }
     59 
     60 static void emit_1i( struct x86_function *p, int i0 )
     61 {
     62    int *icsr = (int *)reserve(p, sizeof(i0));
     63    *icsr = i0;
     64 }
     65 
     66 static void emit_1ub( struct x86_function *p, unsigned char b0 )
     67 {
     68    unsigned char *csr = reserve(p, 1);
     69    *csr++ = b0;
     70 }
     71 
     72 static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
     73 {
     74    unsigned char *csr = reserve(p, 2);
     75    *csr++ = b0;
     76    *csr++ = b1;
     77 }
     78 
     79 static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
     80 {
     81    unsigned char *csr = reserve(p, 3);
     82    *csr++ = b0;
     83    *csr++ = b1;
     84    *csr++ = b2;
     85 }
     86 
     87 
     88 /* Build a modRM byte + possible displacement.  No treatment of SIB
     89  * indexing.  BZZT - no way to encode an absolute address.
     90  */
     91 static void emit_modrm( struct x86_function *p,
     92 			struct x86_reg reg,
     93 			struct x86_reg regmem )
     94 {
     95    unsigned char val = 0;
     96 
     97    assert(reg.mod == mod_REG);
     98 
     99    val |= regmem.mod << 6;     	/* mod field */
    100    val |= reg.idx << 3;		/* reg field */
    101    val |= regmem.idx;		/* r/m field */
    102 
    103    emit_1ub(p, val);
    104 
    105    /* Oh-oh we've stumbled into the SIB thing.
    106     */
    107    if (regmem.file == file_REG32 &&
    108        regmem.idx == reg_SP) {
    109       emit_1ub(p, 0x24);		/* simplistic! */
    110    }
    111 
    112    switch (regmem.mod) {
    113    case mod_REG:
    114    case mod_INDIRECT:
    115       break;
    116    case mod_DISP8:
    117       emit_1b(p, regmem.disp);
    118       break;
    119    case mod_DISP32:
    120       emit_1i(p, regmem.disp);
    121       break;
    122    default:
    123       assert(0);
    124       break;
    125    }
    126 }
    127 
    128 
    129 static void emit_modrm_noreg( struct x86_function *p,
    130 			      unsigned op,
    131 			      struct x86_reg regmem )
    132 {
    133    struct x86_reg dummy = x86_make_reg(file_REG32, op);
    134    emit_modrm(p, dummy, regmem);
    135 }
    136 
    137 /* Many x86 instructions have two opcodes to cope with the situations
    138  * where the destination is a register or memory reference
    139  * respectively.  This function selects the correct opcode based on
    140  * the arguments presented.
    141  */
    142 static void emit_op_modrm( struct x86_function *p,
    143 			   unsigned char op_dst_is_reg,
    144 			   unsigned char op_dst_is_mem,
    145 			   struct x86_reg dst,
    146 			   struct x86_reg src )
    147 {
    148    switch (dst.mod) {
    149    case mod_REG:
    150       emit_1ub(p, op_dst_is_reg);
    151       emit_modrm(p, dst, src);
    152       break;
    153    case mod_INDIRECT:
    154    case mod_DISP32:
    155    case mod_DISP8:
    156       assert(src.mod == mod_REG);
    157       emit_1ub(p, op_dst_is_mem);
    158       emit_modrm(p, src, dst);
    159       break;
    160    default:
    161       assert(0);
    162       break;
    163    }
    164 }
    165 
    166 
    167 
    168 
    169 
    170 
    171 
    172 /* Create and manipulate registers and regmem values:
    173  */
    174 struct x86_reg x86_make_reg( enum x86_reg_file file,
    175 			     enum x86_reg_name idx )
    176 {
    177    struct x86_reg reg;
    178 
    179    reg.file = file;
    180    reg.idx = idx;
    181    reg.mod = mod_REG;
    182    reg.disp = 0;
    183 
    184    return reg;
    185 }
    186 
    187 struct x86_reg x86_make_disp( struct x86_reg reg,
    188 			      int disp )
    189 {
    190    assert(reg.file == file_REG32);
    191 
    192    if (reg.mod == mod_REG)
    193       reg.disp = disp;
    194    else
    195       reg.disp += disp;
    196 
    197    if (reg.disp == 0)
    198       reg.mod = mod_INDIRECT;
    199    else if (reg.disp <= 127 && reg.disp >= -128)
    200       reg.mod = mod_DISP8;
    201    else
    202       reg.mod = mod_DISP32;
    203 
    204    return reg;
    205 }
    206 
    207 struct x86_reg x86_deref( struct x86_reg reg )
    208 {
    209    return x86_make_disp(reg, 0);
    210 }
    211 
    212 struct x86_reg x86_get_base_reg( struct x86_reg reg )
    213 {
    214    return x86_make_reg( reg.file, reg.idx );
    215 }
    216 
    217 unsigned char *x86_get_label( struct x86_function *p )
    218 {
    219    return p->csr;
    220 }
    221 
    222 
    223 
    224 /***********************************************************************
    225  * x86 instructions
    226  */
    227 
    228 
    229 void x86_jcc( struct x86_function *p,
    230 	      enum x86_cc cc,
    231 	      unsigned char *label )
    232 {
    233    int offset = label - (x86_get_label(p) + 2);
    234 
    235    if (offset <= 127 && offset >= -128) {
    236       emit_1ub(p, 0x70 + cc);
    237       emit_1b(p, (char) offset);
    238    }
    239    else {
    240       offset = label - (x86_get_label(p) + 6);
    241       emit_2ub(p, 0x0f, 0x80 + cc);
    242       emit_1i(p, offset);
    243    }
    244 }
    245 
    246 /* Always use a 32bit offset for forward jumps:
    247  */
    248 unsigned char *x86_jcc_forward( struct x86_function *p,
    249 			  enum x86_cc cc )
    250 {
    251    emit_2ub(p, 0x0f, 0x80 + cc);
    252    emit_1i(p, 0);
    253    return x86_get_label(p);
    254 }
    255 
    256 unsigned char *x86_jmp_forward( struct x86_function *p)
    257 {
    258    emit_1ub(p, 0xe9);
    259    emit_1i(p, 0);
    260    return x86_get_label(p);
    261 }
    262 
    263 unsigned char *x86_call_forward( struct x86_function *p)
    264 {
    265    emit_1ub(p, 0xe8);
    266    emit_1i(p, 0);
    267    return x86_get_label(p);
    268 }
    269 
    270 /* Fixup offset from forward jump:
    271  */
    272 void x86_fixup_fwd_jump( struct x86_function *p,
    273 			 unsigned char *fixup )
    274 {
    275    *(int *)(fixup - 4) = x86_get_label(p) - fixup;
    276 }
    277 
    278 void x86_jmp( struct x86_function *p, unsigned char *label)
    279 {
    280    emit_1ub(p, 0xe9);
    281    emit_1i(p, label - x86_get_label(p) - 4);
    282 }
    283 
    284 #if 0
    285 /* This doesn't work once we start reallocating & copying the
    286  * generated code on buffer fills, because the call is relative to the
    287  * current pc.
    288  */
    289 void x86_call( struct x86_function *p, void (*label)())
    290 {
    291    emit_1ub(p, 0xe8);
    292    emit_1i(p, cptr(label) - x86_get_label(p) - 4);
    293 }
    294 #else
    295 void x86_call( struct x86_function *p, struct x86_reg reg)
    296 {
    297    emit_1ub(p, 0xff);
    298    emit_modrm_noreg(p, 2, reg);
    299 }
    300 #endif
    301 
    302 
    303 /* michal:
    304  * Temporary. As I need immediate operands, and dont want to mess with the codegen,
    305  * I load the immediate into general purpose register and use it.
    306  */
    307 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
    308 {
    309    assert(dst.mod == mod_REG);
    310    emit_1ub(p, 0xb8 + dst.idx);
    311    emit_1i(p, imm);
    312 }
    313 
    314 void x86_push( struct x86_function *p,
    315 	       struct x86_reg reg )
    316 {
    317    assert(reg.mod == mod_REG);
    318    emit_1ub(p, 0x50 + reg.idx);
    319    p->stack_offset += 4;
    320 }
    321 
    322 void x86_pop( struct x86_function *p,
    323 	      struct x86_reg reg )
    324 {
    325    assert(reg.mod == mod_REG);
    326    emit_1ub(p, 0x58 + reg.idx);
    327    p->stack_offset -= 4;
    328 }
    329 
    330 void x86_inc( struct x86_function *p,
    331 	      struct x86_reg reg )
    332 {
    333    assert(reg.mod == mod_REG);
    334    emit_1ub(p, 0x40 + reg.idx);
    335 }
    336 
    337 void x86_dec( struct x86_function *p,
    338 	      struct x86_reg reg )
    339 {
    340    assert(reg.mod == mod_REG);
    341    emit_1ub(p, 0x48 + reg.idx);
    342 }
    343 
    344 void x86_ret( struct x86_function *p )
    345 {
    346    emit_1ub(p, 0xc3);
    347 }
    348 
    349 void x86_sahf( struct x86_function *p )
    350 {
    351    emit_1ub(p, 0x9e);
    352 }
    353 
    354 void x86_mov( struct x86_function *p,
    355 	      struct x86_reg dst,
    356 	      struct x86_reg src )
    357 {
    358    emit_op_modrm( p, 0x8b, 0x89, dst, src );
    359 }
    360 
    361 void x86_xor( struct x86_function *p,
    362 	      struct x86_reg dst,
    363 	      struct x86_reg src )
    364 {
    365    emit_op_modrm( p, 0x33, 0x31, dst, src );
    366 }
    367 
    368 void x86_cmp( struct x86_function *p,
    369 	      struct x86_reg dst,
    370 	      struct x86_reg src )
    371 {
    372    emit_op_modrm( p, 0x3b, 0x39, dst, src );
    373 }
    374 
    375 void x86_lea( struct x86_function *p,
    376 	      struct x86_reg dst,
    377 	      struct x86_reg src )
    378 {
    379    emit_1ub(p, 0x8d);
    380    emit_modrm( p, dst, src );
    381 }
    382 
    383 void x86_test( struct x86_function *p,
    384 	       struct x86_reg dst,
    385 	       struct x86_reg src )
    386 {
    387    emit_1ub(p, 0x85);
    388    emit_modrm( p, dst, src );
    389 }
    390 
    391 void x86_add( struct x86_function *p,
    392 	       struct x86_reg dst,
    393 	       struct x86_reg src )
    394 {
    395    emit_op_modrm(p, 0x03, 0x01, dst, src );
    396 }
    397 
    398 void x86_mul( struct x86_function *p,
    399 	       struct x86_reg src )
    400 {
    401    assert (src.file == file_REG32 && src.mod == mod_REG);
    402    emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
    403 }
    404 
    405 void x86_sub( struct x86_function *p,
    406 	       struct x86_reg dst,
    407 	       struct x86_reg src )
    408 {
    409    emit_op_modrm(p, 0x2b, 0x29, dst, src );
    410 }
    411 
    412 void x86_or( struct x86_function *p,
    413              struct x86_reg dst,
    414              struct x86_reg src )
    415 {
    416    emit_op_modrm( p, 0x0b, 0x09, dst, src );
    417 }
    418 
    419 void x86_and( struct x86_function *p,
    420               struct x86_reg dst,
    421               struct x86_reg src )
    422 {
    423    emit_op_modrm( p, 0x23, 0x21, dst, src );
    424 }
    425 
    426 
    427 
    428 /***********************************************************************
    429  * SSE instructions
    430  */
    431 
    432 
    433 void sse_movss( struct x86_function *p,
    434 		struct x86_reg dst,
    435 		struct x86_reg src )
    436 {
    437    emit_2ub(p, 0xF3, X86_TWOB);
    438    emit_op_modrm( p, 0x10, 0x11, dst, src );
    439 }
    440 
    441 void sse_movaps( struct x86_function *p,
    442 		 struct x86_reg dst,
    443 		 struct x86_reg src )
    444 {
    445    emit_1ub(p, X86_TWOB);
    446    emit_op_modrm( p, 0x28, 0x29, dst, src );
    447 }
    448 
    449 void sse_movups( struct x86_function *p,
    450 		 struct x86_reg dst,
    451 		 struct x86_reg src )
    452 {
    453    emit_1ub(p, X86_TWOB);
    454    emit_op_modrm( p, 0x10, 0x11, dst, src );
    455 }
    456 
    457 void sse_movhps( struct x86_function *p,
    458 		 struct x86_reg dst,
    459 		 struct x86_reg src )
    460 {
    461    assert(dst.mod != mod_REG || src.mod != mod_REG);
    462    emit_1ub(p, X86_TWOB);
    463    emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
    464 }
    465 
    466 void sse_movlps( struct x86_function *p,
    467 		 struct x86_reg dst,
    468 		 struct x86_reg src )
    469 {
    470    assert(dst.mod != mod_REG || src.mod != mod_REG);
    471    emit_1ub(p, X86_TWOB);
    472    emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
    473 }
    474 
    475 void sse_maxps( struct x86_function *p,
    476 		struct x86_reg dst,
    477 		struct x86_reg src )
    478 {
    479    emit_2ub(p, X86_TWOB, 0x5F);
    480    emit_modrm( p, dst, src );
    481 }
    482 
    483 void sse_maxss( struct x86_function *p,
    484 		struct x86_reg dst,
    485 		struct x86_reg src )
    486 {
    487    emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
    488    emit_modrm( p, dst, src );
    489 }
    490 
    491 void sse_divss( struct x86_function *p,
    492 		struct x86_reg dst,
    493 		struct x86_reg src )
    494 {
    495    emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
    496    emit_modrm( p, dst, src );
    497 }
    498 
    499 void sse_minps( struct x86_function *p,
    500 		struct x86_reg dst,
    501 		struct x86_reg src )
    502 {
    503    emit_2ub(p, X86_TWOB, 0x5D);
    504    emit_modrm( p, dst, src );
    505 }
    506 
    507 void sse_subps( struct x86_function *p,
    508 		struct x86_reg dst,
    509 		struct x86_reg src )
    510 {
    511    emit_2ub(p, X86_TWOB, 0x5C);
    512    emit_modrm( p, dst, src );
    513 }
    514 
    515 void sse_mulps( struct x86_function *p,
    516 		struct x86_reg dst,
    517 		struct x86_reg src )
    518 {
    519    emit_2ub(p, X86_TWOB, 0x59);
    520    emit_modrm( p, dst, src );
    521 }
    522 
    523 void sse_mulss( struct x86_function *p,
    524 		struct x86_reg dst,
    525 		struct x86_reg src )
    526 {
    527    emit_3ub(p, 0xF3, X86_TWOB, 0x59);
    528    emit_modrm( p, dst, src );
    529 }
    530 
    531 void sse_addps( struct x86_function *p,
    532 		struct x86_reg dst,
    533 		struct x86_reg src )
    534 {
    535    emit_2ub(p, X86_TWOB, 0x58);
    536    emit_modrm( p, dst, src );
    537 }
    538 
    539 void sse_addss( struct x86_function *p,
    540 		struct x86_reg dst,
    541 		struct x86_reg src )
    542 {
    543    emit_3ub(p, 0xF3, X86_TWOB, 0x58);
    544    emit_modrm( p, dst, src );
    545 }
    546 
    547 void sse_andnps( struct x86_function *p,
    548                  struct x86_reg dst,
    549                  struct x86_reg src )
    550 {
    551    emit_2ub(p, X86_TWOB, 0x55);
    552    emit_modrm( p, dst, src );
    553 }
    554 
    555 void sse_andps( struct x86_function *p,
    556 		struct x86_reg dst,
    557 		struct x86_reg src )
    558 {
    559    emit_2ub(p, X86_TWOB, 0x54);
    560    emit_modrm( p, dst, src );
    561 }
    562 
    563 void sse_rsqrtps( struct x86_function *p,
    564                   struct x86_reg dst,
    565                   struct x86_reg src )
    566 {
    567    emit_2ub(p, X86_TWOB, 0x52);
    568    emit_modrm( p, dst, src );
    569 }
    570 
    571 void sse_rsqrtss( struct x86_function *p,
    572 		  struct x86_reg dst,
    573 		  struct x86_reg src )
    574 {
    575    emit_3ub(p, 0xF3, X86_TWOB, 0x52);
    576    emit_modrm( p, dst, src );
    577 
    578 }
    579 
    580 void sse_movhlps( struct x86_function *p,
    581 		  struct x86_reg dst,
    582 		  struct x86_reg src )
    583 {
    584    assert(dst.mod == mod_REG && src.mod == mod_REG);
    585    emit_2ub(p, X86_TWOB, 0x12);
    586    emit_modrm( p, dst, src );
    587 }
    588 
    589 void sse_movlhps( struct x86_function *p,
    590 		  struct x86_reg dst,
    591 		  struct x86_reg src )
    592 {
    593    assert(dst.mod == mod_REG && src.mod == mod_REG);
    594    emit_2ub(p, X86_TWOB, 0x16);
    595    emit_modrm( p, dst, src );
    596 }
    597 
    598 void sse_orps( struct x86_function *p,
    599                struct x86_reg dst,
    600                struct x86_reg src )
    601 {
    602    emit_2ub(p, X86_TWOB, 0x56);
    603    emit_modrm( p, dst, src );
    604 }
    605 
    606 void sse_xorps( struct x86_function *p,
    607                 struct x86_reg dst,
    608                 struct x86_reg src )
    609 {
    610    emit_2ub(p, X86_TWOB, 0x57);
    611    emit_modrm( p, dst, src );
    612 }
    613 
    614 void sse_cvtps2pi( struct x86_function *p,
    615 		   struct x86_reg dst,
    616 		   struct x86_reg src )
    617 {
    618    assert(dst.file == file_MMX &&
    619 	  (src.file == file_XMM || src.mod != mod_REG));
    620 
    621    p->need_emms = 1;
    622 
    623    emit_2ub(p, X86_TWOB, 0x2d);
    624    emit_modrm( p, dst, src );
    625 }
    626 
    627 
    628 /* Shufps can also be used to implement a reduced swizzle when dest ==
    629  * arg0.
    630  */
    631 void sse_shufps( struct x86_function *p,
    632 		 struct x86_reg dest,
    633 		 struct x86_reg arg0,
    634 		 unsigned char shuf)
    635 {
    636    emit_2ub(p, X86_TWOB, 0xC6);
    637    emit_modrm(p, dest, arg0);
    638    emit_1ub(p, shuf);
    639 }
    640 
    641 void sse_cmpps( struct x86_function *p,
    642 		struct x86_reg dest,
    643 		struct x86_reg arg0,
    644 		unsigned char cc)
    645 {
    646    emit_2ub(p, X86_TWOB, 0xC2);
    647    emit_modrm(p, dest, arg0);
    648    emit_1ub(p, cc);
    649 }
    650 
    651 void sse_pmovmskb( struct x86_function *p,
    652                    struct x86_reg dest,
    653                    struct x86_reg src)
    654 {
    655     emit_3ub(p, 0x66, X86_TWOB, 0xD7);
    656     emit_modrm(p, dest, src);
    657 }
    658 
    659 /***********************************************************************
    660  * SSE2 instructions
    661  */
    662 
    663 /**
    664  * Perform a reduced swizzle:
    665  */
    666 void sse2_pshufd( struct x86_function *p,
    667 		  struct x86_reg dest,
    668 		  struct x86_reg arg0,
    669 		  unsigned char shuf)
    670 {
    671    emit_3ub(p, 0x66, X86_TWOB, 0x70);
    672    emit_modrm(p, dest, arg0);
    673    emit_1ub(p, shuf);
    674 }
    675 
    676 void sse2_cvttps2dq( struct x86_function *p,
    677                      struct x86_reg dst,
    678                      struct x86_reg src )
    679 {
    680    emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
    681    emit_modrm( p, dst, src );
    682 }
    683 
    684 void sse2_cvtps2dq( struct x86_function *p,
    685 		    struct x86_reg dst,
    686 		    struct x86_reg src )
    687 {
    688    emit_3ub(p, 0x66, X86_TWOB, 0x5B);
    689    emit_modrm( p, dst, src );
    690 }
    691 
    692 void sse2_packssdw( struct x86_function *p,
    693 		    struct x86_reg dst,
    694 		    struct x86_reg src )
    695 {
    696    emit_3ub(p, 0x66, X86_TWOB, 0x6B);
    697    emit_modrm( p, dst, src );
    698 }
    699 
    700 void sse2_packsswb( struct x86_function *p,
    701 		    struct x86_reg dst,
    702 		    struct x86_reg src )
    703 {
    704    emit_3ub(p, 0x66, X86_TWOB, 0x63);
    705    emit_modrm( p, dst, src );
    706 }
    707 
    708 void sse2_packuswb( struct x86_function *p,
    709 		    struct x86_reg dst,
    710 		    struct x86_reg src )
    711 {
    712    emit_3ub(p, 0x66, X86_TWOB, 0x67);
    713    emit_modrm( p, dst, src );
    714 }
    715 
    716 void sse2_rcpps( struct x86_function *p,
    717                  struct x86_reg dst,
    718                  struct x86_reg src )
    719 {
    720    emit_2ub(p, X86_TWOB, 0x53);
    721    emit_modrm( p, dst, src );
    722 }
    723 
    724 void sse2_rcpss( struct x86_function *p,
    725 		struct x86_reg dst,
    726 		struct x86_reg src )
    727 {
    728    emit_3ub(p, 0xF3, X86_TWOB, 0x53);
    729    emit_modrm( p, dst, src );
    730 }
    731 
    732 void sse2_movd( struct x86_function *p,
    733 		struct x86_reg dst,
    734 		struct x86_reg src )
    735 {
    736    emit_2ub(p, 0x66, X86_TWOB);
    737    emit_op_modrm( p, 0x6e, 0x7e, dst, src );
    738 }
    739 
    740 
    741 
    742 
    743 /***********************************************************************
    744  * x87 instructions
    745  */
    746 void x87_fist( struct x86_function *p, struct x86_reg dst )
    747 {
    748    emit_1ub(p, 0xdb);
    749    emit_modrm_noreg(p, 2, dst);
    750 }
    751 
    752 void x87_fistp( struct x86_function *p, struct x86_reg dst )
    753 {
    754    emit_1ub(p, 0xdb);
    755    emit_modrm_noreg(p, 3, dst);
    756 }
    757 
    758 void x87_fild( struct x86_function *p, struct x86_reg arg )
    759 {
    760    emit_1ub(p, 0xdf);
    761    emit_modrm_noreg(p, 0, arg);
    762 }
    763 
    764 void x87_fldz( struct x86_function *p )
    765 {
    766    emit_2ub(p, 0xd9, 0xee);
    767 }
    768 
    769 
    770 void x87_fldcw( struct x86_function *p, struct x86_reg arg )
    771 {
    772    assert(arg.file == file_REG32);
    773    assert(arg.mod != mod_REG);
    774    emit_1ub(p, 0xd9);
    775    emit_modrm_noreg(p, 5, arg);
    776 }
    777 
    778 void x87_fld1( struct x86_function *p )
    779 {
    780    emit_2ub(p, 0xd9, 0xe8);
    781 }
    782 
    783 void x87_fldl2e( struct x86_function *p )
    784 {
    785    emit_2ub(p, 0xd9, 0xea);
    786 }
    787 
    788 void x87_fldln2( struct x86_function *p )
    789 {
    790    emit_2ub(p, 0xd9, 0xed);
    791 }
    792 
    793 void x87_fwait( struct x86_function *p )
    794 {
    795    emit_1ub(p, 0x9b);
    796 }
    797 
    798 void x87_fnclex( struct x86_function *p )
    799 {
    800    emit_2ub(p, 0xdb, 0xe2);
    801 }
    802 
    803 void x87_fclex( struct x86_function *p )
    804 {
    805    x87_fwait(p);
    806    x87_fnclex(p);
    807 }
    808 
    809 
    810 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
    811 			  unsigned char dst0ub0,
    812 			  unsigned char dst0ub1,
    813 			  unsigned char arg0ub0,
    814 			  unsigned char arg0ub1,
    815 			  unsigned char argmem_noreg)
    816 {
    817    assert(dst.file == file_x87);
    818 
    819    if (arg.file == file_x87) {
    820       if (dst.idx == 0)
    821 	 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
    822       else if (arg.idx == 0)
    823 	 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
    824       else
    825 	 assert(0);
    826    }
    827    else if (dst.idx == 0) {
    828       assert(arg.file == file_REG32);
    829       emit_1ub(p, 0xd8);
    830       emit_modrm_noreg(p, argmem_noreg, arg);
    831    }
    832    else
    833       assert(0);
    834 }
    835 
    836 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    837 {
    838    x87_arith_op(p, dst, arg,
    839 		0xd8, 0xc8,
    840 		0xdc, 0xc8,
    841 		4);
    842 }
    843 
    844 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    845 {
    846    x87_arith_op(p, dst, arg,
    847 		0xd8, 0xe0,
    848 		0xdc, 0xe8,
    849 		4);
    850 }
    851 
    852 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    853 {
    854    x87_arith_op(p, dst, arg,
    855 		0xd8, 0xe8,
    856 		0xdc, 0xe0,
    857 		5);
    858 }
    859 
    860 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    861 {
    862    x87_arith_op(p, dst, arg,
    863 		0xd8, 0xc0,
    864 		0xdc, 0xc0,
    865 		0);
    866 }
    867 
    868 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    869 {
    870    x87_arith_op(p, dst, arg,
    871 		0xd8, 0xf0,
    872 		0xdc, 0xf8,
    873 		6);
    874 }
    875 
    876 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    877 {
    878    x87_arith_op(p, dst, arg,
    879 		0xd8, 0xf8,
    880 		0xdc, 0xf0,
    881 		7);
    882 }
    883 
    884 void x87_fmulp( struct x86_function *p, struct x86_reg dst )
    885 {
    886    assert(dst.file == file_x87);
    887    assert(dst.idx >= 1);
    888    emit_2ub(p, 0xde, 0xc8+dst.idx);
    889 }
    890 
    891 void x87_fsubp( struct x86_function *p, struct x86_reg dst )
    892 {
    893    assert(dst.file == file_x87);
    894    assert(dst.idx >= 1);
    895    emit_2ub(p, 0xde, 0xe8+dst.idx);
    896 }
    897 
    898 void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
    899 {
    900    assert(dst.file == file_x87);
    901    assert(dst.idx >= 1);
    902    emit_2ub(p, 0xde, 0xe0+dst.idx);
    903 }
    904 
    905 void x87_faddp( struct x86_function *p, struct x86_reg dst )
    906 {
    907    assert(dst.file == file_x87);
    908    assert(dst.idx >= 1);
    909    emit_2ub(p, 0xde, 0xc0+dst.idx);
    910 }
    911 
    912 void x87_fdivp( struct x86_function *p, struct x86_reg dst )
    913 {
    914    assert(dst.file == file_x87);
    915    assert(dst.idx >= 1);
    916    emit_2ub(p, 0xde, 0xf8+dst.idx);
    917 }
    918 
    919 void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
    920 {
    921    assert(dst.file == file_x87);
    922    assert(dst.idx >= 1);
    923    emit_2ub(p, 0xde, 0xf0+dst.idx);
    924 }
    925 
    926 void x87_fucom( struct x86_function *p, struct x86_reg arg )
    927 {
    928    assert(arg.file == file_x87);
    929    emit_2ub(p, 0xdd, 0xe0+arg.idx);
    930 }
    931 
    932 void x87_fucomp( struct x86_function *p, struct x86_reg arg )
    933 {
    934    assert(arg.file == file_x87);
    935    emit_2ub(p, 0xdd, 0xe8+arg.idx);
    936 }
    937 
    938 void x87_fucompp( struct x86_function *p )
    939 {
    940    emit_2ub(p, 0xda, 0xe9);
    941 }
    942 
    943 void x87_fxch( struct x86_function *p, struct x86_reg arg )
    944 {
    945    assert(arg.file == file_x87);
    946    emit_2ub(p, 0xd9, 0xc8+arg.idx);
    947 }
    948 
    949 void x87_fabs( struct x86_function *p )
    950 {
    951    emit_2ub(p, 0xd9, 0xe1);
    952 }
    953 
    954 void x87_fchs( struct x86_function *p )
    955 {
    956    emit_2ub(p, 0xd9, 0xe0);
    957 }
    958 
    959 void x87_fcos( struct x86_function *p )
    960 {
    961    emit_2ub(p, 0xd9, 0xff);
    962 }
    963 
    964 
    965 void x87_fprndint( struct x86_function *p )
    966 {
    967    emit_2ub(p, 0xd9, 0xfc);
    968 }
    969 
    970 void x87_fscale( struct x86_function *p )
    971 {
    972    emit_2ub(p, 0xd9, 0xfd);
    973 }
    974 
    975 void x87_fsin( struct x86_function *p )
    976 {
    977    emit_2ub(p, 0xd9, 0xfe);
    978 }
    979 
    980 void x87_fsincos( struct x86_function *p )
    981 {
    982    emit_2ub(p, 0xd9, 0xfb);
    983 }
    984 
    985 void x87_fsqrt( struct x86_function *p )
    986 {
    987    emit_2ub(p, 0xd9, 0xfa);
    988 }
    989 
    990 void x87_fxtract( struct x86_function *p )
    991 {
    992    emit_2ub(p, 0xd9, 0xf4);
    993 }
    994 
    995 /* st0 = (2^st0)-1
    996  *
    997  * Restrictions: -1.0 <= st0 <= 1.0
    998  */
    999 void x87_f2xm1( struct x86_function *p )
   1000 {
   1001    emit_2ub(p, 0xd9, 0xf0);
   1002 }
   1003 
   1004 /* st1 = st1 * log2(st0);
   1005  * pop_stack;
   1006  */
   1007 void x87_fyl2x( struct x86_function *p )
   1008 {
   1009    emit_2ub(p, 0xd9, 0xf1);
   1010 }
   1011 
   1012 /* st1 = st1 * log2(st0 + 1.0);
   1013  * pop_stack;
   1014  *
   1015  * A fast operation, with restrictions: -.29 < st0 < .29
   1016  */
   1017 void x87_fyl2xp1( struct x86_function *p )
   1018 {
   1019    emit_2ub(p, 0xd9, 0xf9);
   1020 }
   1021 
   1022 
   1023 void x87_fld( struct x86_function *p, struct x86_reg arg )
   1024 {
   1025    if (arg.file == file_x87)
   1026       emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   1027    else {
   1028       emit_1ub(p, 0xd9);
   1029       emit_modrm_noreg(p, 0, arg);
   1030    }
   1031 }
   1032 
   1033 void x87_fst( struct x86_function *p, struct x86_reg dst )
   1034 {
   1035    if (dst.file == file_x87)
   1036       emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   1037    else {
   1038       emit_1ub(p, 0xd9);
   1039       emit_modrm_noreg(p, 2, dst);
   1040    }
   1041 }
   1042 
   1043 void x87_fstp( struct x86_function *p, struct x86_reg dst )
   1044 {
   1045    if (dst.file == file_x87)
   1046       emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   1047    else {
   1048       emit_1ub(p, 0xd9);
   1049       emit_modrm_noreg(p, 3, dst);
   1050    }
   1051 }
   1052 
   1053 void x87_fcom( struct x86_function *p, struct x86_reg dst )
   1054 {
   1055    if (dst.file == file_x87)
   1056       emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   1057    else {
   1058       emit_1ub(p, 0xd8);
   1059       emit_modrm_noreg(p, 2, dst);
   1060    }
   1061 }
   1062 
   1063 void x87_fcomp( struct x86_function *p, struct x86_reg dst )
   1064 {
   1065    if (dst.file == file_x87)
   1066       emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   1067    else {
   1068       emit_1ub(p, 0xd8);
   1069       emit_modrm_noreg(p, 3, dst);
   1070    }
   1071 }
   1072 
   1073 
   1074 void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
   1075 {
   1076    assert(dst.file == file_REG32);
   1077 
   1078    if (dst.idx == reg_AX &&
   1079        dst.mod == mod_REG)
   1080       emit_2ub(p, 0xdf, 0xe0);
   1081    else {
   1082       emit_1ub(p, 0xdd);
   1083       emit_modrm_noreg(p, 7, dst);
   1084    }
   1085 }
   1086 
   1087 
   1088 
   1089 
   1090 /***********************************************************************
   1091  * MMX instructions
   1092  */
   1093 
   1094 void mmx_emms( struct x86_function *p )
   1095 {
   1096    assert(p->need_emms);
   1097    emit_2ub(p, 0x0f, 0x77);
   1098    p->need_emms = 0;
   1099 }
   1100 
   1101 void mmx_packssdw( struct x86_function *p,
   1102 		   struct x86_reg dst,
   1103 		   struct x86_reg src )
   1104 {
   1105    assert(dst.file == file_MMX &&
   1106 	  (src.file == file_MMX || src.mod != mod_REG));
   1107 
   1108    p->need_emms = 1;
   1109 
   1110    emit_2ub(p, X86_TWOB, 0x6b);
   1111    emit_modrm( p, dst, src );
   1112 }
   1113 
   1114 void mmx_packuswb( struct x86_function *p,
   1115 		   struct x86_reg dst,
   1116 		   struct x86_reg src )
   1117 {
   1118    assert(dst.file == file_MMX &&
   1119 	  (src.file == file_MMX || src.mod != mod_REG));
   1120 
   1121    p->need_emms = 1;
   1122 
   1123    emit_2ub(p, X86_TWOB, 0x67);
   1124    emit_modrm( p, dst, src );
   1125 }
   1126 
   1127 void mmx_movd( struct x86_function *p,
   1128 	       struct x86_reg dst,
   1129 	       struct x86_reg src )
   1130 {
   1131    p->need_emms = 1;
   1132    emit_1ub(p, X86_TWOB);
   1133    emit_op_modrm( p, 0x6e, 0x7e, dst, src );
   1134 }
   1135 
   1136 void mmx_movq( struct x86_function *p,
   1137 	       struct x86_reg dst,
   1138 	       struct x86_reg src )
   1139 {
   1140    p->need_emms = 1;
   1141    emit_1ub(p, X86_TWOB);
   1142    emit_op_modrm( p, 0x6f, 0x7f, dst, src );
   1143 }
   1144 
   1145 
   1146 /***********************************************************************
   1147  * Helper functions
   1148  */
   1149 
   1150 
   1151 /* Retreive a reference to one of the function arguments, taking into
   1152  * account any push/pop activity:
   1153  */
   1154 struct x86_reg x86_fn_arg( struct x86_function *p,
   1155 			   unsigned arg )
   1156 {
   1157    return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
   1158 			p->stack_offset + arg * 4);	/* ??? */
   1159 }
   1160 
   1161 
   1162 void x86_init_func( struct x86_function *p )
   1163 {
   1164    p->size = 0;
   1165    p->store = NULL;
   1166    p->csr = p->store;
   1167 }
   1168 
   1169 int x86_init_func_size( struct x86_function *p, unsigned code_size )
   1170 {
   1171    p->size = code_size;
   1172    p->store = _mesa_exec_malloc(code_size);
   1173    p->csr = p->store;
   1174    return p->store != NULL;
   1175 }
   1176 
   1177 void x86_release_func( struct x86_function *p )
   1178 {
   1179    _mesa_exec_free(p->store);
   1180    p->store = NULL;
   1181    p->csr = NULL;
   1182    p->size = 0;
   1183 }
   1184 
   1185 
   1186 void (*x86_get_func( struct x86_function *p ))(void)
   1187 {
   1188    if (DISASSEM && p->store)
   1189       printf("disassemble %p %p\n", p->store, p->csr);
   1190    return (void (*)(void)) (unsigned long) p->store;
   1191 }
   1192 
   1193 #else
   1194 
   1195 void x86sse_dummy( void )
   1196 {
   1197 }
   1198 
   1199 #endif
   1200 
   1201 #else  /* USE_X86_ASM */
   1202 
/* Placeholder definition for builds without USE_X86_ASM; per the
 * existing note it is here only to silence a warning.
 */
int x86sse_c_dummy_var; /* silence warning */
   1204 
   1205 #endif /* USE_X86_ASM */
   1206