Home | History | Annotate | Download | only in rtasm
      1 #ifdef USE_X86_ASM
      2 #if defined(__i386__) || defined(__386__)
      3 
      4 #include "main/imports.h"
      5 #include "x86sse.h"
      6 
      7 #define DISASSEM 0
      8 #define X86_TWOB 0x0f
      9 
     10 #if 0
     11 static unsigned char *cptr( void (*label)() )
     12 {
     13    return (unsigned char *)(unsigned long)label;
     14 }
     15 #endif
     16 
     17 
     18 static void do_realloc( struct x86_function *p )
     19 {
     20    if (p->size == 0) {
     21       p->size = 1024;
     22       p->store = _mesa_exec_malloc(p->size);
     23       p->csr = p->store;
     24    }
     25    else {
     26       unsigned used = p->csr - p->store;
     27       unsigned char *tmp = p->store;
     28       p->size *= 2;
     29       p->store = _mesa_exec_malloc(p->size);
     30       memcpy(p->store, tmp, used);
     31       p->csr = p->store + used;
     32       _mesa_exec_free(tmp);
     33    }
     34 }
     35 
     36 /* Emit bytes to the instruction stream:
     37  */
     38 static unsigned char *reserve( struct x86_function *p, int bytes )
     39 {
     40    if (p->csr + bytes - p->store > p->size)
     41       do_realloc(p);
     42 
     43    {
     44       unsigned char *csr = p->csr;
     45       p->csr += bytes;
     46       return csr;
     47    }
     48 }
     49 
     50 
     51 
     52 static void emit_1b( struct x86_function *p, char b0 )
     53 {
     54    char *csr = (char *)reserve(p, 1);
     55    *csr = b0;
     56 }
     57 
     58 static void emit_1i( struct x86_function *p, int i0 )
     59 {
     60    int *icsr = (int *)reserve(p, sizeof(i0));
     61    *icsr = i0;
     62 }
     63 
     64 static void emit_1ub( struct x86_function *p, unsigned char b0 )
     65 {
     66    unsigned char *csr = reserve(p, 1);
     67    *csr++ = b0;
     68 }
     69 
     70 static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
     71 {
     72    unsigned char *csr = reserve(p, 2);
     73    *csr++ = b0;
     74    *csr++ = b1;
     75 }
     76 
     77 static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
     78 {
     79    unsigned char *csr = reserve(p, 3);
     80    *csr++ = b0;
     81    *csr++ = b1;
     82    *csr++ = b2;
     83 }
     84 
     85 
     86 /* Build a modRM byte + possible displacement.  No treatment of SIB
     87  * indexing.  BZZT - no way to encode an absolute address.
     88  */
     89 static void emit_modrm( struct x86_function *p,
     90 			struct x86_reg reg,
     91 			struct x86_reg regmem )
     92 {
     93    unsigned char val = 0;
     94 
     95    assert(reg.mod == mod_REG);
     96 
     97    val |= regmem.mod << 6;     	/* mod field */
     98    val |= reg.idx << 3;		/* reg field */
     99    val |= regmem.idx;		/* r/m field */
    100 
    101    emit_1ub(p, val);
    102 
    103    /* Oh-oh we've stumbled into the SIB thing.
    104     */
    105    if (regmem.file == file_REG32 &&
    106        regmem.idx == reg_SP) {
    107       emit_1ub(p, 0x24);		/* simplistic! */
    108    }
    109 
    110    switch (regmem.mod) {
    111    case mod_REG:
    112    case mod_INDIRECT:
    113       break;
    114    case mod_DISP8:
    115       emit_1b(p, regmem.disp);
    116       break;
    117    case mod_DISP32:
    118       emit_1i(p, regmem.disp);
    119       break;
    120    default:
    121       assert(0);
    122       break;
    123    }
    124 }
    125 
    126 
    127 static void emit_modrm_noreg( struct x86_function *p,
    128 			      unsigned op,
    129 			      struct x86_reg regmem )
    130 {
    131    struct x86_reg dummy = x86_make_reg(file_REG32, op);
    132    emit_modrm(p, dummy, regmem);
    133 }
    134 
    135 /* Many x86 instructions have two opcodes to cope with the situations
    136  * where the destination is a register or memory reference
    137  * respectively.  This function selects the correct opcode based on
    138  * the arguments presented.
    139  */
    140 static void emit_op_modrm( struct x86_function *p,
    141 			   unsigned char op_dst_is_reg,
    142 			   unsigned char op_dst_is_mem,
    143 			   struct x86_reg dst,
    144 			   struct x86_reg src )
    145 {
    146    switch (dst.mod) {
    147    case mod_REG:
    148       emit_1ub(p, op_dst_is_reg);
    149       emit_modrm(p, dst, src);
    150       break;
    151    case mod_INDIRECT:
    152    case mod_DISP32:
    153    case mod_DISP8:
    154       assert(src.mod == mod_REG);
    155       emit_1ub(p, op_dst_is_mem);
    156       emit_modrm(p, src, dst);
    157       break;
    158    default:
    159       assert(0);
    160       break;
    161    }
    162 }
    163 
    164 
    165 
    166 
    167 
    168 
    169 
    170 /* Create and manipulate registers and regmem values:
    171  */
    172 struct x86_reg x86_make_reg( enum x86_reg_file file,
    173 			     enum x86_reg_name idx )
    174 {
    175    struct x86_reg reg;
    176 
    177    reg.file = file;
    178    reg.idx = idx;
    179    reg.mod = mod_REG;
    180    reg.disp = 0;
    181 
    182    return reg;
    183 }
    184 
    185 struct x86_reg x86_make_disp( struct x86_reg reg,
    186 			      int disp )
    187 {
    188    assert(reg.file == file_REG32);
    189 
    190    if (reg.mod == mod_REG)
    191       reg.disp = disp;
    192    else
    193       reg.disp += disp;
    194 
    195    if (reg.disp == 0)
    196       reg.mod = mod_INDIRECT;
    197    else if (reg.disp <= 127 && reg.disp >= -128)
    198       reg.mod = mod_DISP8;
    199    else
    200       reg.mod = mod_DISP32;
    201 
    202    return reg;
    203 }
    204 
    205 struct x86_reg x86_deref( struct x86_reg reg )
    206 {
    207    return x86_make_disp(reg, 0);
    208 }
    209 
    210 struct x86_reg x86_get_base_reg( struct x86_reg reg )
    211 {
    212    return x86_make_reg( reg.file, reg.idx );
    213 }
    214 
    215 unsigned char *x86_get_label( struct x86_function *p )
    216 {
    217    return p->csr;
    218 }
    219 
    220 
    221 
    222 /***********************************************************************
    223  * x86 instructions
    224  */
    225 
    226 
    227 void x86_jcc( struct x86_function *p,
    228 	      enum x86_cc cc,
    229 	      unsigned char *label )
    230 {
    231    int offset = label - (x86_get_label(p) + 2);
    232 
    233    if (offset <= 127 && offset >= -128) {
    234       emit_1ub(p, 0x70 + cc);
    235       emit_1b(p, (char) offset);
    236    }
    237    else {
    238       offset = label - (x86_get_label(p) + 6);
    239       emit_2ub(p, 0x0f, 0x80 + cc);
    240       emit_1i(p, offset);
    241    }
    242 }
    243 
    244 /* Always use a 32bit offset for forward jumps:
    245  */
    246 unsigned char *x86_jcc_forward( struct x86_function *p,
    247 			  enum x86_cc cc )
    248 {
    249    emit_2ub(p, 0x0f, 0x80 + cc);
    250    emit_1i(p, 0);
    251    return x86_get_label(p);
    252 }
    253 
    254 unsigned char *x86_jmp_forward( struct x86_function *p)
    255 {
    256    emit_1ub(p, 0xe9);
    257    emit_1i(p, 0);
    258    return x86_get_label(p);
    259 }
    260 
    261 unsigned char *x86_call_forward( struct x86_function *p)
    262 {
    263    emit_1ub(p, 0xe8);
    264    emit_1i(p, 0);
    265    return x86_get_label(p);
    266 }
    267 
    268 /* Fixup offset from forward jump:
    269  */
    270 void x86_fixup_fwd_jump( struct x86_function *p,
    271 			 unsigned char *fixup )
    272 {
    273    *(int *)(fixup - 4) = x86_get_label(p) - fixup;
    274 }
    275 
    276 void x86_jmp( struct x86_function *p, unsigned char *label)
    277 {
    278    emit_1ub(p, 0xe9);
    279    emit_1i(p, label - x86_get_label(p) - 4);
    280 }
    281 
    282 #if 0
    283 /* This doesn't work once we start reallocating & copying the
    284  * generated code on buffer fills, because the call is relative to the
    285  * current pc.
    286  */
    287 void x86_call( struct x86_function *p, void (*label)())
    288 {
    289    emit_1ub(p, 0xe8);
    290    emit_1i(p, cptr(label) - x86_get_label(p) - 4);
    291 }
    292 #else
    293 void x86_call( struct x86_function *p, struct x86_reg reg)
    294 {
    295    emit_1ub(p, 0xff);
    296    emit_modrm_noreg(p, 2, reg);
    297 }
    298 #endif
    299 
    300 
    301 /* michal:
    302  * Temporary. As I need immediate operands, and dont want to mess with the codegen,
    303  * I load the immediate into general purpose register and use it.
    304  */
    305 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
    306 {
    307    assert(dst.mod == mod_REG);
    308    emit_1ub(p, 0xb8 + dst.idx);
    309    emit_1i(p, imm);
    310 }
    311 
    312 void x86_push( struct x86_function *p,
    313 	       struct x86_reg reg )
    314 {
    315    assert(reg.mod == mod_REG);
    316    emit_1ub(p, 0x50 + reg.idx);
    317    p->stack_offset += 4;
    318 }
    319 
    320 void x86_pop( struct x86_function *p,
    321 	      struct x86_reg reg )
    322 {
    323    assert(reg.mod == mod_REG);
    324    emit_1ub(p, 0x58 + reg.idx);
    325    p->stack_offset -= 4;
    326 }
    327 
    328 void x86_inc( struct x86_function *p,
    329 	      struct x86_reg reg )
    330 {
    331    assert(reg.mod == mod_REG);
    332    emit_1ub(p, 0x40 + reg.idx);
    333 }
    334 
    335 void x86_dec( struct x86_function *p,
    336 	      struct x86_reg reg )
    337 {
    338    assert(reg.mod == mod_REG);
    339    emit_1ub(p, 0x48 + reg.idx);
    340 }
    341 
    342 void x86_ret( struct x86_function *p )
    343 {
    344    emit_1ub(p, 0xc3);
    345 }
    346 
    347 void x86_sahf( struct x86_function *p )
    348 {
    349    emit_1ub(p, 0x9e);
    350 }
    351 
    352 void x86_mov( struct x86_function *p,
    353 	      struct x86_reg dst,
    354 	      struct x86_reg src )
    355 {
    356    emit_op_modrm( p, 0x8b, 0x89, dst, src );
    357 }
    358 
    359 void x86_xor( struct x86_function *p,
    360 	      struct x86_reg dst,
    361 	      struct x86_reg src )
    362 {
    363    emit_op_modrm( p, 0x33, 0x31, dst, src );
    364 }
    365 
    366 void x86_cmp( struct x86_function *p,
    367 	      struct x86_reg dst,
    368 	      struct x86_reg src )
    369 {
    370    emit_op_modrm( p, 0x3b, 0x39, dst, src );
    371 }
    372 
    373 void x86_lea( struct x86_function *p,
    374 	      struct x86_reg dst,
    375 	      struct x86_reg src )
    376 {
    377    emit_1ub(p, 0x8d);
    378    emit_modrm( p, dst, src );
    379 }
    380 
    381 void x86_test( struct x86_function *p,
    382 	       struct x86_reg dst,
    383 	       struct x86_reg src )
    384 {
    385    emit_1ub(p, 0x85);
    386    emit_modrm( p, dst, src );
    387 }
    388 
    389 void x86_add( struct x86_function *p,
    390 	       struct x86_reg dst,
    391 	       struct x86_reg src )
    392 {
    393    emit_op_modrm(p, 0x03, 0x01, dst, src );
    394 }
    395 
    396 void x86_mul( struct x86_function *p,
    397 	       struct x86_reg src )
    398 {
    399    assert (src.file == file_REG32 && src.mod == mod_REG);
    400    emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
    401 }
    402 
    403 void x86_sub( struct x86_function *p,
    404 	       struct x86_reg dst,
    405 	       struct x86_reg src )
    406 {
    407    emit_op_modrm(p, 0x2b, 0x29, dst, src );
    408 }
    409 
    410 void x86_or( struct x86_function *p,
    411              struct x86_reg dst,
    412              struct x86_reg src )
    413 {
    414    emit_op_modrm( p, 0x0b, 0x09, dst, src );
    415 }
    416 
    417 void x86_and( struct x86_function *p,
    418               struct x86_reg dst,
    419               struct x86_reg src )
    420 {
    421    emit_op_modrm( p, 0x23, 0x21, dst, src );
    422 }
    423 
    424 
    425 
    426 /***********************************************************************
    427  * SSE instructions
    428  */
    429 
    430 
    431 void sse_movss( struct x86_function *p,
    432 		struct x86_reg dst,
    433 		struct x86_reg src )
    434 {
    435    emit_2ub(p, 0xF3, X86_TWOB);
    436    emit_op_modrm( p, 0x10, 0x11, dst, src );
    437 }
    438 
    439 void sse_movaps( struct x86_function *p,
    440 		 struct x86_reg dst,
    441 		 struct x86_reg src )
    442 {
    443    emit_1ub(p, X86_TWOB);
    444    emit_op_modrm( p, 0x28, 0x29, dst, src );
    445 }
    446 
    447 void sse_movups( struct x86_function *p,
    448 		 struct x86_reg dst,
    449 		 struct x86_reg src )
    450 {
    451    emit_1ub(p, X86_TWOB);
    452    emit_op_modrm( p, 0x10, 0x11, dst, src );
    453 }
    454 
    455 void sse_movhps( struct x86_function *p,
    456 		 struct x86_reg dst,
    457 		 struct x86_reg src )
    458 {
    459    assert(dst.mod != mod_REG || src.mod != mod_REG);
    460    emit_1ub(p, X86_TWOB);
    461    emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
    462 }
    463 
    464 void sse_movlps( struct x86_function *p,
    465 		 struct x86_reg dst,
    466 		 struct x86_reg src )
    467 {
    468    assert(dst.mod != mod_REG || src.mod != mod_REG);
    469    emit_1ub(p, X86_TWOB);
    470    emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
    471 }
    472 
    473 void sse_maxps( struct x86_function *p,
    474 		struct x86_reg dst,
    475 		struct x86_reg src )
    476 {
    477    emit_2ub(p, X86_TWOB, 0x5F);
    478    emit_modrm( p, dst, src );
    479 }
    480 
    481 void sse_maxss( struct x86_function *p,
    482 		struct x86_reg dst,
    483 		struct x86_reg src )
    484 {
    485    emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
    486    emit_modrm( p, dst, src );
    487 }
    488 
    489 void sse_divss( struct x86_function *p,
    490 		struct x86_reg dst,
    491 		struct x86_reg src )
    492 {
    493    emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
    494    emit_modrm( p, dst, src );
    495 }
    496 
    497 void sse_minps( struct x86_function *p,
    498 		struct x86_reg dst,
    499 		struct x86_reg src )
    500 {
    501    emit_2ub(p, X86_TWOB, 0x5D);
    502    emit_modrm( p, dst, src );
    503 }
    504 
    505 void sse_subps( struct x86_function *p,
    506 		struct x86_reg dst,
    507 		struct x86_reg src )
    508 {
    509    emit_2ub(p, X86_TWOB, 0x5C);
    510    emit_modrm( p, dst, src );
    511 }
    512 
    513 void sse_mulps( struct x86_function *p,
    514 		struct x86_reg dst,
    515 		struct x86_reg src )
    516 {
    517    emit_2ub(p, X86_TWOB, 0x59);
    518    emit_modrm( p, dst, src );
    519 }
    520 
    521 void sse_mulss( struct x86_function *p,
    522 		struct x86_reg dst,
    523 		struct x86_reg src )
    524 {
    525    emit_3ub(p, 0xF3, X86_TWOB, 0x59);
    526    emit_modrm( p, dst, src );
    527 }
    528 
    529 void sse_addps( struct x86_function *p,
    530 		struct x86_reg dst,
    531 		struct x86_reg src )
    532 {
    533    emit_2ub(p, X86_TWOB, 0x58);
    534    emit_modrm( p, dst, src );
    535 }
    536 
    537 void sse_addss( struct x86_function *p,
    538 		struct x86_reg dst,
    539 		struct x86_reg src )
    540 {
    541    emit_3ub(p, 0xF3, X86_TWOB, 0x58);
    542    emit_modrm( p, dst, src );
    543 }
    544 
    545 void sse_andnps( struct x86_function *p,
    546                  struct x86_reg dst,
    547                  struct x86_reg src )
    548 {
    549    emit_2ub(p, X86_TWOB, 0x55);
    550    emit_modrm( p, dst, src );
    551 }
    552 
    553 void sse_andps( struct x86_function *p,
    554 		struct x86_reg dst,
    555 		struct x86_reg src )
    556 {
    557    emit_2ub(p, X86_TWOB, 0x54);
    558    emit_modrm( p, dst, src );
    559 }
    560 
    561 void sse_rsqrtps( struct x86_function *p,
    562                   struct x86_reg dst,
    563                   struct x86_reg src )
    564 {
    565    emit_2ub(p, X86_TWOB, 0x52);
    566    emit_modrm( p, dst, src );
    567 }
    568 
    569 void sse_rsqrtss( struct x86_function *p,
    570 		  struct x86_reg dst,
    571 		  struct x86_reg src )
    572 {
    573    emit_3ub(p, 0xF3, X86_TWOB, 0x52);
    574    emit_modrm( p, dst, src );
    575 
    576 }
    577 
    578 void sse_movhlps( struct x86_function *p,
    579 		  struct x86_reg dst,
    580 		  struct x86_reg src )
    581 {
    582    assert(dst.mod == mod_REG && src.mod == mod_REG);
    583    emit_2ub(p, X86_TWOB, 0x12);
    584    emit_modrm( p, dst, src );
    585 }
    586 
    587 void sse_movlhps( struct x86_function *p,
    588 		  struct x86_reg dst,
    589 		  struct x86_reg src )
    590 {
    591    assert(dst.mod == mod_REG && src.mod == mod_REG);
    592    emit_2ub(p, X86_TWOB, 0x16);
    593    emit_modrm( p, dst, src );
    594 }
    595 
    596 void sse_orps( struct x86_function *p,
    597                struct x86_reg dst,
    598                struct x86_reg src )
    599 {
    600    emit_2ub(p, X86_TWOB, 0x56);
    601    emit_modrm( p, dst, src );
    602 }
    603 
    604 void sse_xorps( struct x86_function *p,
    605                 struct x86_reg dst,
    606                 struct x86_reg src )
    607 {
    608    emit_2ub(p, X86_TWOB, 0x57);
    609    emit_modrm( p, dst, src );
    610 }
    611 
    612 void sse_cvtps2pi( struct x86_function *p,
    613 		   struct x86_reg dst,
    614 		   struct x86_reg src )
    615 {
    616    assert(dst.file == file_MMX &&
    617 	  (src.file == file_XMM || src.mod != mod_REG));
    618 
    619    p->need_emms = 1;
    620 
    621    emit_2ub(p, X86_TWOB, 0x2d);
    622    emit_modrm( p, dst, src );
    623 }
    624 
    625 
    626 /* Shufps can also be used to implement a reduced swizzle when dest ==
    627  * arg0.
    628  */
    629 void sse_shufps( struct x86_function *p,
    630 		 struct x86_reg dest,
    631 		 struct x86_reg arg0,
    632 		 unsigned char shuf)
    633 {
    634    emit_2ub(p, X86_TWOB, 0xC6);
    635    emit_modrm(p, dest, arg0);
    636    emit_1ub(p, shuf);
    637 }
    638 
    639 void sse_cmpps( struct x86_function *p,
    640 		struct x86_reg dest,
    641 		struct x86_reg arg0,
    642 		unsigned char cc)
    643 {
    644    emit_2ub(p, X86_TWOB, 0xC2);
    645    emit_modrm(p, dest, arg0);
    646    emit_1ub(p, cc);
    647 }
    648 
    649 void sse_pmovmskb( struct x86_function *p,
    650                    struct x86_reg dest,
    651                    struct x86_reg src)
    652 {
    653     emit_3ub(p, 0x66, X86_TWOB, 0xD7);
    654     emit_modrm(p, dest, src);
    655 }
    656 
    657 /***********************************************************************
    658  * SSE2 instructions
    659  */
    660 
    661 /**
    662  * Perform a reduced swizzle:
    663  */
    664 void sse2_pshufd( struct x86_function *p,
    665 		  struct x86_reg dest,
    666 		  struct x86_reg arg0,
    667 		  unsigned char shuf)
    668 {
    669    emit_3ub(p, 0x66, X86_TWOB, 0x70);
    670    emit_modrm(p, dest, arg0);
    671    emit_1ub(p, shuf);
    672 }
    673 
    674 void sse2_cvttps2dq( struct x86_function *p,
    675                      struct x86_reg dst,
    676                      struct x86_reg src )
    677 {
    678    emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
    679    emit_modrm( p, dst, src );
    680 }
    681 
    682 void sse2_cvtps2dq( struct x86_function *p,
    683 		    struct x86_reg dst,
    684 		    struct x86_reg src )
    685 {
    686    emit_3ub(p, 0x66, X86_TWOB, 0x5B);
    687    emit_modrm( p, dst, src );
    688 }
    689 
    690 void sse2_packssdw( struct x86_function *p,
    691 		    struct x86_reg dst,
    692 		    struct x86_reg src )
    693 {
    694    emit_3ub(p, 0x66, X86_TWOB, 0x6B);
    695    emit_modrm( p, dst, src );
    696 }
    697 
    698 void sse2_packsswb( struct x86_function *p,
    699 		    struct x86_reg dst,
    700 		    struct x86_reg src )
    701 {
    702    emit_3ub(p, 0x66, X86_TWOB, 0x63);
    703    emit_modrm( p, dst, src );
    704 }
    705 
    706 void sse2_packuswb( struct x86_function *p,
    707 		    struct x86_reg dst,
    708 		    struct x86_reg src )
    709 {
    710    emit_3ub(p, 0x66, X86_TWOB, 0x67);
    711    emit_modrm( p, dst, src );
    712 }
    713 
    714 void sse2_rcpps( struct x86_function *p,
    715                  struct x86_reg dst,
    716                  struct x86_reg src )
    717 {
    718    emit_2ub(p, X86_TWOB, 0x53);
    719    emit_modrm( p, dst, src );
    720 }
    721 
    722 void sse2_rcpss( struct x86_function *p,
    723 		struct x86_reg dst,
    724 		struct x86_reg src )
    725 {
    726    emit_3ub(p, 0xF3, X86_TWOB, 0x53);
    727    emit_modrm( p, dst, src );
    728 }
    729 
    730 void sse2_movd( struct x86_function *p,
    731 		struct x86_reg dst,
    732 		struct x86_reg src )
    733 {
    734    emit_2ub(p, 0x66, X86_TWOB);
    735    emit_op_modrm( p, 0x6e, 0x7e, dst, src );
    736 }
    737 
    738 
    739 
    740 
    741 /***********************************************************************
    742  * x87 instructions
    743  */
    744 void x87_fist( struct x86_function *p, struct x86_reg dst )
    745 {
    746    emit_1ub(p, 0xdb);
    747    emit_modrm_noreg(p, 2, dst);
    748 }
    749 
    750 void x87_fistp( struct x86_function *p, struct x86_reg dst )
    751 {
    752    emit_1ub(p, 0xdb);
    753    emit_modrm_noreg(p, 3, dst);
    754 }
    755 
    756 void x87_fild( struct x86_function *p, struct x86_reg arg )
    757 {
    758    emit_1ub(p, 0xdf);
    759    emit_modrm_noreg(p, 0, arg);
    760 }
    761 
    762 void x87_fldz( struct x86_function *p )
    763 {
    764    emit_2ub(p, 0xd9, 0xee);
    765 }
    766 
    767 
    768 void x87_fldcw( struct x86_function *p, struct x86_reg arg )
    769 {
    770    assert(arg.file == file_REG32);
    771    assert(arg.mod != mod_REG);
    772    emit_1ub(p, 0xd9);
    773    emit_modrm_noreg(p, 5, arg);
    774 }
    775 
    776 void x87_fld1( struct x86_function *p )
    777 {
    778    emit_2ub(p, 0xd9, 0xe8);
    779 }
    780 
    781 void x87_fldl2e( struct x86_function *p )
    782 {
    783    emit_2ub(p, 0xd9, 0xea);
    784 }
    785 
    786 void x87_fldln2( struct x86_function *p )
    787 {
    788    emit_2ub(p, 0xd9, 0xed);
    789 }
    790 
    791 void x87_fwait( struct x86_function *p )
    792 {
    793    emit_1ub(p, 0x9b);
    794 }
    795 
    796 void x87_fnclex( struct x86_function *p )
    797 {
    798    emit_2ub(p, 0xdb, 0xe2);
    799 }
    800 
    801 void x87_fclex( struct x86_function *p )
    802 {
    803    x87_fwait(p);
    804    x87_fnclex(p);
    805 }
    806 
    807 
    808 static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
    809 			  unsigned char dst0ub0,
    810 			  unsigned char dst0ub1,
    811 			  unsigned char arg0ub0,
    812 			  unsigned char arg0ub1,
    813 			  unsigned char argmem_noreg)
    814 {
    815    assert(dst.file == file_x87);
    816 
    817    if (arg.file == file_x87) {
    818       if (dst.idx == 0)
    819 	 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
    820       else if (arg.idx == 0)
    821 	 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
    822       else
    823 	 assert(0);
    824    }
    825    else if (dst.idx == 0) {
    826       assert(arg.file == file_REG32);
    827       emit_1ub(p, 0xd8);
    828       emit_modrm_noreg(p, argmem_noreg, arg);
    829    }
    830    else
    831       assert(0);
    832 }
    833 
    834 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    835 {
    836    x87_arith_op(p, dst, arg,
    837 		0xd8, 0xc8,
    838 		0xdc, 0xc8,
    839 		4);
    840 }
    841 
    842 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    843 {
    844    x87_arith_op(p, dst, arg,
    845 		0xd8, 0xe0,
    846 		0xdc, 0xe8,
    847 		4);
    848 }
    849 
    850 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    851 {
    852    x87_arith_op(p, dst, arg,
    853 		0xd8, 0xe8,
    854 		0xdc, 0xe0,
    855 		5);
    856 }
    857 
    858 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    859 {
    860    x87_arith_op(p, dst, arg,
    861 		0xd8, 0xc0,
    862 		0xdc, 0xc0,
    863 		0);
    864 }
    865 
    866 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    867 {
    868    x87_arith_op(p, dst, arg,
    869 		0xd8, 0xf0,
    870 		0xdc, 0xf8,
    871 		6);
    872 }
    873 
    874 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
    875 {
    876    x87_arith_op(p, dst, arg,
    877 		0xd8, 0xf8,
    878 		0xdc, 0xf0,
    879 		7);
    880 }
    881 
    882 void x87_fmulp( struct x86_function *p, struct x86_reg dst )
    883 {
    884    assert(dst.file == file_x87);
    885    assert(dst.idx >= 1);
    886    emit_2ub(p, 0xde, 0xc8+dst.idx);
    887 }
    888 
    889 void x87_fsubp( struct x86_function *p, struct x86_reg dst )
    890 {
    891    assert(dst.file == file_x87);
    892    assert(dst.idx >= 1);
    893    emit_2ub(p, 0xde, 0xe8+dst.idx);
    894 }
    895 
    896 void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
    897 {
    898    assert(dst.file == file_x87);
    899    assert(dst.idx >= 1);
    900    emit_2ub(p, 0xde, 0xe0+dst.idx);
    901 }
    902 
    903 void x87_faddp( struct x86_function *p, struct x86_reg dst )
    904 {
    905    assert(dst.file == file_x87);
    906    assert(dst.idx >= 1);
    907    emit_2ub(p, 0xde, 0xc0+dst.idx);
    908 }
    909 
    910 void x87_fdivp( struct x86_function *p, struct x86_reg dst )
    911 {
    912    assert(dst.file == file_x87);
    913    assert(dst.idx >= 1);
    914    emit_2ub(p, 0xde, 0xf8+dst.idx);
    915 }
    916 
    917 void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
    918 {
    919    assert(dst.file == file_x87);
    920    assert(dst.idx >= 1);
    921    emit_2ub(p, 0xde, 0xf0+dst.idx);
    922 }
    923 
    924 void x87_fucom( struct x86_function *p, struct x86_reg arg )
    925 {
    926    assert(arg.file == file_x87);
    927    emit_2ub(p, 0xdd, 0xe0+arg.idx);
    928 }
    929 
    930 void x87_fucomp( struct x86_function *p, struct x86_reg arg )
    931 {
    932    assert(arg.file == file_x87);
    933    emit_2ub(p, 0xdd, 0xe8+arg.idx);
    934 }
    935 
    936 void x87_fucompp( struct x86_function *p )
    937 {
    938    emit_2ub(p, 0xda, 0xe9);
    939 }
    940 
    941 void x87_fxch( struct x86_function *p, struct x86_reg arg )
    942 {
    943    assert(arg.file == file_x87);
    944    emit_2ub(p, 0xd9, 0xc8+arg.idx);
    945 }
    946 
    947 void x87_fabs( struct x86_function *p )
    948 {
    949    emit_2ub(p, 0xd9, 0xe1);
    950 }
    951 
    952 void x87_fchs( struct x86_function *p )
    953 {
    954    emit_2ub(p, 0xd9, 0xe0);
    955 }
    956 
    957 void x87_fcos( struct x86_function *p )
    958 {
    959    emit_2ub(p, 0xd9, 0xff);
    960 }
    961 
    962 
    963 void x87_fprndint( struct x86_function *p )
    964 {
    965    emit_2ub(p, 0xd9, 0xfc);
    966 }
    967 
    968 void x87_fscale( struct x86_function *p )
    969 {
    970    emit_2ub(p, 0xd9, 0xfd);
    971 }
    972 
    973 void x87_fsin( struct x86_function *p )
    974 {
    975    emit_2ub(p, 0xd9, 0xfe);
    976 }
    977 
    978 void x87_fsincos( struct x86_function *p )
    979 {
    980    emit_2ub(p, 0xd9, 0xfb);
    981 }
    982 
    983 void x87_fsqrt( struct x86_function *p )
    984 {
    985    emit_2ub(p, 0xd9, 0xfa);
    986 }
    987 
    988 void x87_fxtract( struct x86_function *p )
    989 {
    990    emit_2ub(p, 0xd9, 0xf4);
    991 }
    992 
    993 /* st0 = (2^st0)-1
    994  *
    995  * Restrictions: -1.0 <= st0 <= 1.0
    996  */
    997 void x87_f2xm1( struct x86_function *p )
    998 {
    999    emit_2ub(p, 0xd9, 0xf0);
   1000 }
   1001 
   1002 /* st1 = st1 * log2(st0);
   1003  * pop_stack;
   1004  */
   1005 void x87_fyl2x( struct x86_function *p )
   1006 {
   1007    emit_2ub(p, 0xd9, 0xf1);
   1008 }
   1009 
   1010 /* st1 = st1 * log2(st0 + 1.0);
   1011  * pop_stack;
   1012  *
   1013  * A fast operation, with restrictions: -.29 < st0 < .29
   1014  */
   1015 void x87_fyl2xp1( struct x86_function *p )
   1016 {
   1017    emit_2ub(p, 0xd9, 0xf9);
   1018 }
   1019 
   1020 
   1021 void x87_fld( struct x86_function *p, struct x86_reg arg )
   1022 {
   1023    if (arg.file == file_x87)
   1024       emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   1025    else {
   1026       emit_1ub(p, 0xd9);
   1027       emit_modrm_noreg(p, 0, arg);
   1028    }
   1029 }
   1030 
   1031 void x87_fst( struct x86_function *p, struct x86_reg dst )
   1032 {
   1033    if (dst.file == file_x87)
   1034       emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   1035    else {
   1036       emit_1ub(p, 0xd9);
   1037       emit_modrm_noreg(p, 2, dst);
   1038    }
   1039 }
   1040 
   1041 void x87_fstp( struct x86_function *p, struct x86_reg dst )
   1042 {
   1043    if (dst.file == file_x87)
   1044       emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   1045    else {
   1046       emit_1ub(p, 0xd9);
   1047       emit_modrm_noreg(p, 3, dst);
   1048    }
   1049 }
   1050 
   1051 void x87_fcom( struct x86_function *p, struct x86_reg dst )
   1052 {
   1053    if (dst.file == file_x87)
   1054       emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   1055    else {
   1056       emit_1ub(p, 0xd8);
   1057       emit_modrm_noreg(p, 2, dst);
   1058    }
   1059 }
   1060 
   1061 void x87_fcomp( struct x86_function *p, struct x86_reg dst )
   1062 {
   1063    if (dst.file == file_x87)
   1064       emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   1065    else {
   1066       emit_1ub(p, 0xd8);
   1067       emit_modrm_noreg(p, 3, dst);
   1068    }
   1069 }
   1070 
   1071 
   1072 void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
   1073 {
   1074    assert(dst.file == file_REG32);
   1075 
   1076    if (dst.idx == reg_AX &&
   1077        dst.mod == mod_REG)
   1078       emit_2ub(p, 0xdf, 0xe0);
   1079    else {
   1080       emit_1ub(p, 0xdd);
   1081       emit_modrm_noreg(p, 7, dst);
   1082    }
   1083 }
   1084 
   1085 
   1086 
   1087 
   1088 /***********************************************************************
   1089  * MMX instructions
   1090  */
   1091 
   1092 void mmx_emms( struct x86_function *p )
   1093 {
   1094    assert(p->need_emms);
   1095    emit_2ub(p, 0x0f, 0x77);
   1096    p->need_emms = 0;
   1097 }
   1098 
   1099 void mmx_packssdw( struct x86_function *p,
   1100 		   struct x86_reg dst,
   1101 		   struct x86_reg src )
   1102 {
   1103    assert(dst.file == file_MMX &&
   1104 	  (src.file == file_MMX || src.mod != mod_REG));
   1105 
   1106    p->need_emms = 1;
   1107 
   1108    emit_2ub(p, X86_TWOB, 0x6b);
   1109    emit_modrm( p, dst, src );
   1110 }
   1111 
   1112 void mmx_packuswb( struct x86_function *p,
   1113 		   struct x86_reg dst,
   1114 		   struct x86_reg src )
   1115 {
   1116    assert(dst.file == file_MMX &&
   1117 	  (src.file == file_MMX || src.mod != mod_REG));
   1118 
   1119    p->need_emms = 1;
   1120 
   1121    emit_2ub(p, X86_TWOB, 0x67);
   1122    emit_modrm( p, dst, src );
   1123 }
   1124 
   1125 void mmx_movd( struct x86_function *p,
   1126 	       struct x86_reg dst,
   1127 	       struct x86_reg src )
   1128 {
   1129    p->need_emms = 1;
   1130    emit_1ub(p, X86_TWOB);
   1131    emit_op_modrm( p, 0x6e, 0x7e, dst, src );
   1132 }
   1133 
   1134 void mmx_movq( struct x86_function *p,
   1135 	       struct x86_reg dst,
   1136 	       struct x86_reg src )
   1137 {
   1138    p->need_emms = 1;
   1139    emit_1ub(p, X86_TWOB);
   1140    emit_op_modrm( p, 0x6f, 0x7f, dst, src );
   1141 }
   1142 
   1143 
   1144 /***********************************************************************
   1145  * Helper functions
   1146  */
   1147 
   1148 
   1149 /* Retreive a reference to one of the function arguments, taking into
   1150  * account any push/pop activity:
   1151  */
   1152 struct x86_reg x86_fn_arg( struct x86_function *p,
   1153 			   unsigned arg )
   1154 {
   1155    return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
   1156 			p->stack_offset + arg * 4);	/* ??? */
   1157 }
   1158 
   1159 
   1160 void x86_init_func( struct x86_function *p )
   1161 {
   1162    p->size = 0;
   1163    p->store = NULL;
   1164    p->csr = p->store;
   1165 }
   1166 
   1167 int x86_init_func_size( struct x86_function *p, unsigned code_size )
   1168 {
   1169    p->size = code_size;
   1170    p->store = _mesa_exec_malloc(code_size);
   1171    p->csr = p->store;
   1172    return p->store != NULL;
   1173 }
   1174 
   1175 void x86_release_func( struct x86_function *p )
   1176 {
   1177    _mesa_exec_free(p->store);
   1178    p->store = NULL;
   1179    p->csr = NULL;
   1180    p->size = 0;
   1181 }
   1182 
   1183 
   1184 void (*x86_get_func( struct x86_function *p ))(void)
   1185 {
   1186    if (DISASSEM && p->store)
   1187       printf("disassemble %p %p\n", p->store, p->csr);
   1188    return (void (*)(void)) (unsigned long) p->store;
   1189 }
   1190 
   1191 #else
   1192 
   1193 void x86sse_dummy( void )
   1194 {
   1195 }
   1196 
   1197 #endif
   1198 
   1199 #else  /* USE_X86_ASM */
   1200 
   1201 int x86sse_c_dummy_var; /* silence warning */
   1202 
   1203 #endif /* USE_X86_ASM */
   1204