Home | History | Annotate | Download | only in rtasm
      1 /**************************************************************************
      2  *
      3  * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be included
     13  * in all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21  * OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  **************************************************************************/
     24 
     25 #ifndef _RTASM_X86SSE_H_
     26 #define _RTASM_X86SSE_H_
     27 
     28 #include "pipe/p_compiler.h"
     29 #include "pipe/p_config.h"
     30 
     31 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
     32 
     33 /* It is up to the caller to ensure that instructions issued are
     34  * suitable for the host cpu.  There are no checks made in this module
     35  * for mmx/sse/sse2 support on the cpu.
     36  */
     37 struct x86_reg {		/* one operand: a register, or memory addressed through a register */
     38    unsigned file:2;		/* 2 bits - room for the four enum x86_reg_file values */
     39    unsigned idx:4;		/* 4 bits - room for the sixteen enum x86_reg_name values */
     40    unsigned mod:2;		/* enum x86_reg_mod value; mod_REG if this is just a register */
     41    int      disp:24;		/* signed displacement; only +/- 23bits of offset - should be enough... */
     42 };
     43 /* CPU capability flags.  Each value is a distinct single bit, so flags can be OR-ed into one mask (presumably x86_function::caps - confirm). */
     44 #define X86_MMX 1
     45 #define X86_MMX2 2
     46 #define X86_SSE 4
     47 #define X86_SSE2 8
     48 #define X86_SSE3 0x10
     49 #define X86_SSE4_1 0x20
     50 /* State for one function being assembled.  Field notes marked "presumably" are inferred from names and types only - confirm against the .c file. */
     51 struct x86_function {
     52    unsigned caps;		/* value returned by x86_target_caps(); presumably a mask of X86_* flags */
     53    unsigned size;		/* presumably the size of the code buffer in bytes */
     54    unsigned char *store;	/* presumably the start of the code buffer */
     55    unsigned char *csr;		/* presumably the current write position inside the buffer */
     56 
     57    unsigned stack_offset:16;	/* presumably the push/pop tracking that x86_fn_arg() accounts for */
     58    unsigned need_emms:8;	/* presumably set once MMX code has been emitted */
     59    int x87_stack:8;		/* signed 8-bit field; presumably the tracked x87 stack depth */
     60 
     61    unsigned char error_overflow[4];	/* four spare bytes; presumably a safe write target after the buffer overflows */
     62 };
     63 /* Register files an operand can live in.  Four enumerators (0..3 in declaration order), so a value fits the 2-bit x86_reg::file field. */
     64 enum x86_reg_file {
     65    file_REG32,
     66    file_MMX,
     67    file_XMM,
     68    file_x87
     69 };
     70 
     71 /* Values for mod field of modr/m byte.  The enumerators take the values
     72  * 0..3 in the order written, so do not reorder them. */
     73 enum x86_reg_mod {
     74    mod_INDIRECT,		/* 0: memory at [reg], no displacement */
     75    mod_DISP8,			/* 1: memory at [reg + 8-bit displacement] */
     76    mod_DISP32,			/* 2: memory at [reg + 32-bit displacement] */
     77    mod_REG			/* 3: the register itself, no memory access */
     78 };
     79 /* Register indices.  Sixteen enumerators (0..15 in declaration order), so a value fits the 4-bit x86_reg::idx field. */
     80 enum x86_reg_name {
     81    reg_AX,
     82    reg_CX,
     83    reg_DX,
     84    reg_BX,
     85    reg_SP,
     86    reg_BP,
     87    reg_SI,
     88    reg_DI,
     89    reg_R8,			/* indices 8..15 (R8-R15) exist only in 64-bit mode */
     90    reg_R9,
     91    reg_R10,
     92    reg_R11,
     93    reg_R12,
     94    reg_R13,
     95    reg_R14,
     96    reg_R15
     97 };
     98 
     99 /* Condition codes accepted by x86_jcc(), x86_jcc_forward() and x86_cmovcc().  Values are 0..5 in declaration order. */
    100 enum x86_cc {
    101    cc_O,			/* overflow */
    102    cc_NO,			/* not overflow */
    103    cc_NAE,			/* not above or equal / carry */
    104    cc_AE,			/* above or equal / not carry */
    105    cc_E,			/* equal / zero */
    106    cc_NE			/* not equal / not zero */
    107 };
    108 /* Comparison predicates accepted by sse_cmpps().  Values are 0..7 in declaration order (presumably emitted as the instruction's immediate - confirm). */
    109 enum sse_cc {
    110    cc_Equal,
    111    cc_LessThan,
    112    cc_LessThanEqual,
    113    cc_Unordered,
    114    cc_NotEqual,
    115    cc_NotLessThan,
    116    cc_NotLessThanEqual,
    117    cc_Ordered
    118 };
    119 /* Aliases: "zero" and "not zero" are the same conditions as "equal" and "not equal". */
    120 #define cc_Z  cc_E
    121 #define cc_NZ cc_NE
    122 
    123 
    124 /** generic pointer to function (no arguments, no result); the type returned by x86_get_func() */
    125 typedef void (*x86_func)(void);
    126 
    127 
    128 /* Begin/end/retrieve function creation:
    129  */
    130 /* Code-generation targets reported by x86_target(): one 32-bit target and two 64-bit targets that differ by ABI. */
    131 enum x86_target
    132 {
    133    X86_32,
    134    X86_64_STD_ABI,
    135    X86_64_WIN64_ABI
    136 };
    137 
    138 /* make this read a member of x86_function if target != host is desired */
    139 static inline enum x86_target x86_target( struct x86_function* p )
    140 {
    141 #ifdef PIPE_ARCH_X86
    142    return X86_32;
    143 #elif (defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_WINDOWS)) && defined(PIPE_ARCH_X86_64)
    144    return X86_64_WIN64_ABI;
    145 #elif defined(PIPE_ARCH_X86_64)
    146    return X86_64_STD_ABI;
    147 #endif
    148 }
    149 
    150 static inline unsigned x86_target_caps( struct x86_function* p )
    151 {
    152    return p->caps;
    153 }
    154 /* Set up, tear down and retrieve a function under construction (roles inferred from the names - confirm against the .c file). */
    155 void x86_init_func( struct x86_function *p );
    156 void x86_init_func_size( struct x86_function *p, unsigned code_size );	/* as above, with a caller-chosen code_size */
    157 void x86_release_func( struct x86_function *p );
    158 x86_func x86_get_func( struct x86_function *p );	/* result has the generic x86_func pointer type */
    159 
    160 /* Debugging: takes one register descriptor by value; where the output
    161  * goes is not visible in this header. */
    162 void x86_print_reg( struct x86_reg reg );
    163 
    164 
    165 /* Create and manipulate registers and regmem values.  Every function here
    166  * takes and returns struct x86_reg by value. */
    167 struct x86_reg x86_make_reg( enum x86_reg_file file,
    168 			     enum x86_reg_name idx );
    169 /* NOTE(review): x86_reg::disp is a 24-bit signed field, so disp presumably has to fit in it - confirm. */
    170 struct x86_reg x86_make_disp( struct x86_reg reg,
    171 			      int disp );
    172 
    173 struct x86_reg x86_deref( struct x86_reg reg );
    174 
    175 struct x86_reg x86_get_base_reg( struct x86_reg reg );
    176 
    177 
    178 /* Labels, jumps and fixup.  Labels and fixups are plain ints in this API;
    179  * what the int encodes is not visible in this header. */
    180 int x86_get_label( struct x86_function *p );
    181 
    182 void x64_rexw(struct x86_function *p);
    183 /* Jumps taking a label presumably expect a value obtained from x86_get_label() - confirm. */
    184 void x86_jcc( struct x86_function *p,
    185 	      enum x86_cc cc,
    186 	      int label );
    187 /* The *_forward variants return an int, presumably the value to hand to x86_fixup_fwd_jump() - confirm. */
    188 int x86_jcc_forward( struct x86_function *p,
    189 			  enum x86_cc cc );
    190 
    191 int x86_jmp_forward( struct x86_function *p);
    192 
    193 int x86_call_forward( struct x86_function *p);
    194 
    195 void x86_fixup_fwd_jump( struct x86_function *p,
    196 			 int fixup );
    197 
    198 void x86_jmp( struct x86_function *p, int label );
    199 
    200 /* void x86_call( struct x86_function *p, void (*label)() ); */
    201 void x86_call( struct x86_function *p, struct x86_reg reg);
    202 /* Operations taking a signed int immediate as the second operand. */
    203 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
    204 void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm );
    205 void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm );
    206 void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm );
    207 void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm );
    208 void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm );
    209 void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm );
    210 
    211 
    212 /* Macro for sse_shufps() and sse2_pshufd():
    213  */
    214 #define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
    215 #define SHUF_NOOP               RSW(0,1,2,3)
    216 #define GET_SHUF(swz, idx)      (((swz) >> ((idx)*2)) & 0x3)
    217 /* MMX emitters.  NOTE(review): x86_function::need_emms presumably relates to mmx_emms() - confirm against the .c file. */
    218 void mmx_emms( struct x86_function *p );
    219 void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    220 void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    221 void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    222 void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    223 
    224 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    225 void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    226 void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    227 void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    228 void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    229 void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    230 void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    231 
    232 void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    233 void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    234 void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    235 void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    236 void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    237 
    238 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    239 void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    240 void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    241 void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    242 void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
    243                   unsigned char shuf );
    244 void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
    245                   unsigned char shuf );
    246 void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
    247                   unsigned char shuf );
    248 void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    249 void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    250 
    251 void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    252 void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    253 void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    254 void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    255 
    256 void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    257 void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    258 void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    259 
    260 void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    261 void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    262 void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    263 
    264 void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    265 void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
    266 
    267 void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    268 
    269 void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
    270 void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
    271 void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
    272 /* SSE emitters.  Operands are x86_reg descriptors passed by value; no CPU-support checks are made in this module. */
    273 void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
    274 void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
    275 void sse_prefetch1( struct x86_function *p, struct x86_reg ptr);
    276 
    277 void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
    278 
    279 void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    280 void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    281 void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    282 void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    283 void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    284 void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    285 void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
    286                 enum sse_cc cc );	/* cc: comparison predicate, see enum sse_cc */
    287 void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    288 void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    289 void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    290 void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    291 void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    292 void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    293 void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    294 void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    295 void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    296 void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    297 void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    298 void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    299 void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    300 void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    301 void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    302 void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    303 void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    304 void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
    305                  unsigned char shuf );	/* shuf: packed selector byte, e.g. built with SHUF() */
    306 void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    307 void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    308 void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
    309 void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
    310 /* Integer instruction emitters.  Operands are x86_reg descriptors passed by value.  The x64_ prefix presumably marks 64-bit-only forms - confirm. */
    311 void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    312 void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    313 void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc );
    314 void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    315 void x86_dec( struct x86_function *p, struct x86_reg reg );
    316 void x86_inc( struct x86_function *p, struct x86_reg reg );
    317 void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    318 void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    319 void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    320 void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    321 void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    322 void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    323 void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    324 void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
    325 void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );	/* immediate is 8 bits wide */
    326 void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );	/* immediate is 16 bits wide */
    327 void x86_mul( struct x86_function *p, struct x86_reg src );
    328 void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    329 void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    330 void x86_pop( struct x86_function *p, struct x86_reg reg );
    331 void x86_push( struct x86_function *p, struct x86_reg reg );
    332 void x86_push_imm32( struct x86_function *p, int imm );
    333 void x86_ret( struct x86_function *p );
    334 void x86_retw( struct x86_function *p, unsigned short imm );
    335 void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    336 void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    337 void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
    338 void x86_sahf( struct x86_function *p );
    339 void x86_div( struct x86_function *p, struct x86_reg src );
    340 void x86_bswap( struct x86_function *p, struct x86_reg src );
    341 void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
    342 void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
    343 void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm  );
    344 /* Paired helpers; presumably save and restore registers around a cdecl call - confirm against the .c file. */
    345 void x86_cdecl_caller_push_regs( struct x86_function *p );
    346 void x86_cdecl_caller_pop_regs( struct x86_function *p );
    347 /* NOTE(review): presumably checks x86_function::x87_stack - confirm. */
    348 void x87_assert_stack_empty( struct x86_function *p );
    349 /* x87 FPU emitters.  Functions with no operand parameter take only the function under construction; the others take x86_reg descriptors by value. */
    350 void x87_f2xm1( struct x86_function *p );
    351 void x87_fabs( struct x86_function *p );
    352 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
    353 void x87_faddp( struct x86_function *p, struct x86_reg dst );
    354 void x87_fchs( struct x86_function *p );
    355 void x87_fclex( struct x86_function *p );
    356 void x87_fcmovb( struct x86_function *p, struct x86_reg src );
    357 void x87_fcmovbe( struct x86_function *p, struct x86_reg src );
    358 void x87_fcmove( struct x86_function *p, struct x86_reg src );
    359 void x87_fcmovnb( struct x86_function *p, struct x86_reg src );
    360 void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );
    361 void x87_fcmovne( struct x86_function *p, struct x86_reg src );
    362 void x87_fcom( struct x86_function *p, struct x86_reg dst );
    363 void x87_fcomi( struct x86_function *p, struct x86_reg dst );
    364 void x87_fcomip( struct x86_function *p, struct x86_reg dst );
    365 void x87_fcomp( struct x86_function *p, struct x86_reg dst );
    366 void x87_fcos( struct x86_function *p );
    367 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
    368 void x87_fdivp( struct x86_function *p, struct x86_reg dst );
    369 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
    370 void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
    371 void x87_fild( struct x86_function *p, struct x86_reg arg );
    372 void x87_fist( struct x86_function *p, struct x86_reg dst );
    373 void x87_fistp( struct x86_function *p, struct x86_reg dst );
    374 void x87_fld( struct x86_function *p, struct x86_reg arg );
    375 void x87_fld1( struct x86_function *p );
    376 void x87_fldcw( struct x86_function *p, struct x86_reg arg );
    377 void x87_fldl2e( struct x86_function *p );
    378 void x87_fldln2( struct x86_function *p );
    379 void x87_fldz( struct x86_function *p );
    380 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
    381 void x87_fmulp( struct x86_function *p, struct x86_reg dst );
    382 void x87_fnclex( struct x86_function *p );
    383 void x87_fprndint( struct x86_function *p );	/* NOTE(review): presumably emits FRNDINT despite the spelling - confirm */
    384 void x87_fpop( struct x86_function *p );
    385 void x87_fscale( struct x86_function *p );
    386 void x87_fsin( struct x86_function *p );
    387 void x87_fsincos( struct x86_function *p );
    388 void x87_fsqrt( struct x86_function *p );
    389 void x87_fst( struct x86_function *p, struct x86_reg dst );
    390 void x87_fstp( struct x86_function *p, struct x86_reg dst );
    391 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
    392 void x87_fsubp( struct x86_function *p, struct x86_reg dst );
    393 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
    394 void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
    395 void x87_ftst( struct x86_function *p );
    396 void x87_fxch( struct x86_function *p, struct x86_reg dst );
    397 void x87_fxtract( struct x86_function *p );
    398 void x87_fyl2x( struct x86_function *p );
    399 void x87_fyl2xp1( struct x86_function *p );
    400 void x87_fwait( struct x86_function *p );
    401 void x87_fnstcw( struct x86_function *p, struct x86_reg dst );
    402 void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
    403 void x87_fucompp( struct x86_function *p );
    404 void x87_fucomp( struct x86_function *p, struct x86_reg arg );
    405 void x87_fucom( struct x86_function *p, struct x86_reg arg );
    406 
    407 
    408 
    409 /* Retrieve a reference to one of the function arguments, taking into
    410  * account any push/pop activity.  Note - doesn't track explicit
    411  * manipulation of ESP by other instructions.  arg selects the argument;
    412  * whether numbering starts at 0 or 1 is not visible here - confirm. */
    413 struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
    414 
    415 #endif
    416 #endif
    417