1 /************************************************************************** 2 * 3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included 13 * in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 **************************************************************************/ 24 25 #ifndef _RTASM_X86SSE_H_ 26 #define _RTASM_X86SSE_H_ 27 28 #include "pipe/p_compiler.h" 29 #include "pipe/p_config.h" 30 31 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 32 33 /* It is up to the caller to ensure that instructions issued are 34 * suitable for the host cpu. There are no checks made in this module 35 * for mmx/sse/sse2 support on the cpu. 36 */ 37 struct x86_reg { 38 unsigned file:2; 39 unsigned idx:4; 40 unsigned mod:2; /* mod_REG if this is just a register */ 41 int disp:24; /* only +/- 23bits of offset - should be enough... */ 42 }; 43 44 #define X86_MMX 1 45 #define X86_MMX2 2 46 #define X86_SSE 4 47 #define X86_SSE2 8 48 #define X86_SSE3 0x10 49 #define X86_SSE4_1 0x20 50 51 struct x86_function { 52 unsigned caps; 53 unsigned size; 54 unsigned char *store; 55 unsigned char *csr; 56 57 unsigned stack_offset:16; 58 unsigned need_emms:8; 59 int x87_stack:8; 60 61 unsigned char error_overflow[4]; 62 }; 63 64 enum x86_reg_file { 65 file_REG32, 66 file_MMX, 67 file_XMM, 68 file_x87 69 }; 70 71 /* Values for mod field of modr/m byte 72 */ 73 enum x86_reg_mod { 74 mod_INDIRECT, 75 mod_DISP8, 76 mod_DISP32, 77 mod_REG 78 }; 79 80 enum x86_reg_name { 81 reg_AX, 82 reg_CX, 83 reg_DX, 84 reg_BX, 85 reg_SP, 86 reg_BP, 87 reg_SI, 88 reg_DI, 89 reg_R8, 90 reg_R9, 91 reg_R10, 92 reg_R11, 93 reg_R12, 94 reg_R13, 95 reg_R14, 96 reg_R15 97 }; 98 99 100 enum x86_cc { 101 cc_O, /* overflow */ 102 cc_NO, /* not overflow */ 103 cc_NAE, /* not above or equal / carry */ 104 cc_AE, /* above or equal / not carry */ 105 cc_E, /* equal / zero */ 106 cc_NE /* not equal / not zero */ 107 }; 108 109 enum sse_cc { 110 cc_Equal, 111 cc_LessThan, 112 cc_LessThanEqual, 113 cc_Unordered, 114 cc_NotEqual, 115 cc_NotLessThan, 116 cc_NotLessThanEqual, 117 cc_Ordered 118 }; 119 120 #define cc_Z cc_E 121 #define cc_NZ cc_NE 122 123 124 /** generic pointer to function */ 125 typedef void (*x86_func)(void); 126 127 128 /* Begin/end/retrieve function creation: 129 */ 130 131 enum x86_target 132 { 133 X86_32, 134 X86_64_STD_ABI, 135 X86_64_WIN64_ABI 136 }; 137 138 /* make this read a member of x86_function if target != host is desired */ 139 static inline enum x86_target x86_target( struct x86_function* p ) 140 { 141 #ifdef PIPE_ARCH_X86 142 return X86_32; 143 #elif (defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_WINDOWS)) && defined(PIPE_ARCH_X86_64) 144 return X86_64_WIN64_ABI; 145 #elif defined(PIPE_ARCH_X86_64) 146 return X86_64_STD_ABI; 147 #endif 148 } 149 150 static inline unsigned x86_target_caps( struct x86_function* p ) 151 { 152 return p->caps; 153 } 154 155 void x86_init_func( struct x86_function *p ); 156 void x86_init_func_size( struct x86_function *p, unsigned code_size ); 157 void x86_release_func( struct x86_function *p ); 158 x86_func x86_get_func( struct x86_function *p ); 159 160 /* Debugging: 161 */ 162 void x86_print_reg( struct x86_reg reg ); 163 164 165 /* Create and manipulate registers and regmem values: 166 */ 167 struct x86_reg x86_make_reg( enum x86_reg_file file, 168 enum x86_reg_name idx ); 169 170 struct x86_reg x86_make_disp( struct x86_reg reg, 171 int disp ); 172 173 struct x86_reg x86_deref( struct x86_reg reg ); 174 175 struct x86_reg x86_get_base_reg( struct x86_reg reg ); 176 177 178 /* Labels, jumps and fixup: 179 */ 180 int x86_get_label( struct x86_function *p ); 181 182 void x64_rexw(struct x86_function *p); 183 184 void x86_jcc( struct x86_function *p, 185 enum x86_cc cc, 186 int label ); 187 188 int x86_jcc_forward( struct x86_function *p, 189 enum x86_cc cc ); 190 191 int x86_jmp_forward( struct x86_function *p); 192 193 int x86_call_forward( struct x86_function *p); 194 195 void x86_fixup_fwd_jump( struct x86_function *p, 196 int fixup ); 197 198 void x86_jmp( struct x86_function *p, int label ); 199 200 /* void x86_call( struct x86_function *p, void (*label)() ); */ 201 void x86_call( struct x86_function *p, struct x86_reg reg); 202 203 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); 204 void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); 205 void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); 206 void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); 207 void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); 208 void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); 209 void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); 210 211 212 /* Macro for sse_shufps() and sse2_pshufd(): 213 */ 214 #define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) 215 #define SHUF_NOOP RSW(0,1,2,3) 216 #define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) 217 218 void mmx_emms( struct x86_function *p ); 219 void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 220 void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 221 void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 222 void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 223 224 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 225 void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 226 void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 227 void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 228 void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 229 void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 230 void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 231 232 void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 233 void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 234 void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 235 void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 236 void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 237 238 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 239 void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 240 void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 241 void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 242 void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 243 unsigned char shuf ); 244 void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 245 unsigned char shuf ); 246 void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 247 unsigned char shuf ); 248 void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 249 void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 250 251 void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 252 void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 253 void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 254 void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 255 256 void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 257 void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 258 void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 259 260 void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 261 void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 262 void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 263 264 void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 265 void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 266 267 void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 268 269 void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 270 void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 271 void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 272 273 void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); 274 void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); 275 void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); 276 277 void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 278 279 void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 280 void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 281 void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 282 void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 283 void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 284 void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 285 void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, 286 enum sse_cc cc ); 287 void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 288 void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 289 void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 290 void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 291 void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 292 void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 293 void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 294 void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 295 void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 296 void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 297 void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 298 void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 299 void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 300 void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 301 void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 302 void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 303 void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 304 void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 305 unsigned char shuf ); 306 void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 307 void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 308 void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); 309 void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 310 311 void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 312 void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 313 void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc ); 314 void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 315 void x86_dec( struct x86_function *p, struct x86_reg reg ); 316 void x86_inc( struct x86_function *p, struct x86_reg reg ); 317 void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 318 void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 319 void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 320 void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 321 void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 322 void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 323 void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 324 void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm ); 325 void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm ); 326 void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm ); 327 void x86_mul( struct x86_function *p, struct x86_reg src ); 328 void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 329 void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 330 void x86_pop( struct x86_function *p, struct x86_reg reg ); 331 void x86_push( struct x86_function *p, struct x86_reg reg ); 332 void x86_push_imm32( struct x86_function *p, int imm ); 333 void x86_ret( struct x86_function *p ); 334 void x86_retw( struct x86_function *p, unsigned short imm ); 335 void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 336 void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 337 void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 338 void x86_sahf( struct x86_function *p ); 339 void x86_div( struct x86_function *p, struct x86_reg src ); 340 void x86_bswap( struct x86_function *p, struct x86_reg src ); 341 void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 342 void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 343 void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 344 345 void x86_cdecl_caller_push_regs( struct x86_function *p ); 346 void x86_cdecl_caller_pop_regs( struct x86_function *p ); 347 348 void x87_assert_stack_empty( struct x86_function *p ); 349 350 void x87_f2xm1( struct x86_function *p ); 351 void x87_fabs( struct x86_function *p ); 352 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 353 void x87_faddp( struct x86_function *p, struct x86_reg dst ); 354 void x87_fchs( struct x86_function *p ); 355 void x87_fclex( struct x86_function *p ); 356 void x87_fcmovb( struct x86_function *p, struct x86_reg src ); 357 void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); 358 void x87_fcmove( struct x86_function *p, struct x86_reg src ); 359 void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); 360 void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); 361 void x87_fcmovne( struct x86_function *p, struct x86_reg src ); 362 void x87_fcom( struct x86_function *p, struct x86_reg dst ); 363 void x87_fcomi( struct x86_function *p, struct x86_reg dst ); 364 void x87_fcomip( struct x86_function *p, struct x86_reg dst ); 365 void x87_fcomp( struct x86_function *p, struct x86_reg dst ); 366 void x87_fcos( struct x86_function *p ); 367 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 368 void x87_fdivp( struct x86_function *p, struct x86_reg dst ); 369 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 370 void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); 371 void x87_fild( struct x86_function *p, struct x86_reg arg ); 372 void x87_fist( struct x86_function *p, struct x86_reg dst ); 373 void x87_fistp( struct x86_function *p, struct x86_reg dst ); 374 void x87_fld( struct x86_function *p, struct x86_reg arg ); 375 void x87_fld1( struct x86_function *p ); 376 void x87_fldcw( struct x86_function *p, struct x86_reg arg ); 377 void x87_fldl2e( struct x86_function *p ); 378 void x87_fldln2( struct x86_function *p ); 379 void x87_fldz( struct x86_function *p ); 380 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 381 void x87_fmulp( struct x86_function *p, struct x86_reg dst ); 382 void x87_fnclex( struct x86_function *p ); 383 void x87_fprndint( struct x86_function *p ); 384 void x87_fpop( struct x86_function *p ); 385 void x87_fscale( struct x86_function *p ); 386 void x87_fsin( struct x86_function *p ); 387 void x87_fsincos( struct x86_function *p ); 388 void x87_fsqrt( struct x86_function *p ); 389 void x87_fst( struct x86_function *p, struct x86_reg dst ); 390 void x87_fstp( struct x86_function *p, struct x86_reg dst ); 391 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 392 void x87_fsubp( struct x86_function *p, struct x86_reg dst ); 393 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 394 void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); 395 void x87_ftst( struct x86_function *p ); 396 void x87_fxch( struct x86_function *p, struct x86_reg dst ); 397 void x87_fxtract( struct x86_function *p ); 398 void x87_fyl2x( struct x86_function *p ); 399 void x87_fyl2xp1( struct x86_function *p ); 400 void x87_fwait( struct x86_function *p ); 401 void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); 402 void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); 403 void x87_fucompp( struct x86_function *p ); 404 void x87_fucomp( struct x86_function *p, struct x86_reg arg ); 405 void x87_fucom( struct x86_function *p, struct x86_reg arg ); 406 407 408 409 /* Retrieve a reference to one of the function arguments, taking into 410 * account any push/pop activity. Note - doesn't track explicit 411 * manipulation of ESP by other instructions. 412 */ 413 struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); 414 415 #endif 416 #endif 417