1 /************************************************************************** 2 * 3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included 13 * in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 **************************************************************************/ 23 24 #ifndef _RTASM_X86SSE_H_ 25 #define _RTASM_X86SSE_H_ 26 27 #include "pipe/p_compiler.h" 28 #include "pipe/p_config.h" 29 30 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 31 32 /* It is up to the caller to ensure that instructions issued are 33 * suitable for the host cpu. There are no checks made in this module 34 * for mmx/sse/sse2 support on the cpu. 35 */ 36 struct x86_reg { 37 unsigned file:2; 38 unsigned idx:4; 39 unsigned mod:2; /* mod_REG if this is just a register */ 40 int disp:24; /* only +/- 23bits of offset - should be enough... */ 41 }; 42 43 #define X86_MMX 1 44 #define X86_MMX2 2 45 #define X86_SSE 4 46 #define X86_SSE2 8 47 #define X86_SSE3 0x10 48 #define X86_SSE4_1 0x20 49 50 struct x86_function { 51 unsigned caps; 52 unsigned size; 53 unsigned char *store; 54 unsigned char *csr; 55 56 unsigned stack_offset:16; 57 unsigned need_emms:8; 58 int x87_stack:8; 59 60 unsigned char error_overflow[4]; 61 }; 62 63 enum x86_reg_file { 64 file_REG32, 65 file_MMX, 66 file_XMM, 67 file_x87 68 }; 69 70 /* Values for mod field of modr/m byte 71 */ 72 enum x86_reg_mod { 73 mod_INDIRECT, 74 mod_DISP8, 75 mod_DISP32, 76 mod_REG 77 }; 78 79 enum x86_reg_name { 80 reg_AX, 81 reg_CX, 82 reg_DX, 83 reg_BX, 84 reg_SP, 85 reg_BP, 86 reg_SI, 87 reg_DI, 88 reg_R8, 89 reg_R9, 90 reg_R10, 91 reg_R11, 92 reg_R12, 93 reg_R13, 94 reg_R14, 95 reg_R15 96 }; 97 98 99 enum x86_cc { 100 cc_O, /* overflow */ 101 cc_NO, /* not overflow */ 102 cc_NAE, /* not above or equal / carry */ 103 cc_AE, /* above or equal / not carry */ 104 cc_E, /* equal / zero */ 105 cc_NE /* not equal / not zero */ 106 }; 107 108 enum sse_cc { 109 cc_Equal, 110 cc_LessThan, 111 cc_LessThanEqual, 112 cc_Unordered, 113 cc_NotEqual, 114 cc_NotLessThan, 115 cc_NotLessThanEqual, 116 cc_Ordered 117 }; 118 119 #define cc_Z cc_E 120 #define cc_NZ cc_NE 121 122 123 /** generic pointer to function */ 124 typedef void (*x86_func)(void); 125 126 127 /* Begin/end/retrieve function creation: 128 */ 129 130 enum x86_target 131 { 132 X86_32, 133 X86_64_STD_ABI, 134 X86_64_WIN64_ABI 135 }; 136 137 /* make this read a member of x86_function if target != host is desired */ 138 static INLINE enum x86_target x86_target( struct x86_function* p ) 139 { 140 #ifdef PIPE_ARCH_X86 141 return X86_32; 142 #elif defined(_WIN64) 143 return X86_64_WIN64_ABI; 144 #elif defined(PIPE_ARCH_X86_64) 145 return X86_64_STD_ABI; 146 #endif 147 } 148 149 static INLINE unsigned x86_target_caps( struct x86_function* p ) 150 { 151 return p->caps; 152 } 153 154 void x86_init_func( struct x86_function *p ); 155 void x86_init_func_size( struct x86_function *p, unsigned code_size ); 156 void x86_release_func( struct x86_function *p ); 157 x86_func x86_get_func( struct x86_function *p ); 158 159 /* Debugging: 160 */ 161 void x86_print_reg( struct x86_reg reg ); 162 163 164 /* Create and manipulate registers and regmem values: 165 */ 166 struct x86_reg x86_make_reg( enum x86_reg_file file, 167 enum x86_reg_name idx ); 168 169 struct x86_reg x86_make_disp( struct x86_reg reg, 170 int disp ); 171 172 struct x86_reg x86_deref( struct x86_reg reg ); 173 174 struct x86_reg x86_get_base_reg( struct x86_reg reg ); 175 176 177 /* Labels, jumps and fixup: 178 */ 179 int x86_get_label( struct x86_function *p ); 180 181 void x64_rexw(struct x86_function *p); 182 183 void x86_jcc( struct x86_function *p, 184 enum x86_cc cc, 185 int label ); 186 187 int x86_jcc_forward( struct x86_function *p, 188 enum x86_cc cc ); 189 190 int x86_jmp_forward( struct x86_function *p); 191 192 int x86_call_forward( struct x86_function *p); 193 194 void x86_fixup_fwd_jump( struct x86_function *p, 195 int fixup ); 196 197 void x86_jmp( struct x86_function *p, int label ); 198 199 /* void x86_call( struct x86_function *p, void (*label)() ); */ 200 void x86_call( struct x86_function *p, struct x86_reg reg); 201 202 void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); 203 void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); 204 void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); 205 void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); 206 void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); 207 void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); 208 void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); 209 210 211 /* Macro for sse_shufps() and sse2_pshufd(): 212 */ 213 #define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) 214 #define SHUF_NOOP RSW(0,1,2,3) 215 #define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) 216 217 void mmx_emms( struct x86_function *p ); 218 void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 219 void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 220 void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 221 void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 222 223 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 224 void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 225 void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 226 void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 227 void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 228 void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 229 void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 230 231 void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 232 void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 233 void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 234 void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 235 void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 236 237 void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 238 void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 239 void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 240 void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 241 void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 242 unsigned char shuf ); 243 void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 244 unsigned char shuf ); 245 void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 246 unsigned char shuf ); 247 void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 248 void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 249 250 void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 251 void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 252 void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 253 void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 254 255 void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 256 void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 257 void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 258 259 void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 260 void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 261 void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 262 263 void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 264 void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 265 266 void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 267 268 void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 269 void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 270 void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 271 272 void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); 273 void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); 274 void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); 275 276 void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 277 278 void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 279 void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 280 void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 281 void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 282 void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 283 void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 284 void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, 285 enum sse_cc cc ); 286 void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 287 void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 288 void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 289 void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 290 void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 291 void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 292 void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 293 void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 294 void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 295 void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 296 void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 297 void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 298 void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 299 void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 300 void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 301 void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 302 void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 303 void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 304 unsigned char shuf ); 305 void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 306 void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 307 void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); 308 void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 309 310 void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 311 void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 312 void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc ); 313 void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 314 void x86_dec( struct x86_function *p, struct x86_reg reg ); 315 void x86_inc( struct x86_function *p, struct x86_reg reg ); 316 void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 317 void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 318 void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 319 void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 320 void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 321 void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 322 void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 323 void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm ); 324 void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm ); 325 void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm ); 326 void x86_mul( struct x86_function *p, struct x86_reg src ); 327 void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 328 void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 329 void x86_pop( struct x86_function *p, struct x86_reg reg ); 330 void x86_push( struct x86_function *p, struct x86_reg reg ); 331 void x86_push_imm32( struct x86_function *p, int imm ); 332 void x86_ret( struct x86_function *p ); 333 void x86_retw( struct x86_function *p, unsigned short imm ); 334 void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 335 void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 336 void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 337 void x86_sahf( struct x86_function *p ); 338 void x86_div( struct x86_function *p, struct x86_reg src ); 339 void x86_bswap( struct x86_function *p, struct x86_reg src ); 340 void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 341 void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 342 void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 343 344 void x86_cdecl_caller_push_regs( struct x86_function *p ); 345 void x86_cdecl_caller_pop_regs( struct x86_function *p ); 346 347 void x87_assert_stack_empty( struct x86_function *p ); 348 349 void x87_f2xm1( struct x86_function *p ); 350 void x87_fabs( struct x86_function *p ); 351 void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 352 void x87_faddp( struct x86_function *p, struct x86_reg dst ); 353 void x87_fchs( struct x86_function *p ); 354 void x87_fclex( struct x86_function *p ); 355 void x87_fcmovb( struct x86_function *p, struct x86_reg src ); 356 void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); 357 void x87_fcmove( struct x86_function *p, struct x86_reg src ); 358 void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); 359 void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); 360 void x87_fcmovne( struct x86_function *p, struct x86_reg src ); 361 void x87_fcom( struct x86_function *p, struct x86_reg dst ); 362 void x87_fcomi( struct x86_function *p, struct x86_reg dst ); 363 void x87_fcomip( struct x86_function *p, struct x86_reg dst ); 364 void x87_fcomp( struct x86_function *p, struct x86_reg dst ); 365 void x87_fcos( struct x86_function *p ); 366 void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 367 void x87_fdivp( struct x86_function *p, struct x86_reg dst ); 368 void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 369 void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); 370 void x87_fild( struct x86_function *p, struct x86_reg arg ); 371 void x87_fist( struct x86_function *p, struct x86_reg dst ); 372 void x87_fistp( struct x86_function *p, struct x86_reg dst ); 373 void x87_fld( struct x86_function *p, struct x86_reg arg ); 374 void x87_fld1( struct x86_function *p ); 375 void x87_fldcw( struct x86_function *p, struct x86_reg arg ); 376 void x87_fldl2e( struct x86_function *p ); 377 void x87_fldln2( struct x86_function *p ); 378 void x87_fldz( struct x86_function *p ); 379 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 380 void x87_fmulp( struct x86_function *p, struct x86_reg dst ); 381 void x87_fnclex( struct x86_function *p ); 382 void x87_fprndint( struct x86_function *p ); 383 void x87_fpop( struct x86_function *p ); 384 void x87_fscale( struct x86_function *p ); 385 void x87_fsin( struct x86_function *p ); 386 void x87_fsincos( struct x86_function *p ); 387 void x87_fsqrt( struct x86_function *p ); 388 void x87_fst( struct x86_function *p, struct x86_reg dst ); 389 void x87_fstp( struct x86_function *p, struct x86_reg dst ); 390 void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 391 void x87_fsubp( struct x86_function *p, struct x86_reg dst ); 392 void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 393 void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); 394 void x87_ftst( struct x86_function *p ); 395 void x87_fxch( struct x86_function *p, struct x86_reg dst ); 396 void x87_fxtract( struct x86_function *p ); 397 void x87_fyl2x( struct x86_function *p ); 398 void x87_fyl2xp1( struct x86_function *p ); 399 void x87_fwait( struct x86_function *p ); 400 void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); 401 void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); 402 void x87_fucompp( struct x86_function *p ); 403 void x87_fucomp( struct x86_function *p, struct x86_reg arg ); 404 void x87_fucom( struct x86_function *p, struct x86_reg arg ); 405 406 407 408 /* Retrieve a reference to one of the function arguments, taking into 409 * account any push/pop activity. Note - doesn't track explicit 410 * manipulation of ESP by other instructions. 411 */ 412 struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); 413 414 #endif 415 #endif 416