Lines Matching refs:__m128
35 typedef float __m128 __attribute__((__vector_size__(16)));
39 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
40 _mm_add_ss(__m128 a, __m128 b)
46 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
47 _mm_add_ps(__m128 a, __m128 b)
52 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
53 _mm_sub_ss(__m128 a, __m128 b)
59 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
60 _mm_sub_ps(__m128 a, __m128 b)
65 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
66 _mm_mul_ss(__m128 a, __m128 b)
72 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
73 _mm_mul_ps(__m128 a, __m128 b)
78 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
79 _mm_div_ss(__m128 a, __m128 b)
85 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
86 _mm_div_ps(__m128 a, __m128 b)
91 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
92 _mm_sqrt_ss(__m128 a)
97 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
98 _mm_sqrt_ps(__m128 a)
103 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
104 _mm_rcp_ss(__m128 a)
109 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
110 _mm_rcp_ps(__m128 a)
115 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
116 _mm_rsqrt_ss(__m128 a)
121 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
122 _mm_rsqrt_ps(__m128 a)
127 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
128 _mm_min_ss(__m128 a, __m128 b)
133 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
134 _mm_min_ps(__m128 a, __m128 b)
139 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
140 _mm_max_ss(__m128 a, __m128 b)
145 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
146 _mm_max_ps(__m128 a, __m128 b)
151 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
152 _mm_and_ps(__m128 a, __m128 b)
154 return (__m128)((__v4si)a & (__v4si)b);
157 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
158 _mm_andnot_ps(__m128 a, __m128 b)
160 return (__m128)(~(__v4si)a & (__v4si)b);
163 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
164 _mm_or_ps(__m128 a, __m128 b)
166 return (__m128)((__v4si)a | (__v4si)b);
169 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
170 _mm_xor_ps(__m128 a, __m128 b)
172 return (__m128)((__v4si)a ^ (__v4si)b);
175 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
176 _mm_cmpeq_ss(__m128 a, __m128 b)
178 return (__m128)__builtin_ia32_cmpss(a, b, 0);
181 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
182 _mm_cmpeq_ps(__m128 a, __m128 b)
184 return (__m128)__builtin_ia32_cmpps(a, b, 0);
187 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
188 _mm_cmplt_ss(__m128 a, __m128 b)
190 return (__m128)__builtin_ia32_cmpss(a, b, 1);
193 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
194 _mm_cmplt_ps(__m128 a, __m128 b)
196 return (__m128)__builtin_ia32_cmpps(a, b, 1);
199 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
200 _mm_cmple_ss(__m128 a, __m128 b)
202 return (__m128)__builtin_ia32_cmpss(a, b, 2);
205 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
206 _mm_cmple_ps(__m128 a, __m128 b)
208 return (__m128)__builtin_ia32_cmpps(a, b, 2);
211 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
212 _mm_cmpgt_ss(__m128 a, __m128 b)
214 return (__m128)__builtin_ia32_cmpss(b, a, 1);
217 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
218 _mm_cmpgt_ps(__m128 a, __m128 b)
220 return (__m128)__builtin_ia32_cmpps(b, a, 1);
223 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
224 _mm_cmpge_ss(__m128 a, __m128 b)
226 return (__m128)__builtin_ia32_cmpss(b, a, 2);
229 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
230 _mm_cmpge_ps(__m128 a, __m128 b)
232 return (__m128)__builtin_ia32_cmpps(b, a, 2);
235 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
236 _mm_cmpneq_ss(__m128 a, __m128 b)
238 return (__m128)__builtin_ia32_cmpss(a, b, 4);
241 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
242 _mm_cmpneq_ps(__m128 a, __m128 b)
244 return (__m128)__builtin_ia32_cmpps(a, b, 4);
247 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
248 _mm_cmpnlt_ss(__m128 a, __m128 b)
250 return (__m128)__builtin_ia32_cmpss(a, b, 5);
253 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
254 _mm_cmpnlt_ps(__m128 a, __m128 b)
256 return (__m128)__builtin_ia32_cmpps(a, b, 5);
259 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
260 _mm_cmpnle_ss(__m128 a, __m128 b)
262 return (__m128)__builtin_ia32_cmpss(a, b, 6);
265 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
266 _mm_cmpnle_ps(__m128 a, __m128 b)
268 return (__m128)__builtin_ia32_cmpps(a, b, 6);
271 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
272 _mm_cmpngt_ss(__m128 a, __m128 b)
274 return (__m128)__builtin_ia32_cmpss(b, a, 5);
277 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
278 _mm_cmpngt_ps(__m128 a, __m128 b)
280 return (__m128)__builtin_ia32_cmpps(b, a, 5);
283 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
284 _mm_cmpnge_ss(__m128 a, __m128 b)
286 return (__m128)__builtin_ia32_cmpss(b, a, 6);
289 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
290 _mm_cmpnge_ps(__m128 a, __m128 b)
292 return (__m128)__builtin_ia32_cmpps(b, a, 6);
295 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
296 _mm_cmpord_ss(__m128 a, __m128 b)
298 return (__m128)__builtin_ia32_cmpss(a, b, 7);
301 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
302 _mm_cmpord_ps(__m128 a, __m128 b)
304 return (__m128)__builtin_ia32_cmpps(a, b, 7);
307 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
308 _mm_cmpunord_ss(__m128 a, __m128 b)
310 return (__m128)__builtin_ia32_cmpss(a, b, 3);
313 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
314 _mm_cmpunord_ps(__m128 a, __m128 b)
316 return (__m128)__builtin_ia32_cmpps(a, b, 3);
320 _mm_comieq_ss(__m128 a, __m128 b)
326 _mm_comilt_ss(__m128 a, __m128 b)
332 _mm_comile_ss(__m128 a, __m128 b)
338 _mm_comigt_ss(__m128 a, __m128 b)
344 _mm_comige_ss(__m128 a, __m128 b)
350 _mm_comineq_ss(__m128 a, __m128 b)
356 _mm_ucomieq_ss(__m128 a, __m128 b)
362 _mm_ucomilt_ss(__m128 a, __m128 b)
368 _mm_ucomile_ss(__m128 a, __m128 b)
374 _mm_ucomigt_ss(__m128 a, __m128 b)
380 _mm_ucomige_ss(__m128 a, __m128 b)
386 _mm_ucomineq_ss(__m128 a, __m128 b)
392 _mm_cvtss_si32(__m128 a)
398 _mm_cvt_ss2si(__m128 a)
406 _mm_cvtss_si64(__m128 a)
414 _mm_cvtps_pi32(__m128 a)
420 _mm_cvt_ps2pi(__m128 a)
426 _mm_cvttss_si32(__m128 a)
432 _mm_cvtt_ss2si(__m128 a)
438 _mm_cvttss_si64(__m128 a)
444 _mm_cvttps_pi32(__m128 a)
450 _mm_cvtt_ps2pi(__m128 a)
455 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
456 _mm_cvtsi32_ss(__m128 a, int b)
462 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
463 _mm_cvt_si2ss(__m128 a, int b)
470 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
471 _mm_cvtsi64_ss(__m128 a, long long b)
479 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
480 _mm_cvtpi32_ps(__m128 a, __m64 b)
485 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
486 _mm_cvt_pi2ps(__m128 a, __m64 b)
492 _mm_cvtss_f32(__m128 a)
497 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
498 _mm_loadh_pi(__m128 a, const __m64 *p)
500 __m128 b;
506 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
507 _mm_loadl_pi(__m128 a, const __m64 *p)
509 __m128 b;
515 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
518 return (__m128){ *p, 0, 0, 0 };
521 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
524 return (__m128){ *p, *p, *p, *p };
529 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
532 return *(__m128*)p;
535 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
541 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
544 __m128 a = _mm_load_ps(p);
548 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
551 return (__m128){ w, 0, 0, 0 };
554 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
557 return (__m128){ w, w, w, w };
561 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
567 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
570 return (__m128){ w, x, y, z };
573 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
576 return (__m128){ z, y, x, w };
579 static __inline__ __m128 __attribute__((__always_inline__))
582 return (__m128){ 0, 0, 0, 0 };
586 _mm_storeh_pi(__m64 *p, __m128 a)
592 _mm_storel_pi(__m64 *p, __m128 a)
598 _mm_store_ss(float *p, __m128 a)
604 _mm_storeu_ps(float *p, __m128 a)
610 _mm_store1_ps(float *p, __m128 a)
617 _mm_store_ps1(float *p, __m128 a)
623 _mm_store_ps(float *p, __m128 a)
625 *(__m128 *)p = a;
629 _mm_storer_ps(float *p, __m128 a)
652 _mm_stream_ps(float *p, __m128 a)
759 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
760 _mm_unpackhi_ps(__m128 a, __m128 b)
765 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
766 _mm_unpacklo_ps(__m128 a, __m128 b)
771 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
772 _mm_move_ss(__m128 a, __m128 b)
777 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
778 _mm_movehl_ps(__m128 a, __m128 b)
783 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
784 _mm_movelh_ps(__m128 a, __m128 b)
789 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
793 __m128 r;
807 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
811 __m128 r;
824 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
836 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
847 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
850 __m128 c;
860 _mm_cvtps_pi16(__m128 a)
872 _mm_cvtps_pi8(__m128 a)
883 _mm_movemask_ps(__m128 a)
928 __m128 tmp3, tmp2, tmp1, tmp0; \