
Lines Matching defs:a0

3677 ab = _mm_unpacklo_epi32 (a128, b128); //a0, b0, a1, b1
3678 ba = _mm_unpacklo_epi32 (b128, a128); //b0, a0, b1, a1
3717 ab = _mm_unpacklo_epi32 (a128, b128); //a0, b0, a1, b1
3718 ba = _mm_unpacklo_epi32 (b128, a128); //b0, a0, b1, a1
4219 ab = _mm_unpacklo_epi32 (a, b); //a0, b0, a1, b1
4220 ba = _mm_unpacklo_epi32 (b, a); //b0, a0, b1, a1
4280 ab = _mm_unpacklo_epi32 (a, b); //a0, b0, a1, b1
4281 ba = _mm_unpacklo_epi32 (b, a); //b0, a0, b1, a1
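
These four matches build both interleavings of the low halves of the two inputs. A minimal standalone sketch of the idiom, with illustrative names and values (SSE2 only):

    #include <emmintrin.h> /* SSE2 */
    #include <stdio.h>

    int main(void)
    {
        /* stand-ins for a128/b128 in the matches above */
        __m128i a128 = _mm_setr_epi32(10, 11, 12, 13); /* a0,a1,a2,a3 */
        __m128i b128 = _mm_setr_epi32(20, 21, 22, 23); /* b0,b1,b2,b3 */
        __m128i ab = _mm_unpacklo_epi32(a128, b128);   /* a0,b0,a1,b1 */
        __m128i ba = _mm_unpacklo_epi32(b128, a128);   /* b0,a0,b1,a1 */
        int r[4];
        _mm_storeu_si128((__m128i*)r, ab);
        printf("ab: %d %d %d %d\n", r[0], r[1], r[2], r[3]);
        _mm_storeu_si128((__m128i*)r, ba);
        printf("ba: %d %d %d %d\n", r[0], r[1], r[2], r[3]);
        return 0;
    }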
5511 __m128 a0, b0;
5513 a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff);
5515 a0 = _mm_cmpge_ps (a0, b0);
5516 return64f(a0);
5523 __m128 a0, b0;
5525 a0 = _mm_and_ps (a, *(__m128*)&c7fffffff);
5527 a0 = _mm_cmpge_ps (a0, b0);
5528 return (*(__m128i*)&a0);
5538 __m128 a0, b0;
5540 a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff);
5542 a0 = _mm_cmple_ps (a0, b0);
5543 return64f(a0);
5550 __m128 a0, b0;
5552 a0 = _mm_and_ps (a, *(__m128*)&c7fffffff);
5554 a0 = _mm_cmple_ps (a0, b0);
5555 return (*(__m128i*)&a0);
5565 __m128 a0, b0;
5567 a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff);
5569 a0 = _mm_cmpgt_ps (a0, b0);
5570 return64f(a0);
5577 __m128 a0, b0;
5579 a0 = _mm_and_ps (a, *(__m128*)&c7fffffff);
5581 a0 = _mm_cmpgt_ps (a0, b0);
5582 return (*(__m128i*)&a0);
5592 __m128 a0, b0;
5594 a0 = _mm_and_ps (_pM128(a), *(__m128*)&c7fffffff);
5596 a0 = _mm_cmplt_ps (a0, b0);
5597 return64f(a0);
5604 __m128 a0, b0;
5606 a0 = _mm_and_ps (a, *(__m128*)&c7fffffff);
5608 a0 = _mm_cmplt_ps (a0, b0);
5609 return (*(__m128i*)&a0);
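
All eight matches above implement NEON's absolute compares (the vcage/vcale/vcagt/vcalt family): ANDing with 0x7fffffff clears the IEEE-754 sign bit, turning the compare into |a| vs |b|. A hedged sketch of the idiom; the constant and function name here are illustrative, not the header's own:

    #include <emmintrin.h> /* SSE2 */

    static __m128 abs_cmpge_f32(__m128 a, __m128 b)
    {
        /* 0x7fffffff keeps everything but the sign bit */
        const __m128 absmask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
        __m128 a0 = _mm_and_ps(a, absmask); /* |a| */
        __m128 b0 = _mm_and_ps(b, absmask); /* |b| */
        return _mm_cmpge_ps(a0, b0);        /* all-ones where |a| >= |b| */
    }

Substituting _mm_cmple_ps, _mm_cmpgt_ps or _mm_cmplt_ps gives the other three variants seen in the matches.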
6278 ab = _mm_unpacklo_epi64 ( _pM128i(a), _pM128i(b)); //a0 a1 b0 b1
6279 ab_sh = _mm_shuffle_epi32(ab, 1 | (0 << 2) | (3 << 4) | (2 << 6)); //a1, a0, b1, b0
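
Here the immediate 1 | (0 << 2) | (3 << 4) | (2 << 6) (equal to _MM_SHUFFLE(2,3,0,1)) swaps each adjacent pair of 32-bit lanes. A one-line sketch of the same step:

    #include <emmintrin.h> /* SSE2 */

    static __m128i swap_adjacent_epi32(__m128i ab) /* a0,a1,b0,b1 */
    {
        return _mm_shuffle_epi32(ab, 1 | (0 << 2) | (3 << 4) | (2 << 6)); /* a1,a0,b1,b0 */
    }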
7586 __m128i c1, signmask, a0, res64;
7590 a0 = _mm_or_si128(a, *(__m128i*)mask); //force the sign bit on; the cmpeq below detects lanes where it was already set
7591 a0 = _MM_CMPEQ_EPI64 (a, a0);
7592 signmask = _mm_and_si128(a0, signmask);
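
This match group extracts a per-lane sign mask for 64-bit elements, for which SSE2 has no arithmetic right shift: OR forces the sign bit on, and a 64-bit compare-equal against the original value is all-ones exactly in lanes that were already negative. A sketch under the assumption that mask holds only the sign bits; plain _mm_cmpeq_epi64 (SSE4.1) stands in for the header's _MM_CMPEQ_EPI64 wrapper:

    #include <smmintrin.h> /* SSE4.1 */

    static __m128i signmask_epi64(__m128i a)
    {
        const __m128i signbit = _mm_set1_epi64x((long long)0x8000000000000000ULL);
        __m128i a0 = _mm_or_si128(a, signbit); /* force the sign bit on */
        return _mm_cmpeq_epi64(a, a0);         /* all-ones iff a was negative */
    }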
8455 __m128i zero, maskA, maskGT0, a0, a_masked, a_shift;
8461 a0 = _mm_and_si128 (a, maskGT0); //negative lanes become zero
8463 a_masked = _mm_and_si128 (a0, maskA);
8465 a_shift = _mm_slli_epi32 (a0, b);
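
These matches prepare a saturating left shift with an unsigned result (vqshlu style): a greater-than-zero mask zeroes negative lanes, and maskA isolates the top b bits, whose presence means the shift would overflow. A hedged sketch of the complete idea for 32-bit lanes, with illustrative names, assuming 0 < b < 32:

    #include <emmintrin.h> /* SSE2 */

    static __m128i qshlu_n_s32_sketch(__m128i a, const int b)
    {
        __m128i zero    = _mm_setzero_si128();
        __m128i maskGT0 = _mm_cmpgt_epi32(a, zero);  /* all-ones where a > 0 */
        __m128i a0      = _mm_and_si128(a, maskGT0); /* negative lanes become zero */
        __m128i maskA   = _mm_set1_epi32((int)~(0xFFFFFFFFu >> b)); /* top b bits */
        __m128i fits    = _mm_cmpeq_epi32(_mm_and_si128(a0, maskA), zero);
        __m128i a_shift = _mm_slli_epi32(a0, b);
        /* keep the shifted value where it fits, saturate to all-ones where it overflows */
        return _mm_or_si128(_mm_and_si128(a_shift, fits),
                            _mm_andnot_si128(fits, _mm_set1_epi32(-1)));
    }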
9868 //a0,a1,a2,a3,...a7,a8,...a15, b0,b1,b2,...b7,b8,...b15, c0,c1,c2,...c7,c8,...c15 -> val[0] = a0,a3,a6,a9,a12,a15, b2,b5,b8,b11,b14, c1,c4,c7,c10,c13, etc. (every 3rd element into each output)
9878 v.val[0] = vld1q_u8 (ptr); //a0,a1,a2,a3,...a7, ...a15
9886 tmp3 = _mm_slli_si128(tmp0,10); //0,0,0,0,0,0,0,0,0,0,a0,a3,a6,a9,a12,a15
9917 //a0, a1,a2,a3,...a7, b0,b1,b2,b3,...b7, c0,c1,c2,c3...c7 -> a0,a3,a6,b1,b4,b7,c2,c5, a1,a4,a7,b2,b5,c0,c3,c6, a2,a5,b0,b3,b6,c1,c4,c7
9924 v.val[0] = vld1q_u16 (ptr); //a0,a1,a2,a3,...a7,
9928 tmp0 = _mm_shuffle_epi8(v.val[0], *(__m128i*)mask16_0); //a0,a3,a6,a1,a4,a7,a2,a5,
9932 tmp3 = _mm_slli_si128(tmp0,10); //0,0,0,0,0,a0,a3,a6,
9933 tmp3 = _mm_alignr_epi8(tmp1,tmp3, 10); //a0,a3,a6,b1,b4,b7,x,x
9934 tmp3 = _mm_slli_si128(tmp3, 4); //0,0, a0,a3,a6,b1,b4,b7
9935 tmp3 = _mm_srli_si128(tmp3, 4); //a0,a3,a6,b1,b4,b7,0,0
9937 v.val[0] = _mm_or_si128(v.val[0],tmp3); //a0,a3,a6,b1,b4,b7,c2,c5
9939 tmp3 = _mm_slli_si128(tmp0, 4); //0,0,a0,a3,a6,a1,a4,a7
9963 //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3,
9966 v.val[0] = vld1q_u32 (ptr); //a0,a1,a2,a3,
9970 tmp0 = _mm_shuffle_epi32(v.val[0], 0 | (3 << 2) | (1 << 4) | (2 << 6)); //a0,a3,a1,a2
9975 v.val[0] = _mm_unpacklo_epi64(tmp0,tmp3); //a0,a3,b2,c1
9998 //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3,
10001 v.val[0] = vld1q_f32 (ptr); //a0,a1,a2,a3,
10005 tmp0 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v.val[0]), 0 | (3 << 2) | (1 << 4) | (2 << 6))); //a0,a3,a1,a2
10010 v.val[0] = _mm_movelh_ps(tmp0,tmp3); //a0,a3,b2,c1
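
The u32 and f32 paths above pre-shuffle each loaded register so that plain unpacks can finish the 3-way de-interleave. A sketch of the first output lane only, following the comment labels (a = first 4 elements loaded, b = next 4, c = last 4; the function name is illustrative):

    #include <emmintrin.h> /* SSE2 */

    static __m128i ld3q_u32_lane0_sketch(const unsigned int *ptr)
    {
        __m128i a = _mm_loadu_si128((const __m128i*)ptr);       /* a0,a1,a2,a3 */
        __m128i b = _mm_loadu_si128((const __m128i*)(ptr + 4)); /* b0,b1,b2,b3 */
        __m128i c = _mm_loadu_si128((const __m128i*)(ptr + 8)); /* c0,c1,c2,c3 */
        __m128i tmp0 = _mm_shuffle_epi32(a, 0 | (3 << 2) | (1 << 4) | (2 << 6)); /* a0,a3,a1,a2 */
        __m128i tmp1 = _mm_shuffle_epi32(b, 2 | (3 << 2) | (0 << 4) | (1 << 6)); /* b2,b3,b0,b1 */
        __m128i tmp2 = _mm_shuffle_epi32(c, 1 | (2 << 2) | (0 << 4) | (3 << 6)); /* c1,c2,c0,c3 */
        __m128i tmp3 = _mm_unpacklo_epi32(tmp1, tmp2); /* b2,c1,b3,c2 */
        return _mm_unpacklo_epi64(tmp0, tmp3);         /* a0,a3,b2,c1 == every 3rd element */
    }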
10027 //a0, a1,a2,a3,...a7, b0,b1,b2,b3,...b7, c0,c1,c2,c3...c7 -> a0,a3,a6,b1,b4,b7,c2,c5, a1,a4,a7,b2,b5,c0,c3,c6, a2,a5,b0,b3,b6,c1,c4,c7
10032 val0 = vld1q_u8 (ptr); //a0,a1,a2,a3,...a7, b0,b1,b2,b3...b7
10035 tmp0 = _mm_shuffle_epi8(val0, *(__m128i*)mask8_0); //a0,a3,a6,b1,b4,b7, a1,a4,a7,b2,b5, a2,a5,b0,b3,b6,
10038 val0 = _mm_srli_si128(val0,10); //a0,a3,a6,b1,b4,b7, 0,0,0,0,0,0,0,0,0,0
10040 val0 = _mm_or_si128(val0,val2); //a0,a3,a6,b1,b4,b7,c2,c5 x,x,x,x,x,x,x,x
10060 //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,a3,b2,c1, a1,b0,b3,c2, a2,b1,c0,c3,
10064 val0 = vld1q_u16 (ptr); //a0,a1,a2,a3, b0,b1,b2,b3
10067 tmp0 = _mm_shuffle_epi8(val0, *(__m128i*)mask16); //a0,a3,b2, a1,b0,b3, a2,b1
10070 val0 = _mm_srli_si128(val0,10); //a0,a3,b2, 0,0,0,0,0
10073 val0 = _mm_or_si128(val0,val2); //a0,a3,b2,c1, x,x,x,x
10094 //a0,a1, b0,b1, c0,c1, -> a0,b1, a1,c0, b0,c1
10097 val0 = vld1q_u32 (ptr); //a0,a1, b0,b1,
10100 val0 = _mm_shuffle_epi32(val0, 0 | (3 << 2) | (1 << 4) | (2 << 6)); //a0,b1, a1, b0
10137 //a0,a1, b0,b1, c0,c1, -> a0,b1, a1,c0, b0,c1
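
For byte and 16-bit elements the 3-way split is done with one _mm_shuffle_epi8 per register plus shift/OR fix-up, as the mask comments above show. A sketch of just the gathering shuffle for the byte case, reproducing the "a0,a3,a6,b1,b4,b7, a1,a4,a7,b2,b5, a2,a5,b0,b3,b6" pattern; the mask values are inferred from that comment:

    #include <tmmintrin.h> /* SSSE3 */

    static __m128i ld3_gather_bytes_sketch(__m128i val0) /* bytes a0..a7,b0..b7 */
    {
        /* pick every 3rd byte: stream 0, then stream 1, then stream 2 */
        const __m128i mask8_0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15,
                                              1, 4, 7, 10, 13,
                                              2, 5, 8, 11, 14);
        return _mm_shuffle_epi8(val0, mask8_0); /* a0,a3,a6,b1,b4,b7, a1,a4,a7,b2,b5, a2,a5,b0,b3,b6 */
    }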
10164 v.val[0] = vld1q_u8 ( ptr); //a0,a1,a2,...a7, ...a15
10169 tmp0 = _mm_unpacklo_epi8(v.val[0],v.val[1]); //a0,b0, a1,b1, a2,b2, a3,b3,....a7,b7
10174 v.val[0] = _mm_unpacklo_epi8(tmp0, tmp2); //a0,a8, b0,b8, a1,a9, b1,b9, ....a3,a11, b3,b11
10179 tmp0 = _mm_unpacklo_epi32(v.val[0], v.val[2] ); //a0,a8, b0,b8, c0,c8, d0,d8, a1,a9, b1,b9, c1,c9, d1,d9
10184 v.val[0] = _mm_unpacklo_epi8(tmp0, tmp2); //a0,a4,a8,a12,b0,b4,b8,b12,c0,c4,c8,c12,d0,d4,d8,d12
10196 tmp0 = vld1q_u16 (ptr); //a0,a1,a2,...a7
10200 v.val[0] = _mm_unpacklo_epi16(tmp0,tmp1); //a0,b0, a1,b1, a2,b2, a3,b3,
10204 tmp0 = _mm_unpacklo_epi16(v.val[0], v.val[2]); //a0,a4, b0,b4, a1,a5, b1,b5
10208 v.val[0] = _mm_unpacklo_epi64(tmp0, tmp2); //a0,a4, b0,b4, c0,c4, d0,d4,
10315 //a0,a1, b0,b1, c0,c1, d0,d1 -> a0,c0, a1,c1, b0,d0, b1,d1
10318 val0 = vld1q_u32 (ptr); //a0,a1, b0,b1,
10320 val01 = _mm_unpacklo_epi32(val0,val2); //a0, c0, a1,c1,
10356 //a0,a1, b0,b1, c0,c1, d0,d1 -> a0,c0, a1,c1, b0,d0, b1,d1
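
The vld4 matches above run two rounds of unpacking: first interleave register pairs, then interleave those results, which is a 4x4 transpose. At 32-bit width the whole de-interleave fits in eight unpacks; a sketch with illustrative names, assuming memory holds x0,x1,...,x15 with stream i at x_i, x_{i+4}, ...:

    #include <emmintrin.h> /* SSE2 */

    static void ld4q_u32_sketch(const unsigned int *ptr, __m128i out[4])
    {
        __m128i r0 = _mm_loadu_si128((const __m128i*)ptr);        /* x0..x3   */
        __m128i r1 = _mm_loadu_si128((const __m128i*)(ptr + 4));  /* x4..x7   */
        __m128i r2 = _mm_loadu_si128((const __m128i*)(ptr + 8));  /* x8..x11  */
        __m128i r3 = _mm_loadu_si128((const __m128i*)(ptr + 12)); /* x12..x15 */
        __m128i t0 = _mm_unpacklo_epi32(r0, r1); /* x0,x4,x1,x5    */
        __m128i t1 = _mm_unpackhi_epi32(r0, r1); /* x2,x6,x3,x7    */
        __m128i t2 = _mm_unpacklo_epi32(r2, r3); /* x8,x12,x9,x13  */
        __m128i t3 = _mm_unpackhi_epi32(r2, r3); /* x10,x14,x11,x15 */
        out[0] = _mm_unpacklo_epi64(t0, t2); /* x0,x4,x8,x12  (stream a) */
        out[1] = _mm_unpackhi_epi64(t0, t2); /* x1,x5,x9,x13  (stream b) */
        out[2] = _mm_unpacklo_epi64(t1, t3); /* x2,x6,x10,x14 (stream c) */
        out[3] = _mm_unpackhi_epi64(t1, t3); /* x3,x7,x11,x15 (stream d) */
    }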
11242 //a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3 -> a0,b0,c0,a1, b1,c1,a2,b2, c2,a3,b3,c3
11245 tmp0 = _mm_unpacklo_epi32(val->val[0], val->val[1]); //a0,b0,a1,b1
11251 tmp1 = _mm_unpacklo_epi32(tmp2,val->val[0]); //b0,a0,c0,a1
11252 v.val[0] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp0),_mm_castsi128_ps(tmp1), _MM_SHUFFLE(3,2,1,0))); //a0,b0,c0,a1,
11281 tmp0 = _mm_unpacklo_ps(val->val[0], val->val[1]); //a0,b0,a1,b1
11287 tmp1 = _mm_unpacklo_ps(tmp2,val->val[0]); //b0,a0,c0,a1
11288 v.val[0] = _mm_shuffle_ps(tmp0,tmp1, _MM_SHUFFLE(3,2,1,0)); //a0,b0,c0,a1,
11516 sh0 = _mm_unpacklo_epi8(_pM128i(val->val[0]),_pM128i(val->val[1])); // a0,b0,a1,b1,a2,b2,a3,b3,a4,b4,a5,b5, a6,b6,a7,b7,
11518 val0 = _mm_unpacklo_epi16(sh0,sh1); // a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,
11529 sh0 = _mm_unpacklo_epi16(_pM128i(val->val[0]),_pM128i(val->val[1])); //a0,a1,b0,b1,c0,c1,d0,d1,
11531 val0 = _mm_unpacklo_epi32(sh0,sh1); // a0,a1,a2,a3,b0,b1,b2,b3
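
The store side (vst3/vst4 matches above) runs the same unpack pyramid in reverse: two unpack stages turn four separate streams back into a0,b0,c0,d0,a1,... memory order. A sketch at 32-bit width, with an illustrative name:

    #include <emmintrin.h> /* SSE2 */

    static void st4q_u32_sketch(unsigned int *ptr,
                                __m128i a, __m128i b, __m128i c, __m128i d)
    {
        __m128i ab_lo = _mm_unpacklo_epi32(a, b); /* a0,b0,a1,b1 */
        __m128i cd_lo = _mm_unpacklo_epi32(c, d); /* c0,d0,c1,d1 */
        __m128i ab_hi = _mm_unpackhi_epi32(a, b); /* a2,b2,a3,b3 */
        __m128i cd_hi = _mm_unpackhi_epi32(c, d); /* c2,d2,c3,d3 */
        _mm_storeu_si128((__m128i*)ptr,        _mm_unpacklo_epi64(ab_lo, cd_lo)); /* a0,b0,c0,d0 */
        _mm_storeu_si128((__m128i*)(ptr + 4),  _mm_unpackhi_epi64(ab_lo, cd_lo)); /* a1,b1,c1,d1 */
        _mm_storeu_si128((__m128i*)(ptr + 8),  _mm_unpacklo_epi64(ab_hi, cd_hi)); /* a2,b2,c2,d2 */
        _mm_storeu_si128((__m128i*)(ptr + 12), _mm_unpackhi_epi64(ab_hi, cd_hi)); /* a3,b3,c3,d3 */
    }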
15600 // making the result look like (a0, b0, a2, b2, a4, b4,....) (a1, b1, a3, b3, a5, b5,.....)
15607 tmp = _mm_unpacklo_epi8(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3,...,a7,b7
15608 val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)mask16_even_odd); //(a0, b0, a2, b2, a4, b4, a6, b6), (a1,b1, a3,b3, a5,b5, a7,b7)
15609 vst1q_s8 (val.val, val0); // _mm_shuffle_epi32 (val.val[0], _SWAP_HI_LOW32); //(a1,b1, a3,b3, a5,b5, a7,b7),(a0, b0, a2, b2, a4, b4, a6, b6),
15619 tmp = _mm_unpacklo_epi16(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3
15620 val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)maskdlv16); //a0, b0, a2, b2, a1,b1, a3, b3
15621 vst1q_s16(val.val, val0); // _mm_shuffle_epi32 (val.val[0], _SWAP_HI_LOW32); //(a1,b1, a3,b3),(a0, b0, a2, b2),
15630 val0 = _mm_unpacklo_epi32(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1
15631 vst1q_s32(val.val, val0); // _mm_shuffle_epi32(val.val[0], _SWAP_HI_LOW32); //a1,b1, a0,b0,
15652 return val; //a0,b0,a1,b1
15667 a_sh = _mm_shuffle_epi8 (a, *(__m128i*)mask8_even_odd); //a0, a2, a4, a6, a8, a10, a12, a14, a1, a3, a5, a7, a9, a11, a13, a15
15670 r8x16.val[0] = _mm_unpacklo_epi8(a_sh, b_sh); //(a0, b0, a2, b2, a4, b4, a6, b6, a8,b8, a10,b10, a12,b12, a14,b14)
15681 a_sh = _mm_shuffle_epi8 (a, *(__m128i*)mask16_even_odd); //a0, a2, a4, a6, a1, a3, a5, a7
15683 v16x8.val[0] = _mm_unpacklo_epi16(a_sh, b_sh); //a0, b0, a2, b2, a4, b4, a6, b6
15694 a_sh = _mm_shuffle_epi32 (a, 216); //a0, a2, a1, a3
15697 v32x4.val[0] = _mm_unpacklo_epi32(a_sh, b_sh); //a0, b0, a2, b2
15717 a_sh = _mm_shuffle_ps (a, a, _MM_SHUFFLE(3,1,2,0)); //a0, a2, a1, a3, need to check endianness
15720 f32x4.val[0] = _mm_unpacklo_ps(a_sh, b_sh); //a0, b0, a2, b2
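
These matches are the vtrn (transpose) family: pre-shuffling both inputs to even-then-odd lane order (immediate 216 == _MM_SHUFFLE(3,1,2,0)) lets a single unpacklo/unpackhi pair produce both transposed rows. A sketch for 32-bit lanes, names illustrative:

    #include <emmintrin.h> /* SSE2 */

    static void trnq_s32_sketch(__m128i a, __m128i b, __m128i *r0, __m128i *r1)
    {
        __m128i a_sh = _mm_shuffle_epi32(a, 216); /* a0,a2,a1,a3 */
        __m128i b_sh = _mm_shuffle_epi32(b, 216); /* b0,b2,b1,b3 */
        *r0 = _mm_unpacklo_epi32(a_sh, b_sh);     /* a0,b0,a2,b2 */
        *r1 = _mm_unpackhi_epi32(a_sh, b_sh);     /* a1,b1,a3,b3 */
    }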
15733 //output has (a0,b0,a1,b1, a2,b2,.....)
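
The vzip output described here maps directly onto the unpack instructions, with no pre-shuffle needed. A sketch for 32-bit lanes, names illustrative:

    #include <emmintrin.h> /* SSE2 */

    static void zipq_s32_sketch(__m128i a, __m128i b, __m128i *lo, __m128i *hi)
    {
        *lo = _mm_unpacklo_epi32(a, b); /* a0,b0,a1,b1 */
        *hi = _mm_unpackhi_epi32(a, b); /* a2,b2,a3,b3 */
    }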
15828 //As a result of these functions, the first val contains (a0,a2,a4,....,b0,b2,b4,...) and the second val (a1,a3,a5,....b1,b3,b5...)
15836 tmp = _mm_unpacklo_epi8(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3,...,a7,b7
15837 val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)maskdlv8); //(a0, a2, a4, a6, b0, b2, b4, b6), (a1, a3, a5, a7, b1,b3, b5, b7)
15848 tmp = _mm_unpacklo_epi16(_pM128i(a), _pM128i(b)); //a0,b0,a1,b1,a2,b2,a3,b3
15849 val0 = _mm_shuffle_epi8 (tmp, *(__m128i*)maskdlv16); //a0,a2, b0, b2, a1,a3, b1,b3
15859 val0 = _mm_unpacklo_epi32(_pM128i(a), _pM128i(b)); //a0,b0, a1,b1
15888 a_sh = _mm_shuffle_epi8 (a, *(__m128i*)mask8_even_odd); //a0, a2, a4, a6, a8, a10, a12, a14, a1, a3, a5, a7, a9, a11, a13, a15
15891 v8x16.val[0] = _mm_unpacklo_epi64(a_sh, b_sh); //a0, a2, a4, a6, a8, a10, a12, a14, b0, b2, b4, b6, b8, b10, b12, b14,
15902 a_sh = _mm_shuffle_epi8 (a, *(__m128i*)mask16_even_odd); //a0, a2, a4, a6, a1, a3, a5, a7
15904 v16x8.val[0] = _mm_unpacklo_epi64(a_sh, b_sh); //a0, a2, a4, a6, b0, b2, b4, b6
15915 a_sh = _mm_shuffle_epi32 (a, 216); //a0, a2, a1, a3
15918 v32x4.val[0] = _mm_unpacklo_epi64(a_sh, b_sh); //a0, a2, b0, b2
15936 v32x4.val[0] = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2,0, 2, 0)); //a0, a2, b0, b2, need to check endianness, however
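
For floats, vuzp reduces to two _mm_shuffle_ps calls, one gathering the even lanes of both inputs and one the odd lanes (lane order per Intel's little-endian convention, hence the endianness caveat above). Sketch, names illustrative:

    #include <xmmintrin.h> /* SSE */

    static void uzpq_f32_sketch(__m128 a, __m128 b, __m128 *even, __m128 *odd)
    {
        *even = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); /* a0,a2,b0,b2 */
        *odd  = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); /* a1,a3,b1,b3 */
    }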