Home | History | Annotate | Download | only in dsp

Lines Matching refs:__m128i

26 static WEBP_INLINE void SubtractAndSquare_SSE2(const __m128i a, const __m128i b,
27 __m128i* const sum) {
29 const __m128i a_b = _mm_subs_epu8(a, b);
30 const __m128i b_a = _mm_subs_epu8(b, a);
31 const __m128i abs_a_b = _mm_or_si128(a_b, b_a);
33 const __m128i zero = _mm_setzero_si128();
34 const __m128i C0 = _mm_unpacklo_epi8(abs_a_b, zero);
35 const __m128i C1 = _mm_unpackhi_epi8(abs_a_b, zero);
37 const __m128i sum1 = _mm_madd_epi16(C0, C0);
38 const __m128i sum2 = _mm_madd_epi16(C1, C1);
52 __m128i sum1;
53 __m128i sum = _mm_setzero_si128();
54 __m128i a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
55 __m128i b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
58 const __m128i a1 = _mm_loadu_si128((const __m128i*)&src1[i]);
59 const __m128i b1 = _mm_loadu_si128((const __m128i*)&src2[i]);
60 __m128i sum2;
64 a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
65 b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
72 _mm_storeu_si128((__m128i*)tmp, sum);
86 static uint32_t HorizontalAdd16b_SSE2(const __m128i* const m) {
88 const __m128i a = _mm_srli_si128(*m, 8);
89 const __m128i b = _mm_add_epi16(*m, a);
90 _mm_storeu_si128((__m128i*)tmp, b);
94 static uint32_t HorizontalAdd32b_SSE2(const __m128i* const m) {
95 const __m128i a = _mm_srli_si128(*m, 8);
96 const __m128i b = _mm_add_epi32(*m, a);
97 const __m128i c = _mm_add_epi32(b, _mm_srli_si128(b, 4));
105 const __m128i Wy = _mm_set1_epi16((WEIGHT)); \
106 const __m128i W = _mm_mullo_epi16(Wx, Wy); \
108 const __m128i a0 = _mm_loadl_epi64((const __m128i*)src1); \
109 const __m128i b0 = _mm_loadl_epi64((const __m128i*)src2); \
111 const __m128i a1 = _mm_unpacklo_epi8(a0, zero); \
112 const __m128i b1 = _mm_unpacklo_epi8(b0, zero); \
113 const __m128i wa1 = _mm_mullo_epi16(a1, W); \
114 const __m128i wb1 = _mm_mullo_epi16(b1, W); \
128 const __m128i zero = _mm_setzero_si128();
129 __m128i xm = zero, ym = zero; // 16b accums
130 __m128i xxm = zero, yym = zero, xym = zero; // 32b accum
131 const __m128i Wx = _mm_loadu_si128((const __m128i*)kWeight);