
Lines Matching refs:dst

19 void vpx_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
29 _mm_storel_epi64((__m128i *)dst, row0);
30 dst += stride;
31 _mm_storel_epi64((__m128i *)dst, row1);
32 dst += stride;
33 _mm_storel_epi64((__m128i *)dst, row2);
34 dst += stride;
35 _mm_storel_epi64((__m128i *)dst, row3);
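Only the row stores of the 4x4 H predictor are matched above. A minimal sketch of a plausible completion, assuming the usual (above, left, bd) trailing parameters and that each row register is the corresponding left-column sample broadcast across the low 64-bit lane; the load and shuffle steps are assumptions, not matched lines:

#include <emmintrin.h>  /* SSE2 intrinsics */
#include <stddef.h>
#include <stdint.h>

void vpx_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  /* Load the four left-column samples into the low 64 bits. */
  const __m128i left_u16 = _mm_loadl_epi64((const __m128i *)left);
  /* Broadcast left[i] across the low four 16-bit lanes to form row i. */
  const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x00);
  const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
  const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
  const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
  (void)above;  /* H prediction uses only the left column */
  (void)bd;
  _mm_storel_epi64((__m128i *)dst, row0);
  dst += stride;
  _mm_storel_epi64((__m128i *)dst, row1);
  dst += stride;
  _mm_storel_epi64((__m128i *)dst, row2);
  dst += stride;
  _mm_storel_epi64((__m128i *)dst, row3);
}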
38 void vpx_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
52 _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0));
53 dst += stride;
54 _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1));
55 dst += stride;
56 _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2));
57 dst += stride;
58 _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3));
59 dst += stride;
60 _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row4, row4));
61 dst += stride;
62 _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row5, row5));
63 dst += stride;
64 _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row6, row6));
65 dst += stride;
66 _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row7, row7));
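The 8x8 variant stores rows 0-3 with _mm_unpacklo_epi64 and rows 4-7 with _mm_unpackhi_epi64. The likely reason is that _mm_shufflelo_epi16 can only broadcast a sample within the low 64 bits of a register and _mm_shufflehi_epi16 only within the high 64 bits, so rows built from left[0..3] need their low half replicated and rows built from left[4..7] their high half. A hedged sketch of the assumed row setup (h8_make_rows is a hypothetical name; the shuffles are not matched lines, and the same headers as the sketch above are required):

static void h8_make_rows(const uint16_t *left, __m128i rows[8]) {
  /* left must hold 8 samples and be 16-byte aligned for _mm_load_si128. */
  const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
  rows[0] = _mm_shufflelo_epi16(left_u16, 0x00);  /* left[0] in lanes 0..3 */
  rows[1] = _mm_shufflelo_epi16(left_u16, 0x55);
  rows[2] = _mm_shufflelo_epi16(left_u16, 0xaa);
  rows[3] = _mm_shufflelo_epi16(left_u16, 0xff);
  rows[4] = _mm_shufflehi_epi16(left_u16, 0x00);  /* left[4] in lanes 4..7 */
  rows[5] = _mm_shufflehi_epi16(left_u16, 0x55);
  rows[6] = _mm_shufflehi_epi16(left_u16, 0xaa);
  rows[7] = _mm_shufflehi_epi16(left_u16, 0xff);
}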
69 static INLINE void h_store_16_unpacklo(uint16_t **dst, const ptrdiff_t stride,
72 _mm_store_si128((__m128i *)*dst, val);
73 _mm_store_si128((__m128i *)(*dst + 8), val);
74 *dst += stride;
77 static INLINE void h_store_16_unpackhi(uint16_t **dst, const ptrdiff_t stride,
80 _mm_store_si128((__m128i *)(*dst), val);
81 _mm_store_si128((__m128i *)(*dst + 8), val);
82 *dst += stride;
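Both 16-wide store helpers write the same value to the two halves of a row, but the computation of val is not among the matched lines. A hedged sketch of the missing step, inferred from the helper names and the 8x8 store pattern above (INLINE is the project's inline macro):

static INLINE void h_store_16_unpacklo(uint16_t **dst, const ptrdiff_t stride,
                                       const __m128i *row) {
  /* Assumed: replicate the low 64 bits of *row across the full register. */
  const __m128i val = _mm_unpacklo_epi64(*row, *row);
  _mm_store_si128((__m128i *)*dst, val);
  _mm_store_si128((__m128i *)(*dst + 8), val);
  *dst += stride;
}

static INLINE void h_store_16_unpackhi(uint16_t **dst, const ptrdiff_t stride,
                                       const __m128i *row) {
  /* Assumed: replicate the high 64 bits of *row across the full register. */
  const __m128i val = _mm_unpackhi_epi64(*row, *row);
  _mm_store_si128((__m128i *)*dst, val);
  _mm_store_si128((__m128i *)(*dst + 8), val);
  *dst += stride;
}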
85 void vpx_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
102 h_store_16_unpacklo(&dst, stride, &row0);
103 h_store_16_unpacklo(&dst, stride, &row1);
104 h_store_16_unpacklo(&dst, stride, &row2);
105 h_store_16_unpacklo(&dst, stride, &row3);
106 h_store_16_unpackhi(&dst, stride, &row4);
107 h_store_16_unpackhi(&dst, stride, &row5);
108 h_store_16_unpackhi(&dst, stride, &row6);
109 h_store_16_unpackhi(&dst, stride, &row7);
113 static INLINE void h_store_32_unpacklo(uint16_t **dst, const ptrdiff_t stride,
116 _mm_store_si128((__m128i *)(*dst), val);
117 _mm_store_si128((__m128i *)(*dst + 8), val);
118 _mm_store_si128((__m128i *)(*dst + 16), val);
119 _mm_store_si128((__m128i *)(*dst + 24), val);
120 *dst += stride;
123 static INLINE void h_store_32_unpackhi(uint16_t **dst, const ptrdiff_t stride,
126 _mm_store_si128((__m128i *)(*dst), val);
127 _mm_store_si128((__m128i *)(*dst + 8), val);
128 _mm_store_si128((__m128i *)(*dst + 16), val);
129 _mm_store_si128((__m128i *)(*dst + 24), val);
130 *dst += stride;
133 void vpx_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
150 h_store_32_unpacklo(&dst, stride, &row0);
151 h_store_32_unpacklo(&dst, stride, &row1);
152 h_store_32_unpacklo(&dst, stride, &row2);
153 h_store_32_unpacklo(&dst, stride, &row3);
154 h_store_32_unpackhi(&dst, stride, &row4);
155 h_store_32_unpackhi(&dst, stride, &row5);
156 h_store_32_unpackhi(&dst, stride, &row6);
157 h_store_32_unpackhi(&dst, stride, &row7);
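The 16x16 and 32x32 H predictors make the same unpacklo/unpackhi calls per group of eight rows, so the left column is presumably consumed eight samples at a time. A sketch of the assumed loop structure for the 32x32 case, reusing the hypothetical h8_make_rows helper from the 8x8 sketch; only the h_store_32_* calls are matched lines, the loop itself is an assumption:

void vpx_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
                                       const uint16_t *above,
                                       const uint16_t *left, int bd) {
  int i;
  (void)above;
  (void)bd;
  /* Four passes of eight rows; left advances by eight samples each pass. */
  for (i = 0; i < 4; i++, left += 8) {
    __m128i rows[8];
    h8_make_rows(left, rows);  /* hypothetical helper, see the 8x8 sketch */
    h_store_32_unpacklo(&dst, stride, &rows[0]);
    h_store_32_unpacklo(&dst, stride, &rows[1]);
    h_store_32_unpacklo(&dst, stride, &rows[2]);
    h_store_32_unpacklo(&dst, stride, &rows[3]);
    h_store_32_unpackhi(&dst, stride, &rows[4]);
    h_store_32_unpackhi(&dst, stride, &rows[5]);
    h_store_32_unpackhi(&dst, stride, &rows[6]);
    h_store_32_unpackhi(&dst, stride, &rows[7]);
  }
}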
171 static INLINE void dc_store_4x4(uint16_t *dst, ptrdiff_t stride,
175 for (i = 0; i < 4; ++i, dst += stride) {
176 _mm_storel_epi64((__m128i *)dst, dc_dup);
180 void vpx_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
188 dc_store_4x4(dst, stride, &dc);
191 void vpx_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
199 dc_store_4x4(dst, stride, &dc);
202 void vpx_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
209 dc_store_4x4(dst, stride, &dc_dup);
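For the 4x4 DC family only the dc_store_4x4 loop and the calls into it are matched. A hedged sketch of the rest, assuming dc_store_4x4 takes a pointer to a vector with the DC value in lane 0, that the left variant uses the rounded mean of the four left samples ((sum + 2) >> 2), and that the 128 variant uses the mid-grey value 1 << (bd - 1); the top variant would mirror the left one using above[]:

static INLINE void dc_store_4x4(uint16_t *dst, ptrdiff_t stride,
                                const __m128i *dc) {
  /* Broadcast lane 0 of *dc across the low four lanes, then fill 4 rows. */
  const __m128i dc_dup = _mm_shufflelo_epi16(*dc, 0x0);
  int i;
  for (i = 0; i < 4; ++i, dst += stride) {
    _mm_storel_epi64((__m128i *)dst, dc_dup);
  }
}

void vpx_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
  /* Round-to-nearest mean of the four left samples. */
  const int sum = left[0] + left[1] + left[2] + left[3];
  const __m128i dc = _mm_cvtsi32_si128((sum + 2) >> 2);
  (void)above;
  (void)bd;
  dc_store_4x4(dst, stride, &dc);
}

void vpx_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
                                          const uint16_t *above,
                                          const uint16_t *left, int bd) {
  /* Mid-grey for the bit depth, e.g. 512 when bd == 10. */
  const __m128i dc_dup = _mm_cvtsi32_si128(1 << (bd - 1));
  (void)above;
  (void)left;
  dc_store_4x4(dst, stride, &dc_dup);
}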
224 static INLINE void dc_store_8x8(uint16_t *dst, ptrdiff_t stride,
229 for (i = 0; i < 8; ++i, dst += stride) {
230 _mm_store_si128((__m128i *)dst, dc_dup);
234 void vpx_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
242 dc_store_8x8(dst, stride, &dc);
245 void vpx_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
253 dc_store_8x8(dst, stride, &dc);
256 void vpx_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
263 dc_store_8x8(dst, stride, &dc_dup);
275 static INLINE void dc_store_16x16(uint16_t *dst, ptrdiff_t stride,
280 for (i = 0; i < 16; ++i, dst += stride) {
281 _mm_store_si128((__m128i *)dst, dc_dup);
282 _mm_store_si128((__m128i *)(dst + 8), dc_dup);
286 void vpx_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
294 dc_store_16x16(dst, stride, &dc);
297 void vpx_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
305 dc_store_16x16(dst, stride, &dc);
308 void vpx_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
315 dc_store_16x16(dst, stride, &dc_dup);
330 static INLINE void dc_store_32x32(uint16_t *dst, ptrdiff_t stride,
335 for (i = 0; i < 32; ++i, dst += stride) {
336 _mm_store_si128((__m128i *)dst, dc_dup);
337 _mm_store_si128((__m128i *)(dst + 8), dc_dup);
338 _mm_store_si128((__m128i *)(dst + 16), dc_dup);
339 _mm_store_si128((__m128i *)(dst + 24), dc_dup);
343 void vpx_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
351 dc_store_32x32(dst, stride, &dc);
354 void vpx_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
362 dc_store_32x32(dst, stride, &dc);
365 void vpx_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
372 dc_store_32x32(dst, stride, &dc_dup);
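The 32x32 DC variants need the sum of 32 samples, which no longer fits in 16 bits at 12-bit depth (32 * 4095 > 65535). A hedged sketch of one way to do that reduction in SSE2 (sum_32_u16 is a hypothetical name; the in-file helper is not among the matched lines); the left variant would then round the mean as (sum + 16) >> 5 before handing it to dc_store_32x32:

static INLINE int sum_32_u16(const uint16_t *ref) {
  const __m128i one = _mm_set1_epi16(1);
  const __m128i a = _mm_load_si128((const __m128i *)ref);
  const __m128i b = _mm_load_si128((const __m128i *)(ref + 8));
  const __m128i c = _mm_load_si128((const __m128i *)(ref + 16));
  const __m128i d = _mm_load_si128((const __m128i *)(ref + 24));
  /* madd against 1 pairs adjacent lanes and widens them to 32 bits.
   * Safe for bd <= 12: samples stay below 32768, so signed madd is exact. */
  __m128i sum = _mm_add_epi32(_mm_madd_epi16(a, one), _mm_madd_epi16(b, one));
  sum = _mm_add_epi32(sum, _mm_madd_epi16(c, one));
  sum = _mm_add_epi32(sum, _mm_madd_epi16(d, one));
  /* Horizontal reduction of the four 32-bit partial sums. */
  sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 8));
  sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 4));
  return _mm_cvtsi128_si32(sum);
}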
396 void vpx_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
415 _mm_storel_epi64((__m128i *)dst, row0);
416 dst += stride;
417 _mm_storel_epi64((__m128i *)dst, row1);
418 dst += stride;
419 _mm_storel_epi64((__m128i *)dst, row2);
420 dst += stride;
421 _mm_storel_epi64((__m128i *)dst, row3);
423 dst -= stride;
424 dst[0] = _mm_extract_epi16(avg3, 1);
425 dst[stride] = _mm_extract_epi16(avg3, 0);
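The d117 rows, and the avg2/avg3 lanes that the two scalar stores above write into the lower-left corner the 64-bit row stores cannot reach, come from the two standard averaging filters used by the directional predictors: a 2-tap avg2(a, b) = (a + b + 1) >> 1 and a 3-tap avg3(a, b, c) = (a + 2b + c + 2) >> 2. A hedged sketch of overflow-safe SSE2 formulations (the in-file helper names are not among the matched lines):

static INLINE __m128i avg2_u16(const __m128i a, const __m128i b) {
  return _mm_avg_epu16(a, b);  /* (a + b + 1) >> 1 */
}

static INLINE __m128i avg3_u16(const __m128i a, const __m128i b,
                               const __m128i c) {
  /* (a + 2*b + c + 2) >> 2 without intermediate 16-bit overflow:
   * form floor((a + c) / 2) by undoing avg's round-up on odd sums,
   * then average that with b, which restores round-to-nearest overall. */
  const __m128i one = _mm_set1_epi16(1);
  const __m128i ac = _mm_avg_epu16(a, c);
  const __m128i ac_floor =
      _mm_subs_epu16(ac, _mm_and_si128(_mm_xor_si128(a, c), one));
  return _mm_avg_epu16(ac_floor, b);
}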
428 void vpx_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
447 _mm_storel_epi64((__m128i *)dst, row0);
448 dst += stride;
449 _mm_storel_epi64((__m128i *)dst, row1);
450 dst += stride;
451 _mm_storel_epi64((__m128i *)dst, row2);
452 dst += stride;
453 _mm_storel_epi64((__m128i *)dst, row3);
456 void vpx_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
477 _mm_storel_epi64((__m128i *)dst, row0);
478 dst[0] = _mm_extract_epi16(avg2, 3);
479 dst += stride;
480 _mm_storel_epi64((__m128i *)dst, row1);
481 dst += stride;
482 _mm_storel_epi64((__m128i *)dst, row2);
483 dst += stride;
484 _mm_storel_epi64((__m128i *)dst, row3);
487 void vpx_highbd_d207_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
503 _mm_storel_epi64((__m128i *)dst, row0);
504 dst += stride;
505 _mm_storel_epi64((__m128i *)dst, row1);
506 dst += stride;
507 _mm_storel_epi64((__m128i *)dst, row2);
508 dst += stride;
509 _mm_storel_epi64((__m128i *)dst, row3);
512 void vpx_highbd_d63_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
526 _mm_storel_epi64((__m128i *)dst, row0);
527 dst += stride;
528 _mm_storel_epi64((__m128i *)dst, row1);
529 dst += stride;
530 _mm_storel_epi64((__m128i *)dst, row2);
531 dst += stride;
532 _mm_storel_epi64((__m128i *)dst, row3);
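Finally, a minimal usage sketch for the simplest entry point above, assuming the usual (above, left, bd) trailing parameters and a libvpx build with SSE2 enabled. stride is in uint16_t units; the 8x8 and larger variants additionally require each dst row to be 16-byte aligned because they use _mm_store_si128, while the 4x4 path only issues 64-bit stores:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

void vpx_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd);

int main(void) {
  uint16_t dst[4 * 4] = { 0 };
  const uint16_t above[4] = { 100, 200, 300, 400 };
  const uint16_t left[4] = { 10, 20, 30, 40 };
  int r, c, ok = 1;

  vpx_highbd_h_predictor_4x4_sse2(dst, 4 /* stride in samples */, above, left,
                                  10 /* bd */);

  /* H prediction copies left[r] across row r. */
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c) ok &= (dst[r * 4 + c] == left[r]);

  printf("h_predictor_4x4: %s\n", ok ? "ok" : "mismatch");
  return ok ? 0 : 1;
}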