Lines Matching refs:DST
29 static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
182 _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
183 _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
184 _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
185 _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
188 WebPUint32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0));
189 WebPUint32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1));
190 WebPUint32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2));
191 WebPUint32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3));
482 static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
486 _mm_storel_epi64((__m128i*)(dst + j * BPS), values);
490 static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
494 _mm_store_si128((__m128i*)(dst + j * BPS), values);
498 static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
502 memset(dst + j * BPS, value, 4);
505 Put8x8uv_SSE2(value, dst);
507 Put16_SSE2(value, dst);
511 static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
515 _mm_storel_epi64((__m128i*)(dst + j * BPS), top_values);
519 static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
523 _mm_store_si128((__m128i*)(dst + j * BPS), top_values);
527 static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
531 VE8uv_SSE2(dst, top);
533 VE16_SSE2(dst, top);
536 Fill_SSE2(dst, 127, size);
540 static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
544 _mm_storel_epi64((__m128i*)dst, values);
545 dst += BPS;
549 static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
553 _mm_store_si128((__m128i*)dst, values);
554 dst += BPS;
558 static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
562 HE8uv_SSE2(dst, left);
564 HE16_SSE2(dst, left);
567 Fill_SSE2(dst, 129, size);
571 static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
578 for (y = 0; y < 8; ++y, dst += BPS) {
582 _mm_storel_epi64((__m128i*)dst, out);
588 for (y = 0; y < 16; ++y, dst += BPS) {
594 _mm_store_si128((__m128i*)dst, out);
599 static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
603 TM_SSE2(dst, left, top, size);
605 HorizontalPred_SSE2(dst, left, size);
613 VerticalPred_SSE2(dst, top, size);
615 Fill_SSE2(dst, 129, size);
620 static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
626 Put8x8uv_SSE2(DC >> 4, dst);
629 static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
634 Put8x8uv_SSE2(DC >> 3, dst);
637 static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
639 DC8uvNoLeft_SSE2(dst, left);
642 static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
643 Put8x8uv_SSE2(0x80, dst);
646 static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
650 DC8uv_SSE2(dst, left, top);
652 DC8uvNoLeft_SSE2(dst, top);
655 DC8uvNoTop_SSE2(dst, left);
657 DC8uvNoTopLeft_SSE2(dst);
661 static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
667 Put16_SSE2(DC >> 5, dst);
670 static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
673 Put16_SSE2(DC >> 4, dst);
676 static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
678 DC16NoLeft_SSE2(dst, left);
681 static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
682 Put16_SSE2(0x80, dst);
685 static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
689 DC16_SSE2(dst, left, top);
691 DC16NoLeft_SSE2(dst, top);
694 DC16NoTop_SSE2(dst, left);
696 DC16NoTopLeft_SSE2(dst);
703 #define DST(x, y) dst[(x) + (y) * BPS]
715 static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
728 WebPUint32ToMem(dst + i * BPS, vals);
732 static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
739 WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(X, I, J));
740 WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(I, J, K));
741 WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(J, K, L));
742 WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
745 static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
749 Fill_SSE2(dst, dc >> 3, 4);
752 static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
763 WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
764 WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
765 WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
766 WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
769 static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
785 WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
786 WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
787 WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
788 WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
791 DST(0, 2) = AVG3(J, I, X);
792 DST(0, 3) = AVG3(K, J, I);
795 static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
811 WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
812 WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
813 WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
814 WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
817 DST(3, 2) = (extra_out >> 0) & 0xff;
818 DST(3, 3) = (extra_out >> 8) & 0xff;
821 dst,
832 WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
833 WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
834 WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
835 WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
838 static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
843 DST(0, 0) = AVG2(I, J);
844 DST(2, 0) = DST(0, 1) = AVG2(J, K);
845 DST(2, 1) = DST(0, 2) = AVG2(K, L);
846 DST(1, 0) = AVG3(I, J, K);
847 DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
848 DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
849 DST(3, 2) = DST(2, 2) =
850 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
853 static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
863 DST(0, 0) = DST(2, 1) = AVG2(I, X);
864 DST(0, 1) = DST(2, 2) = AVG2(J, I);
865 DST(0, 2) = DST(2, 3) = AVG2(K, J);
866 DST(0, 3) = AVG2(L, K);
868 DST(3, 0) = AVG3(A, B, C);
869 DST(2, 0) = AVG3(X, A, B);
870 DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
871 DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
872 DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
873 DST(1, 3) = AVG3(L, K, J);
876 static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
881 for (y = 0; y < 4; ++y, dst += BPS) {
885 WebPUint32ToMem(dst, _mm_cvtsi128_si32(out));
889 #undef DST
898 static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
899 DC4_SSE2(I4DC4 + dst, top);
900 TM4_SSE2(I4TM4 + dst, top);
901 VE4_SSE2(I4VE4 + dst, top);
902 HE4_SSE2(I4HE4 + dst, top);
903 RD4_SSE2(I4RD4 + dst, top);
904 VR4_SSE2(I4VR4 + dst, top);
905 LD4_SSE2(I4LD4 + dst, top);
906 VL4_SSE2(I4VL4 + dst, top);
907 HD4_SSE2(I4HD4 + dst, top);
908 HU4_SSE2(I4HU4 + dst, top);
914 static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
917 DC8uvMode_SSE2(C8DC8 + dst, left, top);
918 VerticalPred_SSE2(C8VE8 + dst, top, 8);
919 HorizontalPred_SSE2(C8HE8 + dst, left, 8);
920 TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
922 dst += 8;
925 DC8uvMode_SSE2(C8DC8 + dst, left, top);
926 VerticalPred_SSE2(C8VE8 + dst, top, 8);
927 HorizontalPred_SSE2(C8HE8 + dst, left, 8);
928 TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
934 static void Intra16Preds_SSE2(uint8_t* dst,
936 DC16Mode_SSE2(I16DC16 + dst, left, top);
937 VerticalPred_SSE2(I16VE16 + dst, top, 16);
938 HorizontalPred_SSE2(I16HE16 + dst, left, 16);
939 TrueMotion_SSE2(I16TM16 + dst, left, top, 16);