
Lines Matching refs:p1

277 #define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) {                      \
279 const __m128i t1 = MM_ABS(p1, p0); \
283 const __m128i t3 = _mm_subs_epu8(t1, h); /* abs(p1 - p0) - hev_thresh */ \
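
The two saturating subtractions against h are how the macro tests "high edge variance" without branches: each result is zero exactly when the difference stays within hev_thresh. A scalar sketch of the same test for one pixel position (the NotHev helper below is illustrative, not a name from this file):

    #include <stdint.h>
    #include <stdlib.h>   /* abs() */

    /* Scalar equivalent of GET_NOTHEV for a single pixel position: not-hev
       holds when both neighbouring differences are within hev_thresh, i.e.
       both saturating subtractions above come out zero. */
    static int NotHev(uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
                      int hev_thresh) {
      return abs(p1 - p0) <= hev_thresh && abs(q1 - q0) <= hev_thresh;
    }
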
290 #define GET_BASE_DELTA(p1, p0, q0, q1, o) { \
292 o = _mm_subs_epi8(p1, q1); /* p1 - q1 */ \
293 o = _mm_adds_epi8(o, qp0); /* p1 - q1 + 1 * (q0 - p0) */ \
294 o = _mm_adds_epi8(o, qp0); /* p1 - q1 + 2 * (q0 - p0) */ \
295 o = _mm_adds_epi8(o, qp0); /* p1 - q1 + 3 * (q0 - p0) */ \
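
qp0 here holds (q0 - p0), so the macro accumulates the standard VP8 base filter value in signed saturating byte arithmetic. A scalar sketch of the same computation (clamp_s8 and BaseDelta are illustrative helpers, not names from this file):

    #include <stdint.h>

    /* Saturate to int8, mirroring _mm_adds_epi8 / _mm_subs_epi8. */
    static int8_t clamp_s8(int v) {
      return (int8_t)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }

    /* Scalar equivalent of GET_BASE_DELTA on sign-flipped (signed) pixels. */
    static int8_t BaseDelta(int8_t p1, int8_t p0, int8_t q0, int8_t q1) {
      const int8_t qp0 = clamp_s8(q0 - p0);  /* q0 - p0 */
      int8_t o = clamp_s8(p1 - q1);          /* p1 - q1 */
      o = clamp_s8(o + qp0);                 /* p1 - q1 + 1 * (q0 - p0) */
      o = clamp_s8(o + qp0);                 /* p1 - q1 + 2 * (q0 - p0) */
      o = clamp_s8(o + qp0);                 /* p1 - q1 + 3 * (q0 - p0) */
      return o;
    }
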
324 static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
326 __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
329 t1 = _mm_srli_epi16(t1, 1); // abs(p1 - q1) / 2
333 *mask = _mm_adds_epu8(*mask, t1); // abs(p0 - q0) * 2 + abs(p1 - q1) / 2
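
In scalar terms the mask built by NeedsFilter is just the VP8 edge criterion spelled out in the comment above; a sketch for one pixel position (NeedsFilterScalar is an illustrative name):

    #include <stdint.h>
    #include <stdlib.h>   /* abs() */

    /* Scalar form of the NeedsFilter test: filter this position only when the
       weighted edge difference stays within thresh. */
    static int NeedsFilterScalar(uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
                                 int thresh) {
      return abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= thresh;
    }
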
344 static inline void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
348 const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
351 NeedsFilter(p1, p0, q0, q1, thresh, &mask);
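
DoFilter2 is the "simple" in-loop filter: it builds the NeedsFilter mask, flips the pixels to the signed range (the xor with sign_bit above), and then adjusts only p0 and q0. A per-column scalar sketch under those assumptions (helper names illustrative; the saturating signed updates on flipped pixels are written here as clamps to 0..255 on the original values, which is equivalent, and the shifts are arithmetic as SIGNED_SHIFT_N's are):

    #include <stdint.h>
    #include <stdlib.h>

    static int8_t clamp_s8(int v) {
      return (int8_t)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }
    static uint8_t clamp255(int v) {
      return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    /* Scalar sketch of DoFilter2 for one pixel column. */
    static void SimpleFilterScalar(uint8_t* p1, uint8_t* p0,
                                   uint8_t* q0, uint8_t* q1, int thresh) {
      if (abs(*p0 - *q0) * 2 + abs(*p1 - *q1) / 2 > thresh) return;  /* mask */
      {
        const int a = clamp_s8(clamp_s8(*p1 - *q1) + 3 * (*q0 - *p0));
        *q0 = clamp255(*q0 - (clamp_s8(a + 4) >> 3));  /* q0 -= (a + 4) >> 3 */
        *p0 = clamp255(*p0 + (clamp_s8(a + 3) >> 3));  /* p0 += (a + 3) >> 3 */
      }
    }
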
364 // Applies filter on 4 pixels (p1, p0, q0 and q1)
365 static inline void DoFilter4(__m128i* p1, __m128i *p0, __m128i* q0, __m128i* q1,
372 GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
375 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
377 t1 = _mm_subs_epi8(*p1, *q1); // p1 - q1
378 t1 = _mm_andnot_si128(not_hev, t1); // hev(p1 - q1)
380 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 1 * (q0 - p0)
381 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 2 * (q0 - p0)
382 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 3 * (q0 - p0)
387 t2 = _mm_adds_epi8(t1, t2); // 3 * (q0 - p0) + hev(p1 - q1) + 4
388 SIGNED_SHIFT_N(t2, 3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
395 SIGNED_SHIFT_N(t2, 3); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
400 SIGNED_SHIFT_N(t3, 1); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 4
404 *p1 = _mm_adds_epi8(*p1, t3); // p1 += t3
407 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
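
Taken together, the steps above are the normal VP8 inner filter on four pixels: the base delta includes the (p1 - q1) term only where hev holds, p0/q0 always move, and the outer taps p1/q1 move only where hev does not hold. A per-column scalar sketch under the same assumptions as the previous sketch (illustrative helper names; shifts arithmetic):

    #include <stdint.h>

    static int8_t clamp_s8(int v) {
      return (int8_t)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }
    static uint8_t clamp255(int v) {
      return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    /* Scalar sketch of DoFilter4 for one column that passed the filter mask. */
    static void Filter4Scalar(uint8_t* p1, uint8_t* p0, uint8_t* q0, uint8_t* q1,
                              int hev) {
      const int a  = clamp_s8((hev ? clamp_s8(*p1 - *q1) : 0) + 3 * (*q0 - *p0));
      const int f1 = clamp_s8(a + 4) >> 3;  /* (3*(q0-p0) + hev(p1-q1) + 4) >> 3 */
      const int f2 = clamp_s8(a + 3) >> 3;  /* (3*(q0-p0) + hev(p1-q1) + 3) >> 3 */
      *q0 = clamp255(*q0 - f1);
      *p0 = clamp255(*p0 + f2);
      if (!hev) {                           /* outer taps only where !hev */
        const int f3 = (f1 + 1) >> 1;       /* ~ (... + 4) >> 4 */
        *q1 = clamp255(*q1 - f3);
        *p1 = clamp255(*p1 + f3);
      }
    }
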
410 // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
411 static inline void DoFilter6(__m128i *p2, __m128i* p1, __m128i *p0,
418 GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
421 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
424 GET_BASE_DELTA(*p1, *p0, *q0, *q1, a);
456 UPDATE_2PIXELS(*p1, *q1, a1_lo, a1_hi);
461 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
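
DoFilter6 is the macroblock-edge variant: where hev holds it applies the simple p0/q0 update, and where it does not it spreads a weighted copy of the base delta over three pixel pairs (the UPDATE_2PIXELS call above is the p1/q1 pair). The weights are not visible among the matched lines; the sketch below writes in the 27/18/9 weights with +63 rounding and >>7 from the VP8 filter definition (helper names illustrative, shifts arithmetic):

    #include <stdint.h>

    static int8_t clamp_s8(int v) {
      return (int8_t)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }
    static uint8_t clamp255(int v) {
      return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    /* Scalar sketch of the !hev branch of DoFilter6 for one column that
       passed the filter mask. */
    static void Filter6NotHevScalar(uint8_t* p2, uint8_t* p1, uint8_t* p0,
                                    uint8_t* q0, uint8_t* q1, uint8_t* q2) {
      const int w = clamp_s8(clamp_s8(*p1 - *q1) + 3 * (*q0 - *p0)); /* base delta */
      int a;
      a = (27 * w + 63) >> 7;  /* strongest correction, on p0/q0 */
      *q0 = clamp255(*q0 - a);  *p0 = clamp255(*p0 + a);
      a = (18 * w + 63) >> 7;  /* p1/q1 pair, cf. UPDATE_2PIXELS(*p1, *q1, ...) */
      *q1 = clamp255(*q1 - a);  *p1 = clamp255(*p1 + a);
      a = (9 * w + 63) >> 7;   /* outermost pair p2/q2 */
      *q2 = clamp255(*q2 - a);  *p2 = clamp255(*p2 + a);
    }
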
510 __m128i* p1, __m128i* p0,
524 // p1 = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
528 Load8x4(r0, stride, p1, q0);
531 t1 = *p1;
533 // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
537 *p1 = _mm_unpacklo_epi64(t1, *p0);
552 static inline void Store16x4(uint8_t* r0, uint8_t* r8, int stride, __m128i* p1,
557 // p1 = f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
559 *p0 = _mm_unpacklo_epi8(*p1, t1);
560 *p1 = _mm_unpackhi_epi8(*p1, t1);
574 // p1 = b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
576 t1 = *p1;
577 *p1 = _mm_unpacklo_epi16(t1, *q1);
584 Store4x4(p1, r8, stride);
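
The layout comments above trace a 16x4 transpose: Load16x4 reads 16 rows of 4 consecutive bytes (8 rows from r0, 8 from r8) and ends up with each of p1, p0, q0, q1 holding one column of 16 pixels, so the same filter kernels can serve the vertical-edge case; Store16x4 is the inverse shuffle. The data movement, viewed scalarly (illustrative sketch, not the actual SSE2 path):

    #include <stdint.h>

    /* Scalar view of Load16x4: byte i of each output array is the pixel from
       row i (rows 0..7 taken from r0, rows 8..15 from r8). */
    static void Load16x4Scalar(const uint8_t* r0, const uint8_t* r8, int stride,
                               uint8_t p1[16], uint8_t p0[16],
                               uint8_t q0[16], uint8_t q1[16]) {
      int i;
      for (i = 0; i < 16; ++i) {
        const uint8_t* const row = (i < 8) ? r0 + i * stride
                                           : r8 + (i - 8) * stride;
        p1[i] = row[0];
        p0[i] = row[1];
        q0[i] = row[2];
        q1[i] = row[3];
      }
    }
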
594 __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
599 DoFilter2(&p1, &p0, &q0, &q1, thresh);
607 __m128i p1, p0, q0, q1;
609 p -= 2; // beginning of p1
611 Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
612 DoFilter2(&p1, &p0, &q0, &q1, thresh);
613 Store16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
635 #define MAX_DIFF1(p3, p2, p1, p0, m) { \
637 m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
638 m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
641 #define MAX_DIFF2(p3, p2, p1, p0, m) { \
643 m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
644 m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
672 #define COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask) { \
677 NeedsFilter(&p1, &p0, &q0, &q1, thresh, &fl_yes); \
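
MAX_DIFF1/MAX_DIFF2 keep the largest difference between neighbouring pixels on each side of the edge, and COMPLEX_FL_MASK then requires that maximum to stay within ithresh while the edge itself passes NeedsFilter against thresh. A rough scalar sketch of the combined mask, restricted to the differences visible in the matched lines (names and the p[]/q[] layout are illustrative):

    #include <stdint.h>
    #include <stdlib.h>

    /* p[0]=p0 .. p[3]=p3, q[0]=q0 .. q[3]=q3 for one pixel column. */
    static int ComplexMaskScalar(const uint8_t p[4], const uint8_t q[4],
                                 int thresh, int ithresh) {
      int max_diff = 0, i;
      for (i = 0; i < 3; ++i) {      /* interior smoothness on both sides */
        const int dp = abs(p[i + 1] - p[i]);
        const int dq = abs(q[i + 1] - q[i]);
        if (dp > max_diff) max_diff = dp;
        if (dq > max_diff) max_diff = dq;
      }
      return max_diff <= ithresh &&  /* interior limit */
             abs(p[0] - q[0]) * 2 + abs(p[1] - q[1]) / 2 <= thresh;  /* edge limit */
    }
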
686 __m128i p2, p1, p0, q0, q1, q2;
688 // Load p3, p2, p1, p0
689 LOAD_H_EDGES4(p - 4 * stride, stride, t1, p2, p1, p0);
690 MAX_DIFF1(t1, p2, p1, p0, mask);
696 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
697 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
701 _mm_storeu_si128((__m128i*)&p[-2 * stride], p1);
711 __m128i p3, p2, p1, p0, q0, q1, q2, q3;
714 Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
715 MAX_DIFF1(p3, p2, p1, p0, mask);
720 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
721 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
723 Store16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
732 __m128i t1, t2, p1, p0, q0, q1;
735 // Load p3, p2, p1, p0
736 LOAD_H_EDGES4(p, stride, t2, t1, p1, p0);
737 MAX_DIFF1(t2, t1, p1, p0, mask);
745 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
746 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
749 _mm_storeu_si128((__m128i*)&p[-2 * stride], p1);
761 __m128i t1, t2, p1, p0, q0, q1;
765 Load16x4(b, b + 8 * stride, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
766 MAX_DIFF1(t2, t1, p1, p0, mask);
772 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
773 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
775 b -= 2; // beginning of p1
776 Store16x4(b, b + 8 * stride, stride, &p1, &p0, &q0, &q1);
786 __m128i t1, p2, p1, p0, q0, q1, q2;
788 // Load p3, p2, p1, p0
789 LOADUV_H_EDGES4(u - 4 * stride, v - 4 * stride, stride, t1, p2, p1, p0);
790 MAX_DIFF1(t1, p2, p1, p0, mask);
796 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
797 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
801 STOREUV(p1, u, v, -2 * stride);
811 __m128i p3, p2, p1, p0, q0, q1, q2, q3;
815 Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
816 MAX_DIFF1(p3, p2, p1, p0, mask);
821 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
822 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
824 Store16x4(tu, tv, stride, &p3, &p2, &p1, &p0);
831 __m128i t1, t2, p1, p0, q0, q1;
833 // Load p3, p2, p1, p0
834 LOADUV_H_EDGES4(u, v, stride, t2, t1, p1, p0);
835 MAX_DIFF1(t2, t1, p1, p0, mask);
844 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
845 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
848 STOREUV(p1, u, v, -2 * stride);
857 __m128i t1, t2, p1, p0, q0, q1;
858 Load16x4(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
859 MAX_DIFF1(t2, t1, p1, p0, mask);
866 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
867 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
869 u -= 2; // beginning of p1
871 Store16x4(u, v, stride, &p1, &p0, &q0, &q1);