Lines Matching refs:q0
279 #define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) { \
282 const __m128i t_2 = MM_ABS(q1, q0); \
286   const __m128i t_4 = _mm_subs_epu8(t_2, h);  /* abs(q1 - q0) - hev_thresh */ \
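The lines above are the core of GET_NOTHEV: unsigned-saturating subtraction (_mm_subs_epu8) of hev_thresh from each absolute neighbor gap yields zero exactly when that gap is within the threshold. A scalar sketch of the predicate these SSE2 lines vectorize, assuming the usual VP8 "high edge variance" definition:

#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of GET_NOTHEV: the edge has low variance ("not hev")
 * when both inner neighbor gaps stay within hev_thresh. In the SSE2
 * version, (abs - thresh) saturates to 0 iff abs <= thresh. */
static int NotHev(uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
                  int hev_thresh) {
  return abs(p1 - p0) <= hev_thresh && abs(q1 - q0) <= hev_thresh;
}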
292 #define GET_BASE_DELTA(p1, p0, q0, q1, o) { \
293 const __m128i qp0 = _mm_subs_epi8(q0, p0); /* q0 - p0 */ \
295 o = _mm_adds_epi8(o, qp0); /* p1 - q1 + 1 * (q0 - p0) */ \
296 o = _mm_adds_epi8(o, qp0); /* p1 - q1 + 2 * (q0 - p0) */ \
297 o = _mm_adds_epi8(o, qp0); /* p1 - q1 + 3 * (q0 - p0) */ \
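GET_BASE_DELTA builds VP8's shared filter term a = (p1 - q1) + 3 * (q0 - p0), with signed saturation applied at every step; the repeated _mm_adds_epi8 is therefore not a plain multiply by 3. A scalar sketch under that assumption:

#include <stdint.h>

static int8_t Clamp8(int v) {   /* saturate to the int8 range [-128, 127] */
  return (int8_t)(v < -128 ? -128 : v > 127 ? 127 : v);
}

/* Scalar sketch of GET_BASE_DELTA with per-step saturation. */
static int8_t BaseDelta(int8_t p1, int8_t p0, int8_t q0, int8_t q1) {
  const int8_t qp0 = Clamp8(q0 - p0);
  int8_t a = Clamp8(p1 - q1);
  a = Clamp8(a + qp0);          /* (p1 - q1) + 1 * (q0 - p0) */
  a = Clamp8(a + qp0);          /* (p1 - q1) + 2 * (q0 - p0) */
  a = Clamp8(a + qp0);          /* (p1 - q1) + 3 * (q0 - p0) */
  return a;
}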
300 #define DO_SIMPLE_FILTER(p0, q0, fl) { \
308 q0 = _mm_subs_epi8(q0, v4); /* q0 -= v4 */ \
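Only the q0 update of DO_SIMPLE_FILTER matched the search; the unmatched lines compute the two rounded step sizes and apply the symmetric p0 update. A scalar sketch, assuming VP8's standard (a + 4) >> 3 / (a + 3) >> 3 rounding pair and an arithmetic right shift:

/* Scalar sketch of DO_SIMPLE_FILTER (Clamp8 as in the sketch above). */
static void SimpleFilter2(int8_t* p0, int8_t* q0, int8_t a) {
  const int8_t v4 = (int8_t)(Clamp8(a + 4) >> 3);  /* step subtracted from q0 */
  const int8_t v3 = (int8_t)(Clamp8(a + 3) >> 3);  /* step added to p0 */
  *q0 = Clamp8(*q0 - v4);                          /* q0 -= v4 */
  *p0 = Clamp8(*p0 + v3);                          /* p0 += v3 */
}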
326 static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
333 *mask = MM_ABS(*p0, *q0); // abs(p0 - q0)
334 *mask = _mm_adds_epu8(*mask, *mask); // abs(p0 - q0) * 2
335 *mask = _mm_adds_epu8(*mask, t1); // abs(p0 - q0) * 2 + abs(p1 - q1) / 2
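NeedsFilter computes the per-pixel "filter this edge at all?" mask, and its running comments spell out the quantity being tested. In scalar form the predicate is simply:

#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of NeedsFilter: filter only sufficiently weak edges. */
static int NeedsFilterScalar(uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
                             int thresh) {
  return abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= thresh;
}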
345 // Applies filter on 2 pixels (p0 and q0)
346 static WEBP_INLINE void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
353 NeedsFilter(p1, p0, q0, q1, thresh, &mask);
356 FLIP_SIGN_BIT2(*p0, *q0);
358 GET_BASE_DELTA(p1s, *p0, *q0, q1s, a);
360 DO_SIMPLE_FILTER(*p0, *q0, a);
363 FLIP_SIGN_BIT2(*p0, *q0);
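DoFilter2 brackets its signed arithmetic with FLIP_SIGN_BIT2 because SSE2 has no saturating signed-delta operations on unsigned 8-bit pixel values. Assuming the macro is the usual xor-with-0x80 idiom, the trick looks like this:

#include <emmintrin.h>

/* Sketch of the sign-flip trick assumed behind FLIP_SIGN_BIT2/4: XOR with
 * 0x80 remaps unsigned pixels [0, 255] onto signed [-128, 127], so
 * _mm_adds_epi8/_mm_subs_epi8 then clamp exactly at the pixel range.
 * The operation is its own inverse, hence the second flip after filtering. */
static __m128i FlipSign(const __m128i v) {
  const __m128i sign_bit = _mm_set1_epi8((char)0x80);
  return _mm_xor_si128(v, sign_bit);
}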
366 // Applies filter on 4 pixels (p1, p0, q0 and q1)
368 __m128i* q0, __m128i* q1,
375 GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
378 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
382 t2 = _mm_subs_epi8(*q0, *p0); // q0 - p0
383 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 1 * (q0 - p0)
384 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 2 * (q0 - p0)
385 t1 = _mm_adds_epi8(t1, t2); // hev(p1 - q1) + 3 * (q0 - p0)
390      t2 = _mm_adds_epi8(t1, t2);            // 3 * (q0 - p0) + hev(p1 - q1) + 4
391 SIGNED_SHIFT_N(t2, 3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
393 *q0 = _mm_subs_epi8(*q0, t2); // q0 -= t2
398 SIGNED_SHIFT_N(t2, 3); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
403 SIGNED_SHIFT_N(t3, 1); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 4
410 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
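DoFilter4 derives three step sizes from one accumulator: the +4 variant updates q0, the +3 variant updates p0, and the saved +4 result is halved once more (the ">> 4" comment) to nudge the outer pair, but only where the edge is not high-variance. A scalar sketch, assuming VP8's normal-filter rounding:

/* Scalar sketch of DoFilter4's update (Clamp8 as above), given the base
 * delta a = 3 * (q0 - p0) + hev(p1 - q1): */
static void NormalFilterUpdate(int8_t a, int not_hev,
                               int8_t* p1, int8_t* p0,
                               int8_t* q0, int8_t* q1) {
  const int8_t f1 = (int8_t)(Clamp8(a + 4) >> 3);
  const int8_t f2 = (int8_t)(Clamp8(a + 3) >> 3);
  *q0 = Clamp8(*q0 - f1);
  *p0 = Clamp8(*p0 + f2);
  if (not_hev) {                      /* low-variance edge: adjust p1/q1 too */
    const int8_t f3 = (int8_t)((f1 + 1) >> 1);   /* ~((a + 4) >> 4) */
    *q1 = Clamp8(*q1 - f3);
    *p1 = Clamp8(*p1 + f3);
  }
}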
413 // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
415                                  __m128i* q0, __m128i* q1, __m128i* q2,
421 GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
424 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
427 GET_BASE_DELTA(*p1, *p0, *q0, *q1, a);
432 DO_SIMPLE_FILTER(*p0, *q0, f);
460 UPDATE_2PIXELS(*p0, *q0, a0_lo, a0_hi);
464 FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
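DoFilter6 is the strong (macroblock-edge) filter: on non-hev pixels the base delta is widened to 16 bits, scaled by three taps, and applied pairwise by UPDATE_2PIXELS, working outward from the edge. A scalar sketch of the taps, assuming the standard VP8 weights 27/18/9 with +63 rounding:

/* Scalar sketch of the strong-filter taps assumed in DoFilter6
 * (Clamp8 as above; w is the clamped base delta): */
static void StrongFilterUpdate(int w, int8_t* p2, int8_t* p1, int8_t* p0,
                               int8_t* q0, int8_t* q1, int8_t* q2) {
  const int8_t a0 = Clamp8((27 * w + 63) >> 7);   /* innermost pair */
  const int8_t a1 = Clamp8((18 * w + 63) >> 7);
  const int8_t a2 = Clamp8(( 9 * w + 63) >> 7);   /* outermost pair */
  *q0 = Clamp8(*q0 - a0);  *p0 = Clamp8(*p0 + a0);
  *q1 = Clamp8(*q1 - a1);  *p1 = Clamp8(*p1 + a1);
  *q2 = Clamp8(*q2 - a2);  *p2 = Clamp8(*p2 + a2);
}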
515 __m128i* q0, __m128i* q1) {
529 // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
532 Load8x4(r0, stride, p1, q0);
536 t2 = *q0;
539 // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
543 *q0 = _mm_unpacklo_epi64(t2, *q1);
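Load16x4 is a byte-level transpose: for vertical edges the p1/p0/q0/q1 samples sit in columns, so pairs of rows are interleaved with unpack instructions until each register holds one sample position for all 16 columns (the hex comments above trace the byte shuffle). A minimal self-contained sketch of the unpack pattern:

#include <emmintrin.h>
#include <stdint.h>

/* Sketch of the unpack-based transpose idea assumed in Load8x4/Load16x4:
 * interleaving at byte then 16-bit granularity brings bytes that started
 * in the same column next to each other. */
static void Transpose4x8(const uint8_t* r0, const uint8_t* r1,
                         const uint8_t* r2, const uint8_t* r3,
                         __m128i* out_lo, __m128i* out_hi) {
  const __m128i a = _mm_loadl_epi64((const __m128i*)r0);
  const __m128i b = _mm_loadl_epi64((const __m128i*)r1);
  const __m128i c = _mm_loadl_epi64((const __m128i*)r2);
  const __m128i d = _mm_loadl_epi64((const __m128i*)r3);
  const __m128i ab = _mm_unpacklo_epi8(a, b);  /* a0 b0 a1 b1 ... a7 b7 */
  const __m128i cd = _mm_unpacklo_epi8(c, d);  /* c0 d0 c1 d1 ... c7 d7 */
  *out_lo = _mm_unpacklo_epi16(ab, cd);        /* columns 0..3 of rows a-d */
  *out_hi = _mm_unpackhi_epi16(ab, cd);        /* columns 4..7 of rows a-d */
}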
558 __m128i* q0, __m128i* q1) {
567 // q0 = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
569 t1 = *q0;
570 *q0 = _mm_unpacklo_epi8(t1, *q1);
574 // q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
576 *p0 = _mm_unpacklo_epi16(t1, *q0);
577 *q0 = _mm_unpackhi_epi16(t1, *q0);
587 Store4x4(q0, r0, stride);
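Store16x4 runs the transpose in reverse and hands each re-interleaved register to Store4x4, which scatters it back as four 4-byte row chunks. A sketch of that scatter, assuming the usual cvtsi128_si32 + byte-shift idiom:

#include <emmintrin.h>
#include <stdint.h>
#include <string.h>

/* Sketch of a Store4x4-style scatter: the 16 bytes of x become four
 * consecutive 4-byte groups, one per output row. */
static void Store4x4Sketch(__m128i x, uint8_t* dst, int stride) {
  int i;
  for (i = 0; i < 4; ++i, dst += stride) {
    const int32_t v = _mm_cvtsi128_si32(x);  /* low 4 bytes */
    memcpy(dst, &v, 4);                      /* unaligned-safe 4-byte store */
    x = _mm_srli_si128(x, 4);                /* expose the next 4 bytes */
  }
}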
601 __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
604 DoFilter2(&p1, &p0, &q0, &q1, thresh);
608 _mm_storeu_si128((__m128i*)p, q0);
612 __m128i p1, p0, q0, q1;
616 Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
617 DoFilter2(&p1, &p0, &q0, &q1, thresh);
618 Store16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
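The two simple-filter drivers show the pattern every later driver repeats: the V variant (horizontal edge) loads whole 16-pixel rows around the edge and stores the two changed rows back directly, while the H variant (vertical edge) brackets the identical DoFilter2 call with Load16x4/Store16x4 transposes. A sketch of the V driver, reconstructed from the fragments above (the row offsets for p1/p0/q1 are assumptions):

/* Sketch of a SimpleVFilter16-style driver: rows straddling the edge,
 * no transpose needed; DoFilter2 modifies only p0 and q0. */
static void SimpleVFilter16Sketch(uint8_t* p, int stride, int thresh) {
  __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
  __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
  __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
  __m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
  DoFilter2(&p1, &p0, &q0, &q1, thresh);
  _mm_storeu_si128((__m128i*)&p[-stride], p0);
  _mm_storeu_si128((__m128i*)&p[0], q0);
}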
677 #define COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask) { \
682 NeedsFilter(&p1, &p0, &q0, &q1, thresh, &fl_yes); \
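COMPLEX_FL_MASK combines the two gates of the complex filters: `mask` already holds the largest interior step (accumulated by MAX_DIFF1/MAX_DIFF2), which must stay within ithresh, and the edge itself must pass NeedsFilter. A scalar sketch of the combined predicate (NeedsFilterScalar as sketched earlier):

#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of the complex-filter mask over the 8-pixel window
 * px = { p3, p2, p1, p0, q0, q1, q2, q3 }. */
static int ComplexFilterMask(const uint8_t px[8], int thresh, int ithresh) {
  int i, max_interior = 0;
  for (i = 0; i + 1 < 8; ++i) {
    const int d = abs(px[i] - px[i + 1]);
    if (i != 3 && d > max_interior) max_interior = d;  /* skip the p0-q0 edge */
  }
  return max_interior <= ithresh &&
         NeedsFilterScalar(px[2], px[3], px[4], px[5], thresh);
}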
691 __m128i p2, p1, p0, q0, q1, q2;
697 // Load q0, q1, q2, q3
698 LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
699 MAX_DIFF2(t1, q2, q1, q0, mask);
701 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
702 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
708 _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
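VFilter16 is the first full complex driver: gather the four rows on each side of the horizontal edge, fold their step differences into `mask`, AND in the ithresh/NeedsFilter gate, then let DoFilter6 rewrite the six inner rows. A condensed flow sketch in the file's own vocabulary, assuming the helper macros keep their apparent roles:

/* Condensed VFilter16-style flow (a sketch, not the exact function body). */
static void VFilter16Sketch(uint8_t* p, int stride,
                            int thresh, int ithresh, int hev_thresh) {
  __m128i t1, p3, p2, p1, p0, q0, q1, q2, mask;

  LOAD_H_EDGES4(p - 4 * stride, stride, p3, p2, p1, p0);  /* rows above */
  MAX_DIFF1(p3, p2, p1, p0, mask);       /* seed mask with p-side steps */

  LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);               /* t1 = q3 */
  MAX_DIFF2(t1, q2, q1, q0, mask);       /* fold in q-side steps */

  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
  /* ...store the six modified rows back, as the q0 store above (708) shows. */
}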
716 __m128i p3, p2, p1, p0, q0, q1, q2, q3;
722 Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
723 MAX_DIFF2(q3, q2, q1, q0, mask);
725 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
726 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
729 Store16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
737 __m128i t1, t2, p1, p0, q0, q1;
746 // Load q0, q1, q2, q3
747 LOAD_H_EDGES4(p, stride, q0, q1, t1, t2);
748 MAX_DIFF2(t2, t1, q1, q0, mask);
750 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
751 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
756 _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
766 __m128i t1, t2, p1, p0, q0, q1;
773 b += 4; // beginning of q0
774 Load16x4(b, b + 8 * stride, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
775 MAX_DIFF2(t2, t1, q1, q0, mask);
777 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
778 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
781 Store16x4(b, b + 8 * stride, stride, &p1, &p0, &q0, &q1);
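The *16i variants filter the three inner edges of a macroblock: the fragment shows HFilter16i advancing `b` by 4 pixels to the start of each successive q0 column and wrapping DoFilter4 in the 16x4 transpose. A sketch of the stepping pattern; FilterInnerEdge is a hypothetical stand-in for the per-edge body:

/* Sketch of the assumed inner-edge loop: three passes, each one 4 pixels
 * further into the macroblock. FilterInnerEdge() is hypothetical shorthand
 * for the Load16x4 / mask / DoFilter4 / Store16x4 sequence above. */
static void HFilter16iSketch(uint8_t* p, int stride,
                             int thresh, int ithresh, int hev_thresh) {
  uint8_t* b = p;
  int k;
  for (k = 3; k > 0; --k) {
    b += 4;                              /* beginning of the next q0 */
    FilterInnerEdge(b, stride, thresh, ithresh, hev_thresh);
  }
}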
791 __m128i t1, p2, p1, p0, q0, q1, q2;
797 // Load q0, q1, q2, q3
798                     q0, q1, q2, t1);  // cont. of LOADUV_H_EDGES4(u, v, stride, ...)
799 MAX_DIFF2(t1, q2, q1, q0, mask);
801 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
802 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
808 STOREUV(q0, u, v, 0 * stride);
816 __m128i p3, p2, p1, p0, q0, q1, q2, q3;
823 Load16x4(u, v, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
824 MAX_DIFF2(q3, q2, q1, q0, mask);
826 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
827 DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
830 Store16x4(u, v, stride, &q0, &q1, &q2, &q3);
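The chroma drivers process both 8-pixel-wide U and V planes in one pass: each __m128i is assumed to carry a row of U in its low eight bytes and the matching row of V in its high eight bytes, which is what lets Load16x4/Store16x4 and LOADUV_H_EDGES4/STOREUV feed the same filter cores. A sketch of that packing:

#include <emmintrin.h>
#include <stdint.h>

/* Sketch of the assumed U/V packing: U row in the low half, V row in the
 * high half of a single 16-byte register. */
static __m128i LoadUVRow(const uint8_t* u, const uint8_t* v) {
  const __m128i u8 = _mm_loadl_epi64((const __m128i*)u);
  const __m128i v8 = _mm_loadl_epi64((const __m128i*)v);
  return _mm_unpacklo_epi64(u8, v8);
}

static void StoreUVRow(__m128i x, uint8_t* u, uint8_t* v) {
  _mm_storel_epi64((__m128i*)u, x);                     /* low half -> U */
  _mm_storel_epi64((__m128i*)v, _mm_srli_si128(x, 8));  /* high half -> V */
}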
836 __m128i t1, t2, p1, p0, q0, q1;
845 // Load q0, q1, q2, q3
846 LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
847 MAX_DIFF2(t2, t1, q1, q0, mask);
849 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
850 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
855 STOREUV(q0, u, v, 0 * stride);
862 __m128i t1, t2, p1, p0, q0, q1;
866 u += 4; // beginning of q0
868 Load16x4(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
869 MAX_DIFF2(t2, t1, q1, q0, mask);
871 COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
872 DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
876 Store16x4(u, v, stride, &p1, &p0, &q0, &q1);