Cross Reference: /external/webp/src/dsp/dec

Lines Matching defs:q0
279 #define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) {                      \
282   const __m128i t_2 = MM_ABS(q1, q0);                                          \
286   const __m128i t_4 = _mm_subs_epu8(t_2, h);  /* abs(q1 - q0) - hev_thresh */  \
292 #define GET_BASE_DELTA(p1, p0, q0, q1, o) {                                    \
293   const __m128i qp0 = _mm_subs_epi8(q0, p0);  /* q0 - p0 */                    \
295   o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 1 * (q0 - p0) */          \
296   o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 2 * (q0 - p0) */          \
297   o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 3 * (q0 - p0) */          \
300 #define DO_SIMPLE_FILTER(p0, q0, fl) {                                         \
308   q0 = _mm_subs_epi8(q0, v4);           /* q0 -= v4 */                         \
326 static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
333   *mask = MM_ABS(*p0, *q0);             // abs(p0 - q0)
334   *mask = _mm_adds_epu8(*mask, *mask);  // abs(p0 - q0) * 2
335   *mask = _mm_adds_epu8(*mask, t1);     // abs(p0 - q0) * 2 + abs(p1 - q1) / 2
345 // Applies filter on 2 pixels (p0 and q0)
346 static WEBP_INLINE void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
353   NeedsFilter(p1, p0, q0, q1, thresh, &mask);
356   FLIP_SIGN_BIT2(*p0, *q0);
358   GET_BASE_DELTA(p1s, *p0, *q0, q1s, a);
360   DO_SIMPLE_FILTER(*p0, *q0, a);
363   FLIP_SIGN_BIT2(*p0, *q0);
366 // Applies filter on 4 pixels (p1, p0, q0 and q1)
368                                   __m128i* q0, __m128i* q1,
375   GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
378   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
382   t2 = _mm_subs_epi8(*q0, *p0);        // q0 - p0
383   t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 1 * (q0 - p0)
384   t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 2 * (q0 - p0)
385   t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 3 * (q0 - p0)
390   t2 = _mm_adds_epi8(t1, t2);        // 3 * (q0 - p0) + hev(p1 - q1) + 4
391   SIGNED_SHIFT_N(t2, 3);             // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
393   *q0 = _mm_subs_epi8(*q0, t2);      // q0 -= t2
398   SIGNED_SHIFT_N(t2, 3);             // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
403   SIGNED_SHIFT_N(t3, 1);             // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 4
410   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
413 // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
415                                   __m128i* q0, __m128i* q1, __m128i *q2,
421   GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
424   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
427   GET_BASE_DELTA(*p1, *p0, *q0, *q1, a);
432     DO_SIMPLE_FILTER(*p0, *q0, f);
460     UPDATE_2PIXELS(*p0, *q0, a0_lo, a0_hi);
464   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
515                                  __m128i* q0, __m128i* q1) {
529   // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
532   Load8x4(r0, stride, p1, q0);
536   t2 = *q0;
539   // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
543   *q0 = _mm_unpacklo_epi64(t2, *q1);
558                                   __m128i* q0, __m128i* q1) {
567   // q0 = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
569   t1 = *q0;
570   *q0 = _mm_unpacklo_epi8(t1, *q1);
574   // q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
576   *p0 = _mm_unpacklo_epi16(t1, *q0);
577   *q0 = _mm_unpackhi_epi16(t1, *q0);
587   Store4x4(q0, r0, stride);
601   __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
604   DoFilter2(&p1, &p0, &q0, &q1, thresh);
608   _mm_storeu_si128((__m128i*)p, q0);
612   __m128i p1, p0, q0, q1;
616   Load16x4(p, p + 8 * stride,  stride, &p1, &p0, &q0, &q1);
617   DoFilter2(&p1, &p0, &q0, &q1, thresh);
618   Store16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
677 #define COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask) {               \
682   NeedsFilter(&p1, &p0, &q0, &q1, thresh, &fl_yes);                            \
691   __m128i p2, p1, p0, q0, q1, q2;
697   // Load q0, q1, q2, q3
698   LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
699   MAX_DIFF2(t1, q2, q1, q0, mask);
701   COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
702   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
708   _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
716   __m128i p3, p2, p1, p0, q0, q1, q2, q3;
722   Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);  // q0, q1, q2, q3
723   MAX_DIFF2(q3, q2, q1, q0, mask);
725   COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
726   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
729   Store16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
737   __m128i t1, t2, p1, p0, q0, q1;
746     // Load q0, q1, q2, q3
747     LOAD_H_EDGES4(p, stride, q0, q1, t1, t2);
748     MAX_DIFF2(t2, t1, q1, q0, mask);
750     COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
751     DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
756     _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
766   __m128i t1, t2, p1, p0, q0, q1;
773     b += 4;  // beginning of q0
774     Load16x4(b, b + 8 * stride, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
775     MAX_DIFF2(t2, t1, q1, q0, mask);
777     COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
778     DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
781     Store16x4(b, b + 8 * stride, stride, &p1, &p0, &q0, &q1);
791   __m128i t1, p2, p1, p0, q0, q1, q2;
797   // Load q0, q1, q2, q3
798   LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
799   MAX_DIFF2(t1, q2, q1, q0, mask);
801   COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
802   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
808   STOREUV(q0, u, v, 0 * stride);
816   __m128i p3, p2, p1, p0, q0, q1, q2, q3;
823   Load16x4(u, v, stride, &q0, &q1, &q2, &q3);    // q0, q1, q2, q3
824   MAX_DIFF2(q3, q2, q1, q0, mask);
826   COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
827   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
830   Store16x4(u, v, stride, &q0, &q1, &q2, &q3);
836   __m128i t1, t2, p1, p0, q0, q1;
845   // Load q0, q1, q2, q3
846   LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
847   MAX_DIFF2(t2, t1, q1, q0, mask);
849   COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
850   DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
855   STOREUV(q0, u, v, 0 * stride);
862   __m128i t1, t2, p1, p0, q0, q1;
866   u += 4;  // beginning of q0
868   Load16x4(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
869   MAX_DIFF2(t2, t1, q1, q0, mask);
871   COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
872   DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
876   Store16x4(u, v, stride, &p1, &p0, &q0, &q1);
OpenGrok