Home | History | Annotate | Download | only in dsp
      1 // Copyright 2015 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // SSE2 variant of alpha filters
     11 //
     12 // Author: Skal (pascal.massimino (at) gmail.com)
     13 
     14 #include "./dsp.h"
     15 
     16 #if defined(WEBP_USE_SSE2)
     17 
     18 #include <assert.h>
     19 #include <emmintrin.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 
     23 //------------------------------------------------------------------------------
     24 // Helpful macro.
     25 
     26 # define SANITY_CHECK(in, out)                                                 \
     27   assert(in != NULL);                                                          \
     28   assert(out != NULL);                                                         \
     29   assert(width > 0);                                                           \
     30   assert(height > 0);                                                          \
     31   assert(stride >= width);                                                     \
     32   assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
     33   (void)height;  // Silence unused warning.
     34 
     35 static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
     36                            uint8_t* dst, int length, int inverse) {
     37   int i;
     38   const int max_pos = length & ~31;
     39   assert(length >= 0);
     40   if (inverse) {
     41     for (i = 0; i < max_pos; i += 32) {
     42       const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i +  0]);
     43       const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
     44       const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i +  0]);
     45       const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
     46       const __m128i C0 = _mm_add_epi8(A0, B0);
     47       const __m128i C1 = _mm_add_epi8(A1, B1);
     48       _mm_storeu_si128((__m128i*)&dst[i +  0], C0);
     49       _mm_storeu_si128((__m128i*)&dst[i + 16], C1);
     50     }
     51     for (; i < length; ++i) dst[i] = src[i] + pred[i];
     52   } else {
     53     for (i = 0; i < max_pos; i += 32) {
     54       const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i +  0]);
     55       const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
     56       const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i +  0]);
     57       const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
     58       const __m128i C0 = _mm_sub_epi8(A0, B0);
     59       const __m128i C1 = _mm_sub_epi8(A1, B1);
     60       _mm_storeu_si128((__m128i*)&dst[i +  0], C0);
     61       _mm_storeu_si128((__m128i*)&dst[i + 16], C1);
     62     }
     63     for (; i < length; ++i) dst[i] = src[i] - pred[i];
     64   }
     65 }
     66 
     67 // Special case for left-based prediction (when preds==dst-1 or preds==src-1).
     68 static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length,
     69                             int inverse) {
     70   int i;
     71   if (length <= 0) return;
     72   if (inverse) {
     73     const int max_pos = length & ~7;
     74     __m128i last = _mm_set_epi32(0, 0, 0, dst[-1]);
     75     for (i = 0; i < max_pos; i += 8) {
     76       const __m128i A0 = _mm_loadl_epi64((const __m128i*)(src + i));
     77       const __m128i A1 = _mm_add_epi8(A0, last);
     78       const __m128i A2 = _mm_slli_si128(A1, 1);
     79       const __m128i A3 = _mm_add_epi8(A1, A2);
     80       const __m128i A4 = _mm_slli_si128(A3, 2);
     81       const __m128i A5 = _mm_add_epi8(A3, A4);
     82       const __m128i A6 = _mm_slli_si128(A5, 4);
     83       const __m128i A7 = _mm_add_epi8(A5, A6);
     84       _mm_storel_epi64((__m128i*)(dst + i), A7);
     85       last = _mm_srli_epi64(A7, 56);
     86     }
     87     for (; i < length; ++i) dst[i] = src[i] + dst[i - 1];
     88   } else {
     89     const int max_pos = length & ~31;
     90     for (i = 0; i < max_pos; i += 32) {
     91       const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i +  0    ));
     92       const __m128i B0 = _mm_loadu_si128((const __m128i*)(src + i +  0 - 1));
     93       const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16    ));
     94       const __m128i B1 = _mm_loadu_si128((const __m128i*)(src + i + 16 - 1));
     95       const __m128i C0 = _mm_sub_epi8(A0, B0);
     96       const __m128i C1 = _mm_sub_epi8(A1, B1);
     97       _mm_storeu_si128((__m128i*)(dst + i +  0), C0);
     98       _mm_storeu_si128((__m128i*)(dst + i + 16), C1);
     99     }
    100     for (; i < length; ++i) dst[i] = src[i] - src[i - 1];
    101   }
    102 }
    103 
    104 static void PredictLineC(const uint8_t* src, const uint8_t* pred,
    105                          uint8_t* dst, int length, int inverse) {
    106   int i;
    107   if (inverse) {
    108     for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
    109   } else {
    110     for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
    111   }
    112 }
    113 
    114 //------------------------------------------------------------------------------
    115 // Horizontal filter.
    116 
    117 static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
    118                                            int width, int height, int stride,
    119                                            int row, int num_rows,
    120                                            int inverse, uint8_t* out) {
    121   const uint8_t* preds;
    122   const size_t start_offset = row * stride;
    123   const int last_row = row + num_rows;
    124   SANITY_CHECK(in, out);
    125   in += start_offset;
    126   out += start_offset;
    127   preds = inverse ? out : in;
    128 
    129   if (row == 0) {
    130     // Leftmost pixel is the same as input for topmost scanline.
    131     out[0] = in[0];
    132     PredictLineLeft(in + 1, out + 1, width - 1, inverse);
    133     row = 1;
    134     preds += stride;
    135     in += stride;
    136     out += stride;
    137   }
    138 
    139   // Filter line-by-line.
    140   while (row < last_row) {
    141     // Leftmost pixel is predicted from above.
    142     PredictLineC(in, preds - stride, out, 1, inverse);
    143     PredictLineLeft(in + 1, out + 1, width - 1, inverse);
    144     ++row;
    145     preds += stride;
    146     in += stride;
    147     out += stride;
    148   }
    149 }
    150 
    151 //------------------------------------------------------------------------------
    152 // Vertical filter.
    153 
    154 static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
    155                                          int width, int height, int stride,
    156                                          int row, int num_rows,
    157                                          int inverse, uint8_t* out) {
    158   const uint8_t* preds;
    159   const size_t start_offset = row * stride;
    160   const int last_row = row + num_rows;
    161   SANITY_CHECK(in, out);
    162   in += start_offset;
    163   out += start_offset;
    164   preds = inverse ? out : in;
    165 
    166   if (row == 0) {
    167     // Very first top-left pixel is copied.
    168     out[0] = in[0];
    169     // Rest of top scan-line is left-predicted.
    170     PredictLineLeft(in + 1, out + 1, width - 1, inverse);
    171     row = 1;
    172     in += stride;
    173     out += stride;
    174   } else {
    175     // We are starting from in-between. Make sure 'preds' points to prev row.
    176     preds -= stride;
    177   }
    178 
    179   // Filter line-by-line.
    180   while (row < last_row) {
    181     PredictLineTop(in, preds, out, width, inverse);
    182     ++row;
    183     preds += stride;
    184     in += stride;
    185     out += stride;
    186   }
    187 }
    188 
    189 //------------------------------------------------------------------------------
    190 // Gradient filter.
    191 
    192 static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {
    193   const int g = a + b - c;
    194   return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit
    195 }
    196 
    197 static void GradientPredictDirect(const uint8_t* const row,
    198                                   const uint8_t* const top,
    199                                   uint8_t* const out, int length) {
    200   const int max_pos = length & ~7;
    201   int i;
    202   const __m128i zero = _mm_setzero_si128();
    203   for (i = 0; i < max_pos; i += 8) {
    204     const __m128i A0 = _mm_loadl_epi64((const __m128i*)&row[i - 1]);
    205     const __m128i B0 = _mm_loadl_epi64((const __m128i*)&top[i]);
    206     const __m128i C0 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
    207     const __m128i D = _mm_loadl_epi64((const __m128i*)&row[i]);
    208     const __m128i A1 = _mm_unpacklo_epi8(A0, zero);
    209     const __m128i B1 = _mm_unpacklo_epi8(B0, zero);
    210     const __m128i C1 = _mm_unpacklo_epi8(C0, zero);
    211     const __m128i E = _mm_add_epi16(A1, B1);
    212     const __m128i F = _mm_sub_epi16(E, C1);
    213     const __m128i G = _mm_packus_epi16(F, zero);
    214     const __m128i H = _mm_sub_epi8(D, G);
    215     _mm_storel_epi64((__m128i*)(out + i), H);
    216   }
    217   for (; i < length; ++i) {
    218     out[i] = row[i] - GradientPredictorC(row[i - 1], top[i], top[i - 1]);
    219   }
    220 }
    221 
    222 static void GradientPredictInverse(const uint8_t* const in,
    223                                    const uint8_t* const top,
    224                                    uint8_t* const row, int length) {
    225   if (length > 0) {
    226     int i;
    227     const int max_pos = length & ~7;
    228     const __m128i zero = _mm_setzero_si128();
    229     __m128i A = _mm_set_epi32(0, 0, 0, row[-1]);   // left sample
    230     for (i = 0; i < max_pos; i += 8) {
    231       const __m128i tmp0 = _mm_loadl_epi64((const __m128i*)&top[i]);
    232       const __m128i tmp1 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
    233       const __m128i B = _mm_unpacklo_epi8(tmp0, zero);
    234       const __m128i C = _mm_unpacklo_epi8(tmp1, zero);
    235       const __m128i tmp2 = _mm_loadl_epi64((const __m128i*)&in[i]);
    236       const __m128i D = _mm_unpacklo_epi8(tmp2, zero);   // base input
    237       const __m128i E = _mm_sub_epi16(B, C);  // unclipped gradient basis B - C
    238       __m128i out = zero;                     // accumulator for output
    239       __m128i mask_hi = _mm_set_epi32(0, 0, 0, 0xff);
    240       int k = 8;
    241       while (1) {
    242         const __m128i tmp3 = _mm_add_epi16(A, E);        // delta = A + B - C
    243         const __m128i tmp4 = _mm_min_epi16(tmp3, mask_hi);
    244         const __m128i tmp5 = _mm_max_epi16(tmp4, zero);  // clipped delta
    245         const __m128i tmp6 = _mm_add_epi16(tmp5, D);     // add to in[] values
    246         A = _mm_and_si128(tmp6, mask_hi);                // 1-complement clip
    247         out = _mm_or_si128(out, A);                      // accumulate output
    248         if (--k == 0) break;
    249         A = _mm_slli_si128(A, 2);                        // rotate left sample
    250         mask_hi = _mm_slli_si128(mask_hi, 2);            // rotate mask
    251       }
    252       A = _mm_srli_si128(A, 14);       // prepare left sample for next iteration
    253       _mm_storel_epi64((__m128i*)&row[i], _mm_packus_epi16(out, zero));
    254     }
    255     for (; i < length; ++i) {
    256       row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
    257     }
    258   }
    259 }
    260 
    261 static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
    262                                          int width, int height, int stride,
    263                                          int row, int num_rows,
    264                                          int inverse, uint8_t* out) {
    265   const size_t start_offset = row * stride;
    266   const int last_row = row + num_rows;
    267   SANITY_CHECK(in, out);
    268   in += start_offset;
    269   out += start_offset;
    270 
    271   // left prediction for top scan-line
    272   if (row == 0) {
    273     out[0] = in[0];
    274     PredictLineLeft(in + 1, out + 1, width - 1, inverse);
    275     row = 1;
    276     in += stride;
    277     out += stride;
    278   }
    279 
    280   // Filter line-by-line.
    281   while (row < last_row) {
    282     if (inverse) {
    283       PredictLineC(in, out - stride, out, 1, inverse);  // predict from above
    284       GradientPredictInverse(in + 1, out + 1 - stride, out + 1, width - 1);
    285     } else {
    286       PredictLineC(in, in - stride, out, 1, inverse);
    287       GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
    288     }
    289     ++row;
    290     in += stride;
    291     out += stride;
    292   }
    293 }
    294 
    295 #undef SANITY_CHECK
    296 
    297 //------------------------------------------------------------------------------
    298 
    299 static void HorizontalFilter(const uint8_t* data, int width, int height,
    300                              int stride, uint8_t* filtered_data) {
    301   DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
    302 }
    303 
    304 static void VerticalFilter(const uint8_t* data, int width, int height,
    305                            int stride, uint8_t* filtered_data) {
    306   DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
    307 }
    308 
    309 
    310 static void GradientFilter(const uint8_t* data, int width, int height,
    311                            int stride, uint8_t* filtered_data) {
    312   DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
    313 }
    314 
    315 
    316 //------------------------------------------------------------------------------
    317 
    318 static void VerticalUnfilter(int width, int height, int stride, int row,
    319                              int num_rows, uint8_t* data) {
    320   DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);
    321 }
    322 
    323 static void HorizontalUnfilter(int width, int height, int stride, int row,
    324                                int num_rows, uint8_t* data) {
    325   DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);
    326 }
    327 
    328 static void GradientUnfilter(int width, int height, int stride, int row,
    329                              int num_rows, uint8_t* data) {
    330   DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);
    331 }
    332 
    333 //------------------------------------------------------------------------------
    334 // Entry point
    335 
    336 extern void VP8FiltersInitSSE2(void);
    337 
    338 WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
    339   WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
    340   WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
    341   WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
    342 
    343   WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
    344   WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
    345   WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
    346 }
    347 
    348 #else  // !WEBP_USE_SSE2
    349 
    350 WEBP_DSP_INIT_STUB(VP8FiltersInitSSE2)
    351 
    352 #endif  // WEBP_USE_SSE2
    353