Home | History | Annotate | Download | only in dsp
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Image transforms and color space conversion methods for lossless decoder.
     11 //
     12 // Authors: Vikas Arora (vikaas.arora (at) gmail.com)
     13 //          Jyrki Alakuijala (jyrki (at) google.com)
     14 //          Urvang Joshi (urvang (at) google.com)
     15 
     16 #include "src/dsp/dsp.h"
     17 
     18 #include <assert.h>
     19 #include <math.h>
     20 #include <stdlib.h>
     21 #include "src/dec/vp8li_dec.h"
     22 #include "src/utils/endian_inl_utils.h"
     23 #include "src/dsp/lossless.h"
     24 #include "src/dsp/lossless_common.h"
     25 
     26 #define MAX_DIFF_COST (1e30f)
     27 
     28 //------------------------------------------------------------------------------
     29 // Image transforms.
     30 
     31 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
     32   return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
     33 }
     34 
     35 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
     36   return Average2(Average2(a0, a2), a1);
     37 }
     38 
     39 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
     40                                      uint32_t a2, uint32_t a3) {
     41   return Average2(Average2(a0, a1), Average2(a2, a3));
     42 }
     43 
     44 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
     45   if (a < 256) {
     46     return a;
     47   }
     48   // return 0, when a is a negative integer.
     49   // return 255, when a is positive.
     50   return ~a >> 24;
     51 }
     52 
     53 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
     54   return Clip255(a + b - c);
     55 }
     56 
     57 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
     58                                                    uint32_t c2) {
     59   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
     60   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
     61                                          (c1 >> 16) & 0xff,
     62                                          (c2 >> 16) & 0xff);
     63   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
     64                                          (c1 >> 8) & 0xff,
     65                                          (c2 >> 8) & 0xff);
     66   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
     67   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     68 }
     69 
     70 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
     71   return Clip255(a + (a - b) / 2);
     72 }
     73 
     74 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
     75                                                    uint32_t c2) {
     76   const uint32_t ave = Average2(c0, c1);
     77   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
     78   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
     79   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
     80   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
     81   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     82 }
     83 
     84 // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
     85 // inlined.
     86 #if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
     87 # define LOCAL_INLINE __attribute__ ((noinline))
     88 #else
     89 # define LOCAL_INLINE WEBP_INLINE
     90 #endif
     91 
     92 static LOCAL_INLINE int Sub3(int a, int b, int c) {
     93   const int pb = b - c;
     94   const int pa = a - c;
     95   return abs(pb) - abs(pa);
     96 }
     97 
     98 #undef LOCAL_INLINE
     99 
    100 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
    101   const int pa_minus_pb =
    102       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
    103       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
    104       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
    105       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
    106   return (pa_minus_pb <= 0) ? a : b;
    107 }
    108 
    109 //------------------------------------------------------------------------------
    110 // Predictors
    111 
    112 static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) {
    113   (void)top;
    114   (void)left;
    115   return ARGB_BLACK;
    116 }
    117 static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) {
    118   (void)top;
    119   return left;
    120 }
    121 static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) {
    122   (void)left;
    123   return top[0];
    124 }
    125 static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) {
    126   (void)left;
    127   return top[1];
    128 }
    129 static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) {
    130   (void)left;
    131   return top[-1];
    132 }
    133 static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) {
    134   const uint32_t pred = Average3(left, top[0], top[1]);
    135   return pred;
    136 }
    137 static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) {
    138   const uint32_t pred = Average2(left, top[-1]);
    139   return pred;
    140 }
    141 static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) {
    142   const uint32_t pred = Average2(left, top[0]);
    143   return pred;
    144 }
    145 static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) {
    146   const uint32_t pred = Average2(top[-1], top[0]);
    147   (void)left;
    148   return pred;
    149 }
    150 static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) {
    151   const uint32_t pred = Average2(top[0], top[1]);
    152   (void)left;
    153   return pred;
    154 }
    155 static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) {
    156   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
    157   return pred;
    158 }
    159 static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) {
    160   const uint32_t pred = Select(top[0], left, top[-1]);
    161   return pred;
    162 }
    163 static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) {
    164   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
    165   return pred;
    166 }
    167 static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
    168   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
    169   return pred;
    170 }
    171 
// Defines PredictorAdd0_C from Predictor0_C. GENERATE_PREDICTOR_ADD is not
// defined in this file (presumably in lossless_common.h -- confirm); the
// result is called below as PredictorAdd0_C(in, upper, num_pixels, out).
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
    173 static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
    174                             int num_pixels, uint32_t* out) {
    175   int i;
    176   uint32_t left = out[-1];
    177   for (i = 0; i < num_pixels; ++i) {
    178     out[i] = left = VP8LAddPixels(in[i], left);
    179   }
    180   (void)upper;
    181 }
// Define PredictorAdd2_C ... PredictorAdd13_C from the matching PredictorN_C.
// GENERATE_PREDICTOR_ADD is not defined in this file (presumably in
// lossless_common.h -- confirm). The generated functions are stored in the
// VP8LPredictorsAdd tables and invoked as f(in, upper, num_pixels, out).
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C)
GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C)
GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C)
GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C)
GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C)
GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C)
GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C)
GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C)
GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C)
GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C)
    194 
    195 //------------------------------------------------------------------------------
    196 
    197 // Inverse prediction.
    198 static void PredictorInverseTransform_C(const VP8LTransform* const transform,
    199                                         int y_start, int y_end,
    200                                         const uint32_t* in, uint32_t* out) {
    201   const int width = transform->xsize_;
    202   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    203     PredictorAdd0_C(in, NULL, 1, out);
    204     PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
    205     in += width;
    206     out += width;
    207     ++y_start;
    208   }
    209 
    210   {
    211     int y = y_start;
    212     const int tile_width = 1 << transform->bits_;
    213     const int mask = tile_width - 1;
    214     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    215     const uint32_t* pred_mode_base =
    216         transform->data_ + (y >> transform->bits_) * tiles_per_row;
    217 
    218     while (y < y_end) {
    219       const uint32_t* pred_mode_src = pred_mode_base;
    220       int x = 1;
    221       // First pixel follows the T (mode=2) mode.
    222       PredictorAdd2_C(in, out - width, 1, out);
    223       // .. the rest:
    224       while (x < width) {
    225         const VP8LPredictorAddSubFunc pred_func =
    226             VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
    227         int x_end = (x & ~mask) + tile_width;
    228         if (x_end > width) x_end = width;
    229         pred_func(in + x, out + x - width, x_end - x, out + x);
    230         x = x_end;
    231       }
    232       in += width;
    233       out += width;
    234       ++y;
    235       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
    236         pred_mode_base += tiles_per_row;
    237       }
    238     }
    239   }
    240 }
    241 
    242 // Add green to blue and red channels (i.e. perform the inverse transform of
    243 // 'subtract green').
    244 void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
    245                                 uint32_t* dst) {
    246   int i;
    247   for (i = 0; i < num_pixels; ++i) {
    248     const uint32_t argb = src[i];
    249     const uint32_t green = ((argb >> 8) & 0xff);
    250     uint32_t red_blue = (argb & 0x00ff00ffu);
    251     red_blue += (green << 16) | green;
    252     red_blue &= 0x00ff00ffu;
    253     dst[i] = (argb & 0xff00ff00u) | red_blue;
    254   }
    255 }
    256 
    257 static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
    258                                            int8_t color) {
    259   return ((int)color_pred * color) >> 5;
    260 }
    261 
    262 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
    263                                                VP8LMultipliers* const m) {
    264   m->green_to_red_  = (color_code >>  0) & 0xff;
    265   m->green_to_blue_ = (color_code >>  8) & 0xff;
    266   m->red_to_blue_   = (color_code >> 16) & 0xff;
    267 }
    268 
    269 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
    270                                  const uint32_t* src, int num_pixels,
    271                                  uint32_t* dst) {
    272   int i;
    273   for (i = 0; i < num_pixels; ++i) {
    274     const uint32_t argb = src[i];
    275     const uint32_t green = argb >> 8;
    276     const uint32_t red = argb >> 16;
    277     int new_red = red & 0xff;
    278     int new_blue = argb & 0xff;
    279     new_red += ColorTransformDelta(m->green_to_red_, green);
    280     new_red &= 0xff;
    281     new_blue += ColorTransformDelta(m->green_to_blue_, green);
    282     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
    283     new_blue &= 0xff;
    284     dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    285   }
    286 }
    287 
    288 // Color space inverse transform.
    289 static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
    290                                          int y_start, int y_end,
    291                                          const uint32_t* src, uint32_t* dst) {
    292   const int width = transform->xsize_;
    293   const int tile_width = 1 << transform->bits_;
    294   const int mask = tile_width - 1;
    295   const int safe_width = width & ~mask;
    296   const int remaining_width = width - safe_width;
    297   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    298   int y = y_start;
    299   const uint32_t* pred_row =
    300       transform->data_ + (y >> transform->bits_) * tiles_per_row;
    301 
    302   while (y < y_end) {
    303     const uint32_t* pred = pred_row;
    304     VP8LMultipliers m = { 0, 0, 0 };
    305     const uint32_t* const src_safe_end = src + safe_width;
    306     const uint32_t* const src_end = src + width;
    307     while (src < src_safe_end) {
    308       ColorCodeToMultipliers(*pred++, &m);
    309       VP8LTransformColorInverse(&m, src, tile_width, dst);
    310       src += tile_width;
    311       dst += tile_width;
    312     }
    313     if (src < src_end) {  // Left-overs using C-version.
    314       ColorCodeToMultipliers(*pred++, &m);
    315       VP8LTransformColorInverse(&m, src, remaining_width, dst);
    316       src += remaining_width;
    317       dst += remaining_width;
    318     }
    319     ++y;
    320     if ((y & mask) == 0) pred_row += tiles_per_row;
    321   }
    322 }
    323 
    324 // Separate out pixels packed together using pixel-bundling.
    325 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
    326 #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
    327                             GET_INDEX, GET_VALUE)                              \
    328 static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
    329                    TYPE* dst, int y_start, int y_end, int width) {             \
    330   int y;                                                                       \
    331   for (y = y_start; y < y_end; ++y) {                                          \
    332     int x;                                                                     \
    333     for (x = 0; x < width; ++x) {                                              \
    334       *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    335     }                                                                          \
    336   }                                                                            \
    337 }                                                                              \
    338 STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
    339                            int y_start, int y_end, const TYPE* src,            \
    340                            TYPE* dst) {                                        \
    341   int y;                                                                       \
    342   const int bits_per_pixel = 8 >> transform->bits_;                            \
    343   const int width = transform->xsize_;                                         \
    344   const uint32_t* const color_map = transform->data_;                          \
    345   if (bits_per_pixel < 8) {                                                    \
    346     const int pixels_per_byte = 1 << transform->bits_;                         \
    347     const int count_mask = pixels_per_byte - 1;                                \
    348     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
    349     for (y = y_start; y < y_end; ++y) {                                        \
    350       uint32_t packed_pixels = 0;                                              \
    351       int x;                                                                   \
    352       for (x = 0; x < width; ++x) {                                            \
    353         /* We need to load fresh 'packed_pixels' once every                */  \
    354         /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
    355         /* is a power of 2, so can just use a mask for that, instead of    */  \
    356         /* decrementing a counter.                                         */  \
    357         if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
    358         *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
    359         packed_pixels >>= bits_per_pixel;                                      \
    360       }                                                                        \
    361     }                                                                          \
    362   } else {                                                                     \
    363     VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
    364   }                                                                            \
    365 }
    366 
    367 COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
    368                     uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
    369 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
    370                     uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
    371 
    372 #undef COLOR_INDEX_INVERSE
    373 
    374 void VP8LInverseTransform(const VP8LTransform* const transform,
    375                           int row_start, int row_end,
    376                           const uint32_t* const in, uint32_t* const out) {
    377   const int width = transform->xsize_;
    378   assert(row_start < row_end);
    379   assert(row_end <= transform->ysize_);
    380   switch (transform->type_) {
    381     case SUBTRACT_GREEN:
    382       VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
    383       break;
    384     case PREDICTOR_TRANSFORM:
    385       PredictorInverseTransform_C(transform, row_start, row_end, in, out);
    386       if (row_end != transform->ysize_) {
    387         // The last predicted row in this iteration will be the top-pred row
    388         // for the first row in next iteration.
    389         memcpy(out - width, out + (row_end - row_start - 1) * width,
    390                width * sizeof(*out));
    391       }
    392       break;
    393     case CROSS_COLOR_TRANSFORM:
    394       ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
    395       break;
    396     case COLOR_INDEXING_TRANSFORM:
    397       if (in == out && transform->bits_ > 0) {
    398         // Move packed pixels to the end of unpacked region, so that unpacking
    399         // can occur seamlessly.
    400         // Also, note that this is the only transform that applies on
    401         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
    402         // transforms work on effective width of xsize_.
    403         const int out_stride = (row_end - row_start) * width;
    404         const int in_stride = (row_end - row_start) *
    405             VP8LSubSampleSize(transform->xsize_, transform->bits_);
    406         uint32_t* const src = out + out_stride - in_stride;
    407         memmove(src, out, in_stride * sizeof(*src));
    408         ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
    409       } else {
    410         ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
    411       }
    412       break;
    413   }
    414 }
    415 
    416 //------------------------------------------------------------------------------
    417 // Color space conversion.
    418 
    419 static int is_big_endian(void) {
    420   static const union {
    421     uint16_t w;
    422     uint8_t b[2];
    423   } tmp = { 1 };
    424   return (tmp.b[0] != 1);
    425 }
    426 
    427 void VP8LConvertBGRAToRGB_C(const uint32_t* src,
    428                             int num_pixels, uint8_t* dst) {
    429   const uint32_t* const src_end = src + num_pixels;
    430   while (src < src_end) {
    431     const uint32_t argb = *src++;
    432     *dst++ = (argb >> 16) & 0xff;
    433     *dst++ = (argb >>  8) & 0xff;
    434     *dst++ = (argb >>  0) & 0xff;
    435   }
    436 }
    437 
    438 void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
    439                              int num_pixels, uint8_t* dst) {
    440   const uint32_t* const src_end = src + num_pixels;
    441   while (src < src_end) {
    442     const uint32_t argb = *src++;
    443     *dst++ = (argb >> 16) & 0xff;
    444     *dst++ = (argb >>  8) & 0xff;
    445     *dst++ = (argb >>  0) & 0xff;
    446     *dst++ = (argb >> 24) & 0xff;
    447   }
    448 }
    449 
    450 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
    451                                  int num_pixels, uint8_t* dst) {
    452   const uint32_t* const src_end = src + num_pixels;
    453   while (src < src_end) {
    454     const uint32_t argb = *src++;
    455     const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    456     const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
    457 #if (WEBP_SWAP_16BIT_CSP == 1)
    458     *dst++ = ba;
    459     *dst++ = rg;
    460 #else
    461     *dst++ = rg;
    462     *dst++ = ba;
    463 #endif
    464   }
    465 }
    466 
    467 void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
    468                                int num_pixels, uint8_t* dst) {
    469   const uint32_t* const src_end = src + num_pixels;
    470   while (src < src_end) {
    471     const uint32_t argb = *src++;
    472     const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    473     const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
    474 #if (WEBP_SWAP_16BIT_CSP == 1)
    475     *dst++ = gb;
    476     *dst++ = rg;
    477 #else
    478     *dst++ = rg;
    479     *dst++ = gb;
    480 #endif
    481   }
    482 }
    483 
    484 void VP8LConvertBGRAToBGR_C(const uint32_t* src,
    485                             int num_pixels, uint8_t* dst) {
    486   const uint32_t* const src_end = src + num_pixels;
    487   while (src < src_end) {
    488     const uint32_t argb = *src++;
    489     *dst++ = (argb >>  0) & 0xff;
    490     *dst++ = (argb >>  8) & 0xff;
    491     *dst++ = (argb >> 16) & 0xff;
    492   }
    493 }
    494 
    495 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
    496                        int swap_on_big_endian) {
    497   if (is_big_endian() == swap_on_big_endian) {
    498     const uint32_t* const src_end = src + num_pixels;
    499     while (src < src_end) {
    500       const uint32_t argb = *src++;
    501       WebPUint32ToMem(dst, BSwap32(argb));
    502       dst += sizeof(argb);
    503     }
    504   } else {
    505     memcpy(dst, src, num_pixels * sizeof(*src));
    506   }
    507 }
    508 
    509 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
    510                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
    511   switch (out_colorspace) {
    512     case MODE_RGB:
    513       VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
    514       break;
    515     case MODE_RGBA:
    516       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    517       break;
    518     case MODE_rgbA:
    519       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    520       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    521       break;
    522     case MODE_BGR:
    523       VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
    524       break;
    525     case MODE_BGRA:
    526       CopyOrSwap(in_data, num_pixels, rgba, 1);
    527       break;
    528     case MODE_bgrA:
    529       CopyOrSwap(in_data, num_pixels, rgba, 1);
    530       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    531       break;
    532     case MODE_ARGB:
    533       CopyOrSwap(in_data, num_pixels, rgba, 0);
    534       break;
    535     case MODE_Argb:
    536       CopyOrSwap(in_data, num_pixels, rgba, 0);
    537       WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
    538       break;
    539     case MODE_RGBA_4444:
    540       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    541       break;
    542     case MODE_rgbA_4444:
    543       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    544       WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
    545       break;
    546     case MODE_RGB_565:
    547       VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
    548       break;
    549     default:
    550       assert(0);          // Code flow should not reach here.
    551   }
    552 }
    553 
    554 //------------------------------------------------------------------------------
    555 
// Dispatch points of the lossless decoder. They are only declared here;
// VP8LDspInit() assigns the plain-C implementations and then lets the
// platform-specific initializers run.
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
VP8LPredictorFunc VP8LPredictors[16];

// exposed plain-C implementations (filled by VP8LDspInit() with the *_C
// functions of this file)
VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
VP8LPredictorFunc VP8LPredictors_C[16];

VP8LTransformColorInverseFunc VP8LTransformColorInverse;

VP8LConvertFunc VP8LConvertBGRAToRGB;
VP8LConvertFunc VP8LConvertBGRAToRGBA;
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
VP8LConvertFunc VP8LConvertBGRAToRGB565;
VP8LConvertFunc VP8LConvertBGRAToBGR;

VP8LMapARGBFunc VP8LMapColor32b;
VP8LMapAlphaFunc VP8LMapColor8b;

// Platform-specific initializers, defined in other translation units.
// VP8LDspInit() references each one only when the matching WEBP_USE_* macro
// is defined.
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPSdspR2(void);
extern void VP8LDspInitMSA(void);

// Holds the VP8GetCPUInfo value seen by the last completed VP8LDspInit().
// It starts out as its own address, presumably so that it cannot match a
// real VP8GetCPUInfo value and the first call always initializes.
static volatile VP8CPUInfo lossless_last_cpuinfo_used =
    (VP8CPUInfo)&lossless_last_cpuinfo_used;
    582 
    583 #define COPY_PREDICTOR_ARRAY(IN, OUT) do {                \
    584   (OUT)[0] = IN##0_C;                                     \
    585   (OUT)[1] = IN##1_C;                                     \
    586   (OUT)[2] = IN##2_C;                                     \
    587   (OUT)[3] = IN##3_C;                                     \
    588   (OUT)[4] = IN##4_C;                                     \
    589   (OUT)[5] = IN##5_C;                                     \
    590   (OUT)[6] = IN##6_C;                                     \
    591   (OUT)[7] = IN##7_C;                                     \
    592   (OUT)[8] = IN##8_C;                                     \
    593   (OUT)[9] = IN##9_C;                                     \
    594   (OUT)[10] = IN##10_C;                                   \
    595   (OUT)[11] = IN##11_C;                                   \
    596   (OUT)[12] = IN##12_C;                                   \
    597   (OUT)[13] = IN##13_C;                                   \
    598   (OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
    599   (OUT)[15] = IN##0_C;                                    \
    600 } while (0);
    601 
    602 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
    603   if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
    604 
    605   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors)
    606   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C)
    607   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
    608   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
    609 
    610 #if !WEBP_NEON_OMIT_C_CODE
    611   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
    612 
    613   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
    614 
    615   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
    616   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
    617   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
    618 #endif
    619 
    620   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
    621   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
    622 
    623   VP8LMapColor32b = MapARGB_C;
    624   VP8LMapColor8b = MapAlpha_C;
    625 
    626   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
    627   if (VP8GetCPUInfo != NULL) {
    628 #if defined(WEBP_USE_SSE2)
    629     if (VP8GetCPUInfo(kSSE2)) {
    630       VP8LDspInitSSE2();
    631     }
    632 #endif
    633 #if defined(WEBP_USE_MIPS_DSP_R2)
    634     if (VP8GetCPUInfo(kMIPSdspR2)) {
    635       VP8LDspInitMIPSdspR2();
    636     }
    637 #endif
    638 #if defined(WEBP_USE_MSA)
    639     if (VP8GetCPUInfo(kMSA)) {
    640       VP8LDspInitMSA();
    641     }
    642 #endif
    643   }
    644 
    645 #if defined(WEBP_USE_NEON)
    646   if (WEBP_NEON_OMIT_C_CODE ||
    647       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
    648     VP8LDspInitNEON();
    649   }
    650 #endif
    651 
    652   assert(VP8LAddGreenToBlueAndRed != NULL);
    653   assert(VP8LTransformColorInverse != NULL);
    654   assert(VP8LConvertBGRAToRGBA != NULL);
    655   assert(VP8LConvertBGRAToRGB != NULL);
    656   assert(VP8LConvertBGRAToBGR != NULL);
    657   assert(VP8LConvertBGRAToRGBA4444 != NULL);
    658   assert(VP8LConvertBGRAToRGB565 != NULL);
    659   assert(VP8LMapColor32b != NULL);
    660   assert(VP8LMapColor8b != NULL);
    661 
    662   lossless_last_cpuinfo_used = VP8GetCPUInfo;
    663 }
    664 #undef COPY_PREDICTOR_ARRAY
    665 
    666 //------------------------------------------------------------------------------
    667