Home | History | Annotate | Download | only in dsp
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Image transforms and color space conversion methods for lossless decoder.
     11 //
     12 // Authors: Vikas Arora (vikaas.arora (at) gmail.com)
     13 //          Jyrki Alakuijala (jyrki (at) google.com)
     14 //          Urvang Joshi (urvang (at) google.com)
     15 
     16 #include "./dsp.h"
     17 
     18 #include <math.h>
     19 #include <stdlib.h>
     20 #include "../dec/vp8li_dec.h"
     21 #include "../utils/endian_inl_utils.h"
     22 #include "./lossless.h"
     23 #include "./lossless_common.h"
     24 
     25 #define MAX_DIFF_COST (1e30f)
     26 
     27 //------------------------------------------------------------------------------
     28 // Image transforms.
     29 
     30 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
     31   return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
     32 }
     33 
     34 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
     35   return Average2(Average2(a0, a2), a1);
     36 }
     37 
     38 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
     39                                      uint32_t a2, uint32_t a3) {
     40   return Average2(Average2(a0, a1), Average2(a2, a3));
     41 }
     42 
     43 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
     44   if (a < 256) {
     45     return a;
     46   }
     47   // return 0, when a is a negative integer.
     48   // return 255, when a is positive.
     49   return ~a >> 24;
     50 }
     51 
     52 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
     53   return Clip255(a + b - c);
     54 }
     55 
     56 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
     57                                                    uint32_t c2) {
     58   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
     59   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
     60                                          (c1 >> 16) & 0xff,
     61                                          (c2 >> 16) & 0xff);
     62   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
     63                                          (c1 >> 8) & 0xff,
     64                                          (c2 >> 8) & 0xff);
     65   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
     66   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     67 }
     68 
     69 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
     70   return Clip255(a + (a - b) / 2);
     71 }
     72 
     73 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
     74                                                    uint32_t c2) {
     75   const uint32_t ave = Average2(c0, c1);
     76   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
     77   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
     78   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
     79   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
     80   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     81 }
     82 
     83 // gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
     84 #if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
     85 # define LOCAL_INLINE __attribute__ ((noinline))
     86 #else
     87 # define LOCAL_INLINE WEBP_INLINE
     88 #endif
     89 
     90 static LOCAL_INLINE int Sub3(int a, int b, int c) {
     91   const int pb = b - c;
     92   const int pa = a - c;
     93   return abs(pb) - abs(pa);
     94 }
     95 
     96 #undef LOCAL_INLINE
     97 
     98 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
     99   const int pa_minus_pb =
    100       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
    101       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
    102       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
    103       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
    104   return (pa_minus_pb <= 0) ? a : b;
    105 }
    106 
    107 //------------------------------------------------------------------------------
    108 // Predictors
    109 
    110 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
    111   (void)top;
    112   (void)left;
    113   return ARGB_BLACK;
    114 }
    115 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
    116   (void)top;
    117   return left;
    118 }
    119 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
    120   (void)left;
    121   return top[0];
    122 }
    123 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
    124   (void)left;
    125   return top[1];
    126 }
    127 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
    128   (void)left;
    129   return top[-1];
    130 }
    131 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
    132   const uint32_t pred = Average3(left, top[0], top[1]);
    133   return pred;
    134 }
    135 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
    136   const uint32_t pred = Average2(left, top[-1]);
    137   return pred;
    138 }
    139 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
    140   const uint32_t pred = Average2(left, top[0]);
    141   return pred;
    142 }
    143 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
    144   const uint32_t pred = Average2(top[-1], top[0]);
    145   (void)left;
    146   return pred;
    147 }
    148 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
    149   const uint32_t pred = Average2(top[0], top[1]);
    150   (void)left;
    151   return pred;
    152 }
    153 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
    154   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
    155   return pred;
    156 }
    157 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
    158   const uint32_t pred = Select(top[0], left, top[-1]);
    159   return pred;
    160 }
    161 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
    162   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
    163   return pred;
    164 }
    165 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
    166   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
    167   return pred;
    168 }
    169 
    170 GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
    171 static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
    172                           int num_pixels, uint32_t* out) {
    173   int i;
    174   uint32_t left = out[-1];
    175   for (i = 0; i < num_pixels; ++i) {
    176     out[i] = left = VP8LAddPixels(in[i], left);
    177   }
    178   (void)upper;
    179 }
    180 GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
    181 GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
    182 GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
    183 GENERATE_PREDICTOR_ADD(Predictor5, PredictorAdd5)
    184 GENERATE_PREDICTOR_ADD(Predictor6, PredictorAdd6)
    185 GENERATE_PREDICTOR_ADD(Predictor7, PredictorAdd7)
    186 GENERATE_PREDICTOR_ADD(Predictor8, PredictorAdd8)
    187 GENERATE_PREDICTOR_ADD(Predictor9, PredictorAdd9)
    188 GENERATE_PREDICTOR_ADD(Predictor10, PredictorAdd10)
    189 GENERATE_PREDICTOR_ADD(Predictor11, PredictorAdd11)
    190 GENERATE_PREDICTOR_ADD(Predictor12, PredictorAdd12)
    191 GENERATE_PREDICTOR_ADD(Predictor13, PredictorAdd13)
    192 
    193 //------------------------------------------------------------------------------
    194 
    195 // Inverse prediction.
    196 static void PredictorInverseTransform(const VP8LTransform* const transform,
    197                                       int y_start, int y_end,
    198                                       const uint32_t* in, uint32_t* out) {
    199   const int width = transform->xsize_;
    200   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    201     PredictorAdd0(in, NULL, 1, out);
    202     PredictorAdd1(in + 1, NULL, width - 1, out + 1);
    203     in += width;
    204     out += width;
    205     ++y_start;
    206   }
    207 
    208   {
    209     int y = y_start;
    210     const int tile_width = 1 << transform->bits_;
    211     const int mask = tile_width - 1;
    212     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    213     const uint32_t* pred_mode_base =
    214         transform->data_ + (y >> transform->bits_) * tiles_per_row;
    215 
    216     while (y < y_end) {
    217       const uint32_t* pred_mode_src = pred_mode_base;
    218       int x = 1;
    219       // First pixel follows the T (mode=2) mode.
    220       PredictorAdd2(in, out - width, 1, out);
    221       // .. the rest:
    222       while (x < width) {
    223         const VP8LPredictorAddSubFunc pred_func =
    224             VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
    225         int x_end = (x & ~mask) + tile_width;
    226         if (x_end > width) x_end = width;
    227         pred_func(in + x, out + x - width, x_end - x, out + x);
    228         x = x_end;
    229       }
    230       in += width;
    231       out += width;
    232       ++y;
    233       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
    234         pred_mode_base += tiles_per_row;
    235       }
    236     }
    237   }
    238 }
    239 
    240 // Add green to blue and red channels (i.e. perform the inverse transform of
    241 // 'subtract green').
    242 void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
    243                                 uint32_t* dst) {
    244   int i;
    245   for (i = 0; i < num_pixels; ++i) {
    246     const uint32_t argb = src[i];
    247     const uint32_t green = ((argb >> 8) & 0xff);
    248     uint32_t red_blue = (argb & 0x00ff00ffu);
    249     red_blue += (green << 16) | green;
    250     red_blue &= 0x00ff00ffu;
    251     dst[i] = (argb & 0xff00ff00u) | red_blue;
    252   }
    253 }
    254 
    255 static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
    256                                            int8_t color) {
    257   return ((int)color_pred * color) >> 5;
    258 }
    259 
    260 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
    261                                                VP8LMultipliers* const m) {
    262   m->green_to_red_  = (color_code >>  0) & 0xff;
    263   m->green_to_blue_ = (color_code >>  8) & 0xff;
    264   m->red_to_blue_   = (color_code >> 16) & 0xff;
    265 }
    266 
    267 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
    268                                  const uint32_t* src, int num_pixels,
    269                                  uint32_t* dst) {
    270   int i;
    271   for (i = 0; i < num_pixels; ++i) {
    272     const uint32_t argb = src[i];
    273     const uint32_t green = argb >> 8;
    274     const uint32_t red = argb >> 16;
    275     int new_red = red;
    276     int new_blue = argb;
    277     new_red += ColorTransformDelta(m->green_to_red_, green);
    278     new_red &= 0xff;
    279     new_blue += ColorTransformDelta(m->green_to_blue_, green);
    280     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
    281     new_blue &= 0xff;
    282     dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    283   }
    284 }
    285 
    286 // Color space inverse transform.
    287 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
    288                                        int y_start, int y_end,
    289                                        const uint32_t* src, uint32_t* dst) {
    290   const int width = transform->xsize_;
    291   const int tile_width = 1 << transform->bits_;
    292   const int mask = tile_width - 1;
    293   const int safe_width = width & ~mask;
    294   const int remaining_width = width - safe_width;
    295   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    296   int y = y_start;
    297   const uint32_t* pred_row =
    298       transform->data_ + (y >> transform->bits_) * tiles_per_row;
    299 
    300   while (y < y_end) {
    301     const uint32_t* pred = pred_row;
    302     VP8LMultipliers m = { 0, 0, 0 };
    303     const uint32_t* const src_safe_end = src + safe_width;
    304     const uint32_t* const src_end = src + width;
    305     while (src < src_safe_end) {
    306       ColorCodeToMultipliers(*pred++, &m);
    307       VP8LTransformColorInverse(&m, src, tile_width, dst);
    308       src += tile_width;
    309       dst += tile_width;
    310     }
    311     if (src < src_end) {  // Left-overs using C-version.
    312       ColorCodeToMultipliers(*pred++, &m);
    313       VP8LTransformColorInverse(&m, src, remaining_width, dst);
    314       src += remaining_width;
    315       dst += remaining_width;
    316     }
    317     ++y;
    318     if ((y & mask) == 0) pred_row += tiles_per_row;
    319   }
    320 }
    321 
    322 // Separate out pixels packed together using pixel-bundling.
    323 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
    324 #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
    325                             GET_INDEX, GET_VALUE)                              \
    326 static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
    327                    TYPE* dst, int y_start, int y_end, int width) {             \
    328   int y;                                                                       \
    329   for (y = y_start; y < y_end; ++y) {                                          \
    330     int x;                                                                     \
    331     for (x = 0; x < width; ++x) {                                              \
    332       *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    333     }                                                                          \
    334   }                                                                            \
    335 }                                                                              \
    336 STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
    337                            int y_start, int y_end, const TYPE* src,            \
    338                            TYPE* dst) {                                        \
    339   int y;                                                                       \
    340   const int bits_per_pixel = 8 >> transform->bits_;                            \
    341   const int width = transform->xsize_;                                         \
    342   const uint32_t* const color_map = transform->data_;                          \
    343   if (bits_per_pixel < 8) {                                                    \
    344     const int pixels_per_byte = 1 << transform->bits_;                         \
    345     const int count_mask = pixels_per_byte - 1;                                \
    346     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
    347     for (y = y_start; y < y_end; ++y) {                                        \
    348       uint32_t packed_pixels = 0;                                              \
    349       int x;                                                                   \
    350       for (x = 0; x < width; ++x) {                                            \
    351         /* We need to load fresh 'packed_pixels' once every                */  \
    352         /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
    353         /* is a power of 2, so can just use a mask for that, instead of    */  \
    354         /* decrementing a counter.                                         */  \
    355         if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
    356         *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
    357         packed_pixels >>= bits_per_pixel;                                      \
    358       }                                                                        \
    359     }                                                                          \
    360   } else {                                                                     \
    361     VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
    362   }                                                                            \
    363 }
    364 
    365 COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
    366                     VP8GetARGBIndex, VP8GetARGBValue)
    367 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
    368                     8b, VP8GetAlphaIndex, VP8GetAlphaValue)
    369 
    370 #undef COLOR_INDEX_INVERSE
    371 
    372 void VP8LInverseTransform(const VP8LTransform* const transform,
    373                           int row_start, int row_end,
    374                           const uint32_t* const in, uint32_t* const out) {
    375   const int width = transform->xsize_;
    376   assert(row_start < row_end);
    377   assert(row_end <= transform->ysize_);
    378   switch (transform->type_) {
    379     case SUBTRACT_GREEN:
    380       VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
    381       break;
    382     case PREDICTOR_TRANSFORM:
    383       PredictorInverseTransform(transform, row_start, row_end, in, out);
    384       if (row_end != transform->ysize_) {
    385         // The last predicted row in this iteration will be the top-pred row
    386         // for the first row in next iteration.
    387         memcpy(out - width, out + (row_end - row_start - 1) * width,
    388                width * sizeof(*out));
    389       }
    390       break;
    391     case CROSS_COLOR_TRANSFORM:
    392       ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
    393       break;
    394     case COLOR_INDEXING_TRANSFORM:
    395       if (in == out && transform->bits_ > 0) {
    396         // Move packed pixels to the end of unpacked region, so that unpacking
    397         // can occur seamlessly.
    398         // Also, note that this is the only transform that applies on
    399         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
    400         // transforms work on effective width of xsize_.
    401         const int out_stride = (row_end - row_start) * width;
    402         const int in_stride = (row_end - row_start) *
    403             VP8LSubSampleSize(transform->xsize_, transform->bits_);
    404         uint32_t* const src = out + out_stride - in_stride;
    405         memmove(src, out, in_stride * sizeof(*src));
    406         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
    407       } else {
    408         ColorIndexInverseTransform(transform, row_start, row_end, in, out);
    409       }
    410       break;
    411   }
    412 }
    413 
    414 //------------------------------------------------------------------------------
    415 // Color space conversion.
    416 
    417 static int is_big_endian(void) {
    418   static const union {
    419     uint16_t w;
    420     uint8_t b[2];
    421   } tmp = { 1 };
    422   return (tmp.b[0] != 1);
    423 }
    424 
    425 void VP8LConvertBGRAToRGB_C(const uint32_t* src,
    426                             int num_pixels, uint8_t* dst) {
    427   const uint32_t* const src_end = src + num_pixels;
    428   while (src < src_end) {
    429     const uint32_t argb = *src++;
    430     *dst++ = (argb >> 16) & 0xff;
    431     *dst++ = (argb >>  8) & 0xff;
    432     *dst++ = (argb >>  0) & 0xff;
    433   }
    434 }
    435 
    436 void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
    437                              int num_pixels, uint8_t* dst) {
    438   const uint32_t* const src_end = src + num_pixels;
    439   while (src < src_end) {
    440     const uint32_t argb = *src++;
    441     *dst++ = (argb >> 16) & 0xff;
    442     *dst++ = (argb >>  8) & 0xff;
    443     *dst++ = (argb >>  0) & 0xff;
    444     *dst++ = (argb >> 24) & 0xff;
    445   }
    446 }
    447 
    448 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
    449                                  int num_pixels, uint8_t* dst) {
    450   const uint32_t* const src_end = src + num_pixels;
    451   while (src < src_end) {
    452     const uint32_t argb = *src++;
    453     const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    454     const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
    455 #ifdef WEBP_SWAP_16BIT_CSP
    456     *dst++ = ba;
    457     *dst++ = rg;
    458 #else
    459     *dst++ = rg;
    460     *dst++ = ba;
    461 #endif
    462   }
    463 }
    464 
    465 void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
    466                                int num_pixels, uint8_t* dst) {
    467   const uint32_t* const src_end = src + num_pixels;
    468   while (src < src_end) {
    469     const uint32_t argb = *src++;
    470     const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    471     const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
    472 #ifdef WEBP_SWAP_16BIT_CSP
    473     *dst++ = gb;
    474     *dst++ = rg;
    475 #else
    476     *dst++ = rg;
    477     *dst++ = gb;
    478 #endif
    479   }
    480 }
    481 
    482 void VP8LConvertBGRAToBGR_C(const uint32_t* src,
    483                             int num_pixels, uint8_t* dst) {
    484   const uint32_t* const src_end = src + num_pixels;
    485   while (src < src_end) {
    486     const uint32_t argb = *src++;
    487     *dst++ = (argb >>  0) & 0xff;
    488     *dst++ = (argb >>  8) & 0xff;
    489     *dst++ = (argb >> 16) & 0xff;
    490   }
    491 }
    492 
    493 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
    494                        int swap_on_big_endian) {
    495   if (is_big_endian() == swap_on_big_endian) {
    496     const uint32_t* const src_end = src + num_pixels;
    497     while (src < src_end) {
    498       const uint32_t argb = *src++;
    499 
    500 #if !defined(WORDS_BIGENDIAN)
    501 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
    502       WebPUint32ToMem(dst, BSwap32(argb));
    503 #else  // WEBP_REFERENCE_IMPLEMENTATION
    504       dst[0] = (argb >> 24) & 0xff;
    505       dst[1] = (argb >> 16) & 0xff;
    506       dst[2] = (argb >>  8) & 0xff;
    507       dst[3] = (argb >>  0) & 0xff;
    508 #endif
    509 #else  // WORDS_BIGENDIAN
    510       dst[0] = (argb >>  0) & 0xff;
    511       dst[1] = (argb >>  8) & 0xff;
    512       dst[2] = (argb >> 16) & 0xff;
    513       dst[3] = (argb >> 24) & 0xff;
    514 #endif
    515       dst += sizeof(argb);
    516     }
    517   } else {
    518     memcpy(dst, src, num_pixels * sizeof(*src));
    519   }
    520 }
    521 
    522 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
    523                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
    524   switch (out_colorspace) {
    525     case MODE_RGB:
    526       VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
    527       break;
    528     case MODE_RGBA:
    529       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    530       break;
    531     case MODE_rgbA:
    532       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    533       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    534       break;
    535     case MODE_BGR:
    536       VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
    537       break;
    538     case MODE_BGRA:
    539       CopyOrSwap(in_data, num_pixels, rgba, 1);
    540       break;
    541     case MODE_bgrA:
    542       CopyOrSwap(in_data, num_pixels, rgba, 1);
    543       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    544       break;
    545     case MODE_ARGB:
    546       CopyOrSwap(in_data, num_pixels, rgba, 0);
    547       break;
    548     case MODE_Argb:
    549       CopyOrSwap(in_data, num_pixels, rgba, 0);
    550       WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
    551       break;
    552     case MODE_RGBA_4444:
    553       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    554       break;
    555     case MODE_rgbA_4444:
    556       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    557       WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
    558       break;
    559     case MODE_RGB_565:
    560       VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
    561       break;
    562     default:
    563       assert(0);          // Code flow should not reach here.
    564   }
    565 }
    566 
    567 //------------------------------------------------------------------------------
    568 
    569 VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
    570 VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
    571 VP8LPredictorFunc VP8LPredictors[16];
    572 
    573 // exposed plain-C implementations
    574 VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
    575 VP8LPredictorFunc VP8LPredictors_C[16];
    576 
    577 VP8LTransformColorInverseFunc VP8LTransformColorInverse;
    578 
    579 VP8LConvertFunc VP8LConvertBGRAToRGB;
    580 VP8LConvertFunc VP8LConvertBGRAToRGBA;
    581 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
    582 VP8LConvertFunc VP8LConvertBGRAToRGB565;
    583 VP8LConvertFunc VP8LConvertBGRAToBGR;
    584 
    585 VP8LMapARGBFunc VP8LMapColor32b;
    586 VP8LMapAlphaFunc VP8LMapColor8b;
    587 
    588 extern void VP8LDspInitSSE2(void);
    589 extern void VP8LDspInitNEON(void);
    590 extern void VP8LDspInitMIPSdspR2(void);
    591 extern void VP8LDspInitMSA(void);
    592 
    593 static volatile VP8CPUInfo lossless_last_cpuinfo_used =
    594     (VP8CPUInfo)&lossless_last_cpuinfo_used;
    595 
    596 #define COPY_PREDICTOR_ARRAY(IN, OUT) do {              \
    597   (OUT)[0] = IN##0;                                     \
    598   (OUT)[1] = IN##1;                                     \
    599   (OUT)[2] = IN##2;                                     \
    600   (OUT)[3] = IN##3;                                     \
    601   (OUT)[4] = IN##4;                                     \
    602   (OUT)[5] = IN##5;                                     \
    603   (OUT)[6] = IN##6;                                     \
    604   (OUT)[7] = IN##7;                                     \
    605   (OUT)[8] = IN##8;                                     \
    606   (OUT)[9] = IN##9;                                     \
    607   (OUT)[10] = IN##10;                                   \
    608   (OUT)[11] = IN##11;                                   \
    609   (OUT)[12] = IN##12;                                   \
    610   (OUT)[13] = IN##13;                                   \
    611   (OUT)[14] = IN##0; /* <- padding security sentinels*/ \
    612   (OUT)[15] = IN##0;                                    \
    613 } while (0);
    614 
    615 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
    616   if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
    617 
    618   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors)
    619   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C)
    620   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
    621   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
    622 
    623   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
    624 
    625   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
    626 
    627   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
    628   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
    629   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
    630   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
    631   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
    632 
    633   VP8LMapColor32b = MapARGB;
    634   VP8LMapColor8b = MapAlpha;
    635 
    636   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
    637   if (VP8GetCPUInfo != NULL) {
    638 #if defined(WEBP_USE_SSE2)
    639     if (VP8GetCPUInfo(kSSE2)) {
    640       VP8LDspInitSSE2();
    641     }
    642 #endif
    643 #if defined(WEBP_USE_NEON)
    644     if (VP8GetCPUInfo(kNEON)) {
    645       VP8LDspInitNEON();
    646     }
    647 #endif
    648 #if defined(WEBP_USE_MIPS_DSP_R2)
    649     if (VP8GetCPUInfo(kMIPSdspR2)) {
    650       VP8LDspInitMIPSdspR2();
    651     }
    652 #endif
    653 #if defined(WEBP_USE_MSA)
    654     if (VP8GetCPUInfo(kMSA)) {
    655       VP8LDspInitMSA();
    656     }
    657 #endif
    658   }
    659   lossless_last_cpuinfo_used = VP8GetCPUInfo;
    660 }
    661 #undef COPY_PREDICTOR_ARRAY
    662 
    663 //------------------------------------------------------------------------------
    664