Home | History | Annotate | Download | only in dsp
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Image transforms and color space conversion methods for lossless decoder.
     11 //
     12 // Authors: Vikas Arora (vikaas.arora (at) gmail.com)
     13 //          Jyrki Alakuijala (jyrki (at) google.com)
     14 //          Urvang Joshi (urvang (at) google.com)
     15 
     16 #include "./dsp.h"
     17 
     18 #include <math.h>
     19 #include <stdlib.h>
     20 #include "../dec/vp8li.h"
     21 #include "../utils/endian_inl.h"
     22 #include "./lossless.h"
     23 
     24 #define MAX_DIFF_COST (1e30f)
     25 
     26 //------------------------------------------------------------------------------
     27 // Image transforms.
     28 
     29 // In-place sum of each component with mod 256.
     30 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
     31   const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
     32   const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
     33   *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
     34 }
     35 
     36 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
     37   return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
     38 }
     39 
     40 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
     41   return Average2(Average2(a0, a2), a1);
     42 }
     43 
     44 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
     45                                      uint32_t a2, uint32_t a3) {
     46   return Average2(Average2(a0, a1), Average2(a2, a3));
     47 }
     48 
     49 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
     50   if (a < 256) {
     51     return a;
     52   }
     53   // return 0, when a is a negative integer.
     54   // return 255, when a is positive.
     55   return ~a >> 24;
     56 }
     57 
     58 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
     59   return Clip255(a + b - c);
     60 }
     61 
     62 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
     63                                                    uint32_t c2) {
     64   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
     65   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
     66                                          (c1 >> 16) & 0xff,
     67                                          (c2 >> 16) & 0xff);
     68   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
     69                                          (c1 >> 8) & 0xff,
     70                                          (c2 >> 8) & 0xff);
     71   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
     72   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     73 }
     74 
     75 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
     76   return Clip255(a + (a - b) / 2);
     77 }
     78 
     79 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
     80                                                    uint32_t c2) {
     81   const uint32_t ave = Average2(c0, c1);
     82   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
     83   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
     84   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
     85   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
     86   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
     87 }
     88 
     89 // gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
     90 #if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
     91 # define LOCAL_INLINE __attribute__ ((noinline))
     92 #else
     93 # define LOCAL_INLINE WEBP_INLINE
     94 #endif
     95 
     96 static LOCAL_INLINE int Sub3(int a, int b, int c) {
     97   const int pb = b - c;
     98   const int pa = a - c;
     99   return abs(pb) - abs(pa);
    100 }
    101 
    102 #undef LOCAL_INLINE
    103 
    104 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
    105   const int pa_minus_pb =
    106       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
    107       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
    108       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
    109       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
    110   return (pa_minus_pb <= 0) ? a : b;
    111 }
    112 
    113 //------------------------------------------------------------------------------
    114 // Predictors
    115 
    116 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
    117   (void)top;
    118   (void)left;
    119   return ARGB_BLACK;
    120 }
    121 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
    122   (void)top;
    123   return left;
    124 }
    125 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
    126   (void)left;
    127   return top[0];
    128 }
    129 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
    130   (void)left;
    131   return top[1];
    132 }
    133 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
    134   (void)left;
    135   return top[-1];
    136 }
    137 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
    138   const uint32_t pred = Average3(left, top[0], top[1]);
    139   return pred;
    140 }
    141 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
    142   const uint32_t pred = Average2(left, top[-1]);
    143   return pred;
    144 }
    145 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
    146   const uint32_t pred = Average2(left, top[0]);
    147   return pred;
    148 }
    149 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
    150   const uint32_t pred = Average2(top[-1], top[0]);
    151   (void)left;
    152   return pred;
    153 }
    154 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
    155   const uint32_t pred = Average2(top[0], top[1]);
    156   (void)left;
    157   return pred;
    158 }
    159 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
    160   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
    161   return pred;
    162 }
    163 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
    164   const uint32_t pred = Select(top[0], left, top[-1]);
    165   return pred;
    166 }
    167 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
    168   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
    169   return pred;
    170 }
    171 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
    172   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
    173   return pred;
    174 }
    175 
    176 //------------------------------------------------------------------------------
    177 
    178 // Inverse prediction.
    179 static void PredictorInverseTransform(const VP8LTransform* const transform,
    180                                       int y_start, int y_end, uint32_t* data) {
    181   const int width = transform->xsize_;
    182   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    183     int x;
    184     const uint32_t pred0 = Predictor0(data[-1], NULL);
    185     AddPixelsEq(data, pred0);
    186     for (x = 1; x < width; ++x) {
    187       const uint32_t pred1 = Predictor1(data[x - 1], NULL);
    188       AddPixelsEq(data + x, pred1);
    189     }
    190     data += width;
    191     ++y_start;
    192   }
    193 
    194   {
    195     int y = y_start;
    196     const int tile_width = 1 << transform->bits_;
    197     const int mask = tile_width - 1;
    198     const int safe_width = width & ~mask;
    199     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    200     const uint32_t* pred_mode_base =
    201         transform->data_ + (y >> transform->bits_) * tiles_per_row;
    202 
    203     while (y < y_end) {
    204       const uint32_t pred2 = Predictor2(data[-1], data - width);
    205       const uint32_t* pred_mode_src = pred_mode_base;
    206       VP8LPredictorFunc pred_func;
    207       int x = 1;
    208       int t = 1;
    209       // First pixel follows the T (mode=2) mode.
    210       AddPixelsEq(data, pred2);
    211       // .. the rest:
    212       while (x < safe_width) {
    213         pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
    214         for (; t < tile_width; ++t, ++x) {
    215           const uint32_t pred = pred_func(data[x - 1], data + x - width);
    216           AddPixelsEq(data + x, pred);
    217         }
    218         t = 0;
    219       }
    220       if (x < width) {
    221         pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
    222         for (; x < width; ++x) {
    223           const uint32_t pred = pred_func(data[x - 1], data + x - width);
    224           AddPixelsEq(data + x, pred);
    225         }
    226       }
    227       data += width;
    228       ++y;
    229       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
    230         pred_mode_base += tiles_per_row;
    231       }
    232     }
    233   }
    234 }
    235 
    236 // Add green to blue and red channels (i.e. perform the inverse transform of
    237 // 'subtract green').
    238 void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
    239   int i;
    240   for (i = 0; i < num_pixels; ++i) {
    241     const uint32_t argb = data[i];
    242     const uint32_t green = ((argb >> 8) & 0xff);
    243     uint32_t red_blue = (argb & 0x00ff00ffu);
    244     red_blue += (green << 16) | green;
    245     red_blue &= 0x00ff00ffu;
    246     data[i] = (argb & 0xff00ff00u) | red_blue;
    247   }
    248 }
    249 
    250 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
    251                                                 int8_t color) {
    252   return (uint32_t)((int)(color_pred) * color) >> 5;
    253 }
    254 
    255 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
    256                                                VP8LMultipliers* const m) {
    257   m->green_to_red_  = (color_code >>  0) & 0xff;
    258   m->green_to_blue_ = (color_code >>  8) & 0xff;
    259   m->red_to_blue_   = (color_code >> 16) & 0xff;
    260 }
    261 
    262 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
    263                                  int num_pixels) {
    264   int i;
    265   for (i = 0; i < num_pixels; ++i) {
    266     const uint32_t argb = data[i];
    267     const uint32_t green = argb >> 8;
    268     const uint32_t red = argb >> 16;
    269     uint32_t new_red = red;
    270     uint32_t new_blue = argb;
    271     new_red += ColorTransformDelta(m->green_to_red_, green);
    272     new_red &= 0xff;
    273     new_blue += ColorTransformDelta(m->green_to_blue_, green);
    274     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
    275     new_blue &= 0xff;
    276     data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    277   }
    278 }
    279 
    280 // Color space inverse transform.
    281 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
    282                                        int y_start, int y_end, uint32_t* data) {
    283   const int width = transform->xsize_;
    284   const int tile_width = 1 << transform->bits_;
    285   const int mask = tile_width - 1;
    286   const int safe_width = width & ~mask;
    287   const int remaining_width = width - safe_width;
    288   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    289   int y = y_start;
    290   const uint32_t* pred_row =
    291       transform->data_ + (y >> transform->bits_) * tiles_per_row;
    292 
    293   while (y < y_end) {
    294     const uint32_t* pred = pred_row;
    295     VP8LMultipliers m = { 0, 0, 0 };
    296     const uint32_t* const data_safe_end = data + safe_width;
    297     const uint32_t* const data_end = data + width;
    298     while (data < data_safe_end) {
    299       ColorCodeToMultipliers(*pred++, &m);
    300       VP8LTransformColorInverse(&m, data, tile_width);
    301       data += tile_width;
    302     }
    303     if (data < data_end) {  // Left-overs using C-version.
    304       ColorCodeToMultipliers(*pred++, &m);
    305       VP8LTransformColorInverse(&m, data, remaining_width);
    306       data += remaining_width;
    307     }
    308     ++y;
    309     if ((y & mask) == 0) pred_row += tiles_per_row;
    310   }
    311 }
    312 
    313 // Separate out pixels packed together using pixel-bundling.
    314 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
    315 #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
    316                             GET_INDEX, GET_VALUE)                              \
    317 static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
    318                    TYPE* dst, int y_start, int y_end, int width) {             \
    319   int y;                                                                       \
    320   for (y = y_start; y < y_end; ++y) {                                          \
    321     int x;                                                                     \
    322     for (x = 0; x < width; ++x) {                                              \
    323       *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    324     }                                                                          \
    325   }                                                                            \
    326 }                                                                              \
    327 STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
    328                            int y_start, int y_end, const TYPE* src,            \
    329                            TYPE* dst) {                                        \
    330   int y;                                                                       \
    331   const int bits_per_pixel = 8 >> transform->bits_;                            \
    332   const int width = transform->xsize_;                                         \
    333   const uint32_t* const color_map = transform->data_;                          \
    334   if (bits_per_pixel < 8) {                                                    \
    335     const int pixels_per_byte = 1 << transform->bits_;                         \
    336     const int count_mask = pixels_per_byte - 1;                                \
    337     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
    338     for (y = y_start; y < y_end; ++y) {                                        \
    339       uint32_t packed_pixels = 0;                                              \
    340       int x;                                                                   \
    341       for (x = 0; x < width; ++x) {                                            \
    342         /* We need to load fresh 'packed_pixels' once every                */  \
    343         /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
    344         /* is a power of 2, so can just use a mask for that, instead of    */  \
    345         /* decrementing a counter.                                         */  \
    346         if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
    347         *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
    348         packed_pixels >>= bits_per_pixel;                                      \
    349       }                                                                        \
    350     }                                                                          \
    351   } else {                                                                     \
    352     VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
    353   }                                                                            \
    354 }
    355 
    356 COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
    357                     VP8GetARGBIndex, VP8GetARGBValue)
    358 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
    359                     8b, VP8GetAlphaIndex, VP8GetAlphaValue)
    360 
    361 #undef COLOR_INDEX_INVERSE
    362 
    363 void VP8LInverseTransform(const VP8LTransform* const transform,
    364                           int row_start, int row_end,
    365                           const uint32_t* const in, uint32_t* const out) {
    366   const int width = transform->xsize_;
    367   assert(row_start < row_end);
    368   assert(row_end <= transform->ysize_);
    369   switch (transform->type_) {
    370     case SUBTRACT_GREEN:
    371       VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
    372       break;
    373     case PREDICTOR_TRANSFORM:
    374       PredictorInverseTransform(transform, row_start, row_end, out);
    375       if (row_end != transform->ysize_) {
    376         // The last predicted row in this iteration will be the top-pred row
    377         // for the first row in next iteration.
    378         memcpy(out - width, out + (row_end - row_start - 1) * width,
    379                width * sizeof(*out));
    380       }
    381       break;
    382     case CROSS_COLOR_TRANSFORM:
    383       ColorSpaceInverseTransform(transform, row_start, row_end, out);
    384       break;
    385     case COLOR_INDEXING_TRANSFORM:
    386       if (in == out && transform->bits_ > 0) {
    387         // Move packed pixels to the end of unpacked region, so that unpacking
    388         // can occur seamlessly.
    389         // Also, note that this is the only transform that applies on
    390         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
    391         // transforms work on effective width of xsize_.
    392         const int out_stride = (row_end - row_start) * width;
    393         const int in_stride = (row_end - row_start) *
    394             VP8LSubSampleSize(transform->xsize_, transform->bits_);
    395         uint32_t* const src = out + out_stride - in_stride;
    396         memmove(src, out, in_stride * sizeof(*src));
    397         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
    398       } else {
    399         ColorIndexInverseTransform(transform, row_start, row_end, in, out);
    400       }
    401       break;
    402   }
    403 }
    404 
    405 //------------------------------------------------------------------------------
    406 // Color space conversion.
    407 
    408 static int is_big_endian(void) {
    409   static const union {
    410     uint16_t w;
    411     uint8_t b[2];
    412   } tmp = { 1 };
    413   return (tmp.b[0] != 1);
    414 }
    415 
    416 void VP8LConvertBGRAToRGB_C(const uint32_t* src,
    417                             int num_pixels, uint8_t* dst) {
    418   const uint32_t* const src_end = src + num_pixels;
    419   while (src < src_end) {
    420     const uint32_t argb = *src++;
    421     *dst++ = (argb >> 16) & 0xff;
    422     *dst++ = (argb >>  8) & 0xff;
    423     *dst++ = (argb >>  0) & 0xff;
    424   }
    425 }
    426 
    427 void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
    428                              int num_pixels, uint8_t* dst) {
    429   const uint32_t* const src_end = src + num_pixels;
    430   while (src < src_end) {
    431     const uint32_t argb = *src++;
    432     *dst++ = (argb >> 16) & 0xff;
    433     *dst++ = (argb >>  8) & 0xff;
    434     *dst++ = (argb >>  0) & 0xff;
    435     *dst++ = (argb >> 24) & 0xff;
    436   }
    437 }
    438 
    439 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
    440                                  int num_pixels, uint8_t* dst) {
    441   const uint32_t* const src_end = src + num_pixels;
    442   while (src < src_end) {
    443     const uint32_t argb = *src++;
    444     const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    445     const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
    446 #ifdef WEBP_SWAP_16BIT_CSP
    447     *dst++ = ba;
    448     *dst++ = rg;
    449 #else
    450     *dst++ = rg;
    451     *dst++ = ba;
    452 #endif
    453   }
    454 }
    455 
    456 void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
    457                                int num_pixels, uint8_t* dst) {
    458   const uint32_t* const src_end = src + num_pixels;
    459   while (src < src_end) {
    460     const uint32_t argb = *src++;
    461     const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    462     const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
    463 #ifdef WEBP_SWAP_16BIT_CSP
    464     *dst++ = gb;
    465     *dst++ = rg;
    466 #else
    467     *dst++ = rg;
    468     *dst++ = gb;
    469 #endif
    470   }
    471 }
    472 
    473 void VP8LConvertBGRAToBGR_C(const uint32_t* src,
    474                             int num_pixels, uint8_t* dst) {
    475   const uint32_t* const src_end = src + num_pixels;
    476   while (src < src_end) {
    477     const uint32_t argb = *src++;
    478     *dst++ = (argb >>  0) & 0xff;
    479     *dst++ = (argb >>  8) & 0xff;
    480     *dst++ = (argb >> 16) & 0xff;
    481   }
    482 }
    483 
    484 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
    485                        int swap_on_big_endian) {
    486   if (is_big_endian() == swap_on_big_endian) {
    487     const uint32_t* const src_end = src + num_pixels;
    488     while (src < src_end) {
    489       const uint32_t argb = *src++;
    490 
    491 #if !defined(WORDS_BIGENDIAN)
    492 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
    493       WebPUint32ToMem(dst, BSwap32(argb));
    494 #else  // WEBP_REFERENCE_IMPLEMENTATION
    495       dst[0] = (argb >> 24) & 0xff;
    496       dst[1] = (argb >> 16) & 0xff;
    497       dst[2] = (argb >>  8) & 0xff;
    498       dst[3] = (argb >>  0) & 0xff;
    499 #endif
    500 #else  // WORDS_BIGENDIAN
    501       dst[0] = (argb >>  0) & 0xff;
    502       dst[1] = (argb >>  8) & 0xff;
    503       dst[2] = (argb >> 16) & 0xff;
    504       dst[3] = (argb >> 24) & 0xff;
    505 #endif
    506       dst += sizeof(argb);
    507     }
    508   } else {
    509     memcpy(dst, src, num_pixels * sizeof(*src));
    510   }
    511 }
    512 
    513 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
    514                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
    515   switch (out_colorspace) {
    516     case MODE_RGB:
    517       VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
    518       break;
    519     case MODE_RGBA:
    520       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    521       break;
    522     case MODE_rgbA:
    523       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
    524       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    525       break;
    526     case MODE_BGR:
    527       VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
    528       break;
    529     case MODE_BGRA:
    530       CopyOrSwap(in_data, num_pixels, rgba, 1);
    531       break;
    532     case MODE_bgrA:
    533       CopyOrSwap(in_data, num_pixels, rgba, 1);
    534       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
    535       break;
    536     case MODE_ARGB:
    537       CopyOrSwap(in_data, num_pixels, rgba, 0);
    538       break;
    539     case MODE_Argb:
    540       CopyOrSwap(in_data, num_pixels, rgba, 0);
    541       WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
    542       break;
    543     case MODE_RGBA_4444:
    544       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    545       break;
    546     case MODE_rgbA_4444:
    547       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
    548       WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
    549       break;
    550     case MODE_RGB_565:
    551       VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
    552       break;
    553     default:
    554       assert(0);          // Code flow should not reach here.
    555   }
    556 }
    557 
    558 //------------------------------------------------------------------------------
    559 
    560 VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
    561 VP8LPredictorFunc VP8LPredictors[16];
    562 
    563 VP8LTransformColorFunc VP8LTransformColorInverse;
    564 
    565 VP8LConvertFunc VP8LConvertBGRAToRGB;
    566 VP8LConvertFunc VP8LConvertBGRAToRGBA;
    567 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
    568 VP8LConvertFunc VP8LConvertBGRAToRGB565;
    569 VP8LConvertFunc VP8LConvertBGRAToBGR;
    570 
    571 VP8LMapARGBFunc VP8LMapColor32b;
    572 VP8LMapAlphaFunc VP8LMapColor8b;
    573 
    574 extern void VP8LDspInitSSE2(void);
    575 extern void VP8LDspInitNEON(void);
    576 extern void VP8LDspInitMIPSdspR2(void);
    577 
    578 static volatile VP8CPUInfo lossless_last_cpuinfo_used =
    579     (VP8CPUInfo)&lossless_last_cpuinfo_used;
    580 
    581 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
    582   if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
    583 
    584   VP8LPredictors[0] = Predictor0;
    585   VP8LPredictors[1] = Predictor1;
    586   VP8LPredictors[2] = Predictor2;
    587   VP8LPredictors[3] = Predictor3;
    588   VP8LPredictors[4] = Predictor4;
    589   VP8LPredictors[5] = Predictor5;
    590   VP8LPredictors[6] = Predictor6;
    591   VP8LPredictors[7] = Predictor7;
    592   VP8LPredictors[8] = Predictor8;
    593   VP8LPredictors[9] = Predictor9;
    594   VP8LPredictors[10] = Predictor10;
    595   VP8LPredictors[11] = Predictor11;
    596   VP8LPredictors[12] = Predictor12;
    597   VP8LPredictors[13] = Predictor13;
    598   VP8LPredictors[14] = Predictor0;     // <- padding security sentinels
    599   VP8LPredictors[15] = Predictor0;
    600 
    601   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
    602 
    603   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
    604 
    605   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
    606   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
    607   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
    608   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
    609   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
    610 
    611   VP8LMapColor32b = MapARGB;
    612   VP8LMapColor8b = MapAlpha;
    613 
    614   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
    615   if (VP8GetCPUInfo != NULL) {
    616 #if defined(WEBP_USE_SSE2)
    617     if (VP8GetCPUInfo(kSSE2)) {
    618       VP8LDspInitSSE2();
    619     }
    620 #endif
    621 #if defined(WEBP_USE_NEON)
    622     if (VP8GetCPUInfo(kNEON)) {
    623       VP8LDspInitNEON();
    624     }
    625 #endif
    626 #if defined(WEBP_USE_MIPS_DSP_R2)
    627     if (VP8GetCPUInfo(kMIPSdspR2)) {
    628       VP8LDspInitMIPSdspR2();
    629     }
    630 #endif
    631   }
    632   lossless_last_cpuinfo_used = VP8GetCPUInfo;
    633 }
    634 
    635 //------------------------------------------------------------------------------
    636