Home | History | Annotate | Download | only in dsp
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // This code is licensed under the same terms as WebM:
      4 //  Software License Agreement:  http://www.webmproject.org/license/software/
      5 //  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
      6 // -----------------------------------------------------------------------------
      7 //
      8 // Image transforms and color space conversion methods for lossless decoder.
      9 //
     10 // Authors: Vikas Arora (vikaas.arora (at) gmail.com)
     11 //          Jyrki Alakuijala (jyrki (at) google.com)
     12 //          Urvang Joshi (urvang (at) google.com)
     13 
     14 #define ANDROID_WEBP_RGB
     15 
     16 #if defined(__cplusplus) || defined(c_plusplus)
     17 extern "C" {
     18 #endif
     19 
     20 #include <math.h>
     21 #include <stdlib.h>
     22 #include "./lossless.h"
     23 #include "../dec/vp8li.h"
     24 #include "../dsp/yuv.h"
     25 #include "../dsp/dsp.h"
     26 #include "../enc/histogram.h"
     27 
     28 #define MAX_DIFF_COST (1e30f)
     29 
     30 // lookup table for small values of log2(int)
     31 #define APPROX_LOG_MAX  4096
     32 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
     33 #define LOG_LOOKUP_IDX_MAX 256
     34 static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
     35   0.0000000000000000f, 0.0000000000000000f,
     36   1.0000000000000000f, 1.5849625007211560f,
     37   2.0000000000000000f, 2.3219280948873621f,
     38   2.5849625007211560f, 2.8073549220576041f,
     39   3.0000000000000000f, 3.1699250014423121f,
     40   3.3219280948873621f, 3.4594316186372973f,
     41   3.5849625007211560f, 3.7004397181410921f,
     42   3.8073549220576041f, 3.9068905956085187f,
     43   4.0000000000000000f, 4.0874628412503390f,
     44   4.1699250014423121f, 4.2479275134435852f,
     45   4.3219280948873626f, 4.3923174227787606f,
     46   4.4594316186372973f, 4.5235619560570130f,
     47   4.5849625007211560f, 4.6438561897747243f,
     48   4.7004397181410917f, 4.7548875021634682f,
     49   4.8073549220576037f, 4.8579809951275718f,
     50   4.9068905956085187f, 4.9541963103868749f,
     51   5.0000000000000000f, 5.0443941193584533f,
     52   5.0874628412503390f, 5.1292830169449663f,
     53   5.1699250014423121f, 5.2094533656289501f,
     54   5.2479275134435852f, 5.2854022188622487f,
     55   5.3219280948873626f, 5.3575520046180837f,
     56   5.3923174227787606f, 5.4262647547020979f,
     57   5.4594316186372973f, 5.4918530963296747f,
     58   5.5235619560570130f, 5.5545888516776376f,
     59   5.5849625007211560f, 5.6147098441152083f,
     60   5.6438561897747243f, 5.6724253419714951f,
     61   5.7004397181410917f, 5.7279204545631987f,
     62   5.7548875021634682f, 5.7813597135246599f,
     63   5.8073549220576037f, 5.8328900141647412f,
     64   5.8579809951275718f, 5.8826430493618415f,
     65   5.9068905956085187f, 5.9307373375628866f,
     66   5.9541963103868749f, 5.9772799234999167f,
     67   6.0000000000000000f, 6.0223678130284543f,
     68   6.0443941193584533f, 6.0660891904577720f,
     69   6.0874628412503390f, 6.1085244567781691f,
     70   6.1292830169449663f, 6.1497471195046822f,
     71   6.1699250014423121f, 6.1898245588800175f,
     72   6.2094533656289501f, 6.2288186904958804f,
     73   6.2479275134435852f, 6.2667865406949010f,
     74   6.2854022188622487f, 6.3037807481771030f,
     75   6.3219280948873626f, 6.3398500028846243f,
     76   6.3575520046180837f, 6.3750394313469245f,
     77   6.3923174227787606f, 6.4093909361377017f,
     78   6.4262647547020979f, 6.4429434958487279f,
     79   6.4594316186372973f, 6.4757334309663976f,
     80   6.4918530963296747f, 6.5077946401986963f,
     81   6.5235619560570130f, 6.5391588111080309f,
     82   6.5545888516776376f, 6.5698556083309478f,
     83   6.5849625007211560f, 6.5999128421871278f,
     84   6.6147098441152083f, 6.6293566200796094f,
     85   6.6438561897747243f, 6.6582114827517946f,
     86   6.6724253419714951f, 6.6865005271832185f,
     87   6.7004397181410917f, 6.7142455176661224f,
     88   6.7279204545631987f, 6.7414669864011464f,
     89   6.7548875021634682f, 6.7681843247769259f,
     90   6.7813597135246599f, 6.7944158663501061f,
     91   6.8073549220576037f, 6.8201789624151878f,
     92   6.8328900141647412f, 6.8454900509443747f,
     93   6.8579809951275718f, 6.8703647195834047f,
     94   6.8826430493618415f, 6.8948177633079437f,
     95   6.9068905956085187f, 6.9188632372745946f,
     96   6.9307373375628866f, 6.9425145053392398f,
     97   6.9541963103868749f, 6.9657842846620869f,
     98   6.9772799234999167f, 6.9886846867721654f,
     99   7.0000000000000000f, 7.0112272554232539f,
    100   7.0223678130284543f, 7.0334230015374501f,
    101   7.0443941193584533f, 7.0552824355011898f,
    102   7.0660891904577720f, 7.0768155970508308f,
    103   7.0874628412503390f, 7.0980320829605263f,
    104   7.1085244567781691f, 7.1189410727235076f,
    105   7.1292830169449663f, 7.1395513523987936f,
    106   7.1497471195046822f, 7.1598713367783890f,
    107   7.1699250014423121f, 7.1799090900149344f,
    108   7.1898245588800175f, 7.1996723448363644f,
    109   7.2094533656289501f, 7.2191685204621611f,
    110   7.2288186904958804f, 7.2384047393250785f,
    111   7.2479275134435852f, 7.2573878426926521f,
    112   7.2667865406949010f, 7.2761244052742375f,
    113   7.2854022188622487f, 7.2946207488916270f,
    114   7.3037807481771030f, 7.3128829552843557f,
    115   7.3219280948873626f, 7.3309168781146167f,
    116   7.3398500028846243f, 7.3487281542310771f,
    117   7.3575520046180837f, 7.3663222142458160f,
    118   7.3750394313469245f, 7.3837042924740519f,
    119   7.3923174227787606f, 7.4008794362821843f,
    120   7.4093909361377017f, 7.4178525148858982f,
    121   7.4262647547020979f, 7.4346282276367245f,
    122   7.4429434958487279f, 7.4512111118323289f,
    123   7.4594316186372973f, 7.4676055500829976f,
    124   7.4757334309663976f, 7.4838157772642563f,
    125   7.4918530963296747f, 7.4998458870832056f,
    126   7.5077946401986963f, 7.5156998382840427f,
    127   7.5235619560570130f, 7.5313814605163118f,
    128   7.5391588111080309f, 7.5468944598876364f,
    129   7.5545888516776376f, 7.5622424242210728f,
    130   7.5698556083309478f, 7.5774288280357486f,
    131   7.5849625007211560f, 7.5924570372680806f,
    132   7.5999128421871278f, 7.6073303137496104f,
    133   7.6147098441152083f, 7.6220518194563764f,
    134   7.6293566200796094f, 7.6366246205436487f,
    135   7.6438561897747243f, 7.6510516911789281f,
    136   7.6582114827517946f, 7.6653359171851764f,
    137   7.6724253419714951f, 7.6794800995054464f,
    138   7.6865005271832185f, 7.6934869574993252f,
    139   7.7004397181410917f, 7.7073591320808825f,
    140   7.7142455176661224f, 7.7210991887071855f,
    141   7.7279204545631987f, 7.7347096202258383f,
    142   7.7414669864011464f, 7.7481928495894605f,
    143   7.7548875021634682f, 7.7615512324444795f,
    144   7.7681843247769259f, 7.7747870596011736f,
    145   7.7813597135246599f, 7.7879025593914317f,
    146   7.7944158663501061f, 7.8008998999203047f,
    147   7.8073549220576037f, 7.8137811912170374f,
    148   7.8201789624151878f, 7.8265484872909150f,
    149   7.8328900141647412f, 7.8392037880969436f,
    150   7.8454900509443747f, 7.8517490414160571f,
    151   7.8579809951275718f, 7.8641861446542797f,
    152   7.8703647195834047f, 7.8765169465649993f,
    153   7.8826430493618415f, 7.8887432488982591f,
    154   7.8948177633079437f, 7.9008668079807486f,
    155   7.9068905956085187f, 7.9128893362299619f,
    156   7.9188632372745946f, 7.9248125036057812f,
    157   7.9307373375628866f, 7.9366379390025709f,
    158   7.9425145053392398f, 7.9483672315846778f,
    159   7.9541963103868749f, 7.9600019320680805f,
    160   7.9657842846620869f, 7.9715435539507719f,
    161   7.9772799234999167f, 7.9829935746943103f,
    162   7.9886846867721654f, 7.9943534368588577f
    163 };
    164 
    165 float VP8LFastLog2(int v) {
    166   if (v < LOG_LOOKUP_IDX_MAX) {
    167     return kLog2Table[v];
    168   } else if (v < APPROX_LOG_MAX) {
    169     int log_cnt = 0;
    170     while (v >= LOG_LOOKUP_IDX_MAX) {
    171       ++log_cnt;
    172       v = v >> 1;
    173     }
    174     return kLog2Table[v] + (float)log_cnt;
    175   } else {
    176     return (float)(LOG_2_RECIPROCAL * log((double)v));
    177   }
    178 }
    179 
    180 //------------------------------------------------------------------------------
    181 // Image transforms.
    182 
    183 // In-place sum of each component with mod 256.
    184 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
    185   const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
    186   const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
    187   *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
    188 }
    189 
    190 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
    191   return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
    192 }
    193 
    194 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
    195   return Average2(Average2(a0, a2), a1);
    196 }
    197 
    198 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
    199                                      uint32_t a2, uint32_t a3) {
    200   return Average2(Average2(a0, a1), Average2(a2, a3));
    201 }
    202 
    203 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
    204   if (a < 256) {
    205     return a;
    206   }
    207   // return 0, when a is a negative integer.
    208   // return 255, when a is positive.
    209   return ~a >> 24;
    210 }
    211 
    212 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
    213   return Clip255(a + b - c);
    214 }
    215 
    216 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
    217                                                    uint32_t c2) {
    218   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
    219   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
    220                                          (c1 >> 16) & 0xff,
    221                                          (c2 >> 16) & 0xff);
    222   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
    223                                          (c1 >> 8) & 0xff,
    224                                          (c2 >> 8) & 0xff);
    225   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
    226   return (a << 24) | (r << 16) | (g << 8) | b;
    227 }
    228 
    229 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
    230   return Clip255(a + (a - b) / 2);
    231 }
    232 
    233 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
    234                                                    uint32_t c2) {
    235   const uint32_t ave = Average2(c0, c1);
    236   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
    237   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
    238   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
    239   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
    240   return (a << 24) | (r << 16) | (g << 8) | b;
    241 }
    242 
    243 static WEBP_INLINE int Sub3(int a, int b, int c) {
    244   const int pa = b - c;
    245   const int pb = a - c;
    246   return abs(pa) - abs(pb);
    247 }
    248 
    249 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
    250   const int pa_minus_pb =
    251       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
    252       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
    253       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
    254       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
    255 
    256   return (pa_minus_pb <= 0) ? a : b;
    257 }
    258 
    259 //------------------------------------------------------------------------------
    260 // Predictors
    261 
    262 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
    263   (void)top;
    264   (void)left;
    265   return ARGB_BLACK;
    266 }
    267 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
    268   (void)top;
    269   return left;
    270 }
    271 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
    272   (void)left;
    273   return top[0];
    274 }
    275 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
    276   (void)left;
    277   return top[1];
    278 }
    279 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
    280   (void)left;
    281   return top[-1];
    282 }
    283 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
    284   const uint32_t pred = Average3(left, top[0], top[1]);
    285   return pred;
    286 }
    287 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
    288   const uint32_t pred = Average2(left, top[-1]);
    289   return pred;
    290 }
    291 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
    292   const uint32_t pred = Average2(left, top[0]);
    293   return pred;
    294 }
    295 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
    296   const uint32_t pred = Average2(top[-1], top[0]);
    297   (void)left;
    298   return pred;
    299 }
    300 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
    301   const uint32_t pred = Average2(top[0], top[1]);
    302   (void)left;
    303   return pred;
    304 }
    305 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
    306   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
    307   return pred;
    308 }
    309 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
    310   const uint32_t pred = Select(top[0], left, top[-1]);
    311   return pred;
    312 }
    313 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
    314   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
    315   return pred;
    316 }
    317 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
    318   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
    319   return pred;
    320 }
    321 
    322 typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
    323 static const PredictorFunc kPredictors[16] = {
    324   Predictor0, Predictor1, Predictor2, Predictor3,
    325   Predictor4, Predictor5, Predictor6, Predictor7,
    326   Predictor8, Predictor9, Predictor10, Predictor11,
    327   Predictor12, Predictor13,
    328   Predictor0, Predictor0    // <- padding security sentinels
    329 };
    330 
    331 // TODO(vikasa): Replace 256 etc with defines.
    332 static float PredictionCostSpatial(const int* counts,
    333                                    int weight_0, double exp_val) {
    334   const int significant_symbols = 16;
    335   const double exp_decay_factor = 0.6;
    336   double bits = weight_0 * counts[0];
    337   int i;
    338   for (i = 1; i < significant_symbols; ++i) {
    339     bits += exp_val * (counts[i] + counts[256 - i]);
    340     exp_val *= exp_decay_factor;
    341   }
    342   return (float)(-0.1 * bits);
    343 }
    344 
    345 // Compute the Shanon's entropy: Sum(p*log2(p))
    346 static float ShannonEntropy(const int* const array, int n) {
    347   int i;
    348   float retval = 0.f;
    349   int sum = 0;
    350   for (i = 0; i < n; ++i) {
    351     if (array[i] != 0) {
    352       sum += array[i];
    353       retval -= VP8LFastSLog2(array[i]);
    354     }
    355   }
    356   retval += VP8LFastSLog2(sum);
    357   return retval;
    358 }
    359 
    360 static float PredictionCostSpatialHistogram(int accumulated[4][256],
    361                                             int tile[4][256]) {
    362   int i;
    363   int k;
    364   int combo[256];
    365   double retval = 0;
    366   for (i = 0; i < 4; ++i) {
    367     const double exp_val = 0.94;
    368     retval += PredictionCostSpatial(&tile[i][0], 1, exp_val);
    369     retval += ShannonEntropy(&tile[i][0], 256);
    370     for (k = 0; k < 256; ++k) {
    371       combo[k] = accumulated[i][k] + tile[i][k];
    372     }
    373     retval += ShannonEntropy(&combo[0], 256);
    374   }
    375   return (float)retval;
    376 }
    377 
    378 static int GetBestPredictorForTile(int width, int height,
    379                                    int tile_x, int tile_y, int bits,
    380                                    int accumulated[4][256],
    381                                    const uint32_t* const argb_scratch) {
    382   const int kNumPredModes = 14;
    383   const int col_start = tile_x << bits;
    384   const int row_start = tile_y << bits;
    385   const int tile_size = 1 << bits;
    386   const int ymax = (tile_size <= height - row_start) ?
    387       tile_size : height - row_start;
    388   const int xmax = (tile_size <= width - col_start) ?
    389       tile_size : width - col_start;
    390   int histo[4][256];
    391   float best_diff = MAX_DIFF_COST;
    392   int best_mode = 0;
    393 
    394   int mode;
    395   for (mode = 0; mode < kNumPredModes; ++mode) {
    396     const uint32_t* current_row = argb_scratch;
    397     const PredictorFunc pred_func = kPredictors[mode];
    398     float cur_diff;
    399     int y;
    400     memset(&histo[0][0], 0, sizeof(histo));
    401     for (y = 0; y < ymax; ++y) {
    402       int x;
    403       const int row = row_start + y;
    404       const uint32_t* const upper_row = current_row;
    405       current_row = upper_row + width;
    406       for (x = 0; x < xmax; ++x) {
    407         const int col = col_start + x;
    408         uint32_t predict;
    409         uint32_t predict_diff;
    410         if (row == 0) {
    411           predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
    412         } else if (col == 0) {
    413           predict = upper_row[col];  // Top.
    414         } else {
    415           predict = pred_func(current_row[col - 1], upper_row + col);
    416         }
    417         predict_diff = VP8LSubPixels(current_row[col], predict);
    418         ++histo[0][predict_diff >> 24];
    419         ++histo[1][((predict_diff >> 16) & 0xff)];
    420         ++histo[2][((predict_diff >> 8) & 0xff)];
    421         ++histo[3][(predict_diff & 0xff)];
    422       }
    423     }
    424     cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
    425     if (cur_diff < best_diff) {
    426       best_diff = cur_diff;
    427       best_mode = mode;
    428     }
    429   }
    430 
    431   return best_mode;
    432 }
    433 
    434 static void CopyTileWithPrediction(int width, int height,
    435                                    int tile_x, int tile_y, int bits, int mode,
    436                                    const uint32_t* const argb_scratch,
    437                                    uint32_t* const argb) {
    438   const int col_start = tile_x << bits;
    439   const int row_start = tile_y << bits;
    440   const int tile_size = 1 << bits;
    441   const int ymax = (tile_size <= height - row_start) ?
    442       tile_size : height - row_start;
    443   const int xmax = (tile_size <= width - col_start) ?
    444       tile_size : width - col_start;
    445   const PredictorFunc pred_func = kPredictors[mode];
    446   const uint32_t* current_row = argb_scratch;
    447 
    448   int y;
    449   for (y = 0; y < ymax; ++y) {
    450     int x;
    451     const int row = row_start + y;
    452     const uint32_t* const upper_row = current_row;
    453     current_row = upper_row + width;
    454     for (x = 0; x < xmax; ++x) {
    455       const int col = col_start + x;
    456       const int pix = row * width + col;
    457       uint32_t predict;
    458       if (row == 0) {
    459         predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
    460       } else if (col == 0) {
    461         predict = upper_row[col];  // Top.
    462       } else {
    463         predict = pred_func(current_row[col - 1], upper_row + col);
    464       }
    465       argb[pix] = VP8LSubPixels(current_row[col], predict);
    466     }
    467   }
    468 }
    469 
    470 void VP8LResidualImage(int width, int height, int bits,
    471                        uint32_t* const argb, uint32_t* const argb_scratch,
    472                        uint32_t* const image) {
    473   const int max_tile_size = 1 << bits;
    474   const int tiles_per_row = VP8LSubSampleSize(width, bits);
    475   const int tiles_per_col = VP8LSubSampleSize(height, bits);
    476   uint32_t* const upper_row = argb_scratch;
    477   uint32_t* const current_tile_rows = argb_scratch + width;
    478   int tile_y;
    479   int histo[4][256];
    480   memset(histo, 0, sizeof(histo));
    481   for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
    482     const int tile_y_offset = tile_y * max_tile_size;
    483     const int this_tile_height =
    484         (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
    485     int tile_x;
    486     if (tile_y > 0) {
    487       memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
    488              width * sizeof(*upper_row));
    489     }
    490     memcpy(current_tile_rows, &argb[tile_y_offset * width],
    491            this_tile_height * width * sizeof(*current_tile_rows));
    492     for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
    493       int pred;
    494       int y;
    495       const int tile_x_offset = tile_x * max_tile_size;
    496       int all_x_max = tile_x_offset + max_tile_size;
    497       if (all_x_max > width) {
    498         all_x_max = width;
    499       }
    500       pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
    501                                      argb_scratch);
    502       image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
    503       CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
    504                              argb_scratch, argb);
    505       for (y = 0; y < max_tile_size; ++y) {
    506         int ix;
    507         int all_x;
    508         int all_y = tile_y_offset + y;
    509         if (all_y >= height) {
    510           break;
    511         }
    512         ix = all_y * width + tile_x_offset;
    513         for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
    514           const uint32_t a = argb[ix];
    515           ++histo[0][a >> 24];
    516           ++histo[1][((a >> 16) & 0xff)];
    517           ++histo[2][((a >> 8) & 0xff)];
    518           ++histo[3][(a & 0xff)];
    519         }
    520       }
    521     }
    522   }
    523 }
    524 
    525 // Inverse prediction.
    526 static void PredictorInverseTransform(const VP8LTransform* const transform,
    527                                       int y_start, int y_end, uint32_t* data) {
    528   const int width = transform->xsize_;
    529   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    530     int x;
    531     const uint32_t pred0 = Predictor0(data[-1], NULL);
    532     AddPixelsEq(data, pred0);
    533     for (x = 1; x < width; ++x) {
    534       const uint32_t pred1 = Predictor1(data[x - 1], NULL);
    535       AddPixelsEq(data + x, pred1);
    536     }
    537     data += width;
    538     ++y_start;
    539   }
    540 
    541   {
    542     int y = y_start;
    543     const int mask = (1 << transform->bits_) - 1;
    544     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    545     const uint32_t* pred_mode_base =
    546         transform->data_ + (y >> transform->bits_) * tiles_per_row;
    547 
    548     while (y < y_end) {
    549       int x;
    550       const uint32_t pred2 = Predictor2(data[-1], data - width);
    551       const uint32_t* pred_mode_src = pred_mode_base;
    552       PredictorFunc pred_func;
    553 
    554       // First pixel follows the T (mode=2) mode.
    555       AddPixelsEq(data, pred2);
    556 
    557       // .. the rest:
    558       pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
    559       for (x = 1; x < width; ++x) {
    560         uint32_t pred;
    561         if ((x & mask) == 0) {    // start of tile. Read predictor function.
    562           pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
    563         }
    564         pred = pred_func(data[x - 1], data + x - width);
    565         AddPixelsEq(data + x, pred);
    566       }
    567       data += width;
    568       ++y;
    569       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
    570         pred_mode_base += tiles_per_row;
    571       }
    572     }
    573   }
    574 }
    575 
    576 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
    577   int i;
    578   for (i = 0; i < num_pixs; ++i) {
    579     const uint32_t argb = argb_data[i];
    580     const uint32_t green = (argb >> 8) & 0xff;
    581     const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
    582     const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
    583     argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
    584   }
    585 }
    586 
    587 // Add green to blue and red channels (i.e. perform the inverse transform of
    588 // 'subtract green').
    589 static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
    590                                  int y_start, int y_end, uint32_t* data) {
    591   const int width = transform->xsize_;
    592   const uint32_t* const data_end = data + (y_end - y_start) * width;
    593   while (data < data_end) {
    594     const uint32_t argb = *data;
    595     // "* 0001001u" is equivalent to "(green << 16) + green)"
    596     const uint32_t green = ((argb >> 8) & 0xff);
    597     uint32_t red_blue = (argb & 0x00ff00ffu);
    598     red_blue += (green << 16) | green;
    599     red_blue &= 0x00ff00ffu;
    600     *data++ = (argb & 0xff00ff00u) | red_blue;
    601   }
    602 }
    603 
    604 typedef struct {
    605   // Note: the members are uint8_t, so that any negative values are
    606   // automatically converted to "mod 256" values.
    607   uint8_t green_to_red_;
    608   uint8_t green_to_blue_;
    609   uint8_t red_to_blue_;
    610 } Multipliers;
    611 
    612 static WEBP_INLINE void MultipliersClear(Multipliers* m) {
    613   m->green_to_red_ = 0;
    614   m->green_to_blue_ = 0;
    615   m->red_to_blue_ = 0;
    616 }
    617 
    618 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
    619                                                 int8_t color) {
    620   return (uint32_t)((int)(color_pred) * color) >> 5;
    621 }
    622 
    623 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
    624                                                Multipliers* const m) {
    625   m->green_to_red_  = (color_code >>  0) & 0xff;
    626   m->green_to_blue_ = (color_code >>  8) & 0xff;
    627   m->red_to_blue_   = (color_code >> 16) & 0xff;
    628 }
    629 
    630 static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
    631   return 0xff000000u |
    632          ((uint32_t)(m->red_to_blue_) << 16) |
    633          ((uint32_t)(m->green_to_blue_) << 8) |
    634          m->green_to_red_;
    635 }
    636 
    637 static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
    638                                            uint32_t argb, int inverse) {
    639   const uint32_t green = argb >> 8;
    640   const uint32_t red = argb >> 16;
    641   uint32_t new_red = red;
    642   uint32_t new_blue = argb;
    643 
    644   if (inverse) {
    645     new_red += ColorTransformDelta(m->green_to_red_, green);
    646     new_red &= 0xff;
    647     new_blue += ColorTransformDelta(m->green_to_blue_, green);
    648     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
    649     new_blue &= 0xff;
    650   } else {
    651     new_red -= ColorTransformDelta(m->green_to_red_, green);
    652     new_red &= 0xff;
    653     new_blue -= ColorTransformDelta(m->green_to_blue_, green);
    654     new_blue -= ColorTransformDelta(m->red_to_blue_, red);
    655     new_blue &= 0xff;
    656   }
    657   return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    658 }
    659 
    660 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
    661                                           int ix, int xsize) {
    662   const uint32_t v = argb[ix];
    663   if (ix >= xsize + 3) {
    664     if (v == argb[ix - xsize] &&
    665         argb[ix - 1] == argb[ix - xsize - 1] &&
    666         argb[ix - 2] == argb[ix - xsize - 2] &&
    667         argb[ix - 3] == argb[ix - xsize - 3]) {
    668       return 1;
    669     }
    670     return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
    671   } else if (ix >= 3) {
    672     return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
    673   }
    674   return 0;
    675 }
    676 
    677 static float PredictionCostCrossColor(const int accumulated[256],
    678                                       const int counts[256]) {
    679   // Favor low entropy, locally and globally.
    680   int i;
    681   int combo[256];
    682   for (i = 0; i < 256; ++i) {
    683     combo[i] = accumulated[i] + counts[i];
    684   }
    685   return ShannonEntropy(combo, 256) +
    686          ShannonEntropy(counts, 256) +
    687          PredictionCostSpatial(counts, 3, 2.4);  // Favor small absolute values.
    688 }
    689 
    690 static Multipliers GetBestColorTransformForTile(
    691     int tile_x, int tile_y, int bits,
    692     Multipliers prevX,
    693     Multipliers prevY,
    694     int step, int xsize, int ysize,
    695     int* accumulated_red_histo,
    696     int* accumulated_blue_histo,
    697     const uint32_t* const argb) {
    698   float best_diff = MAX_DIFF_COST;
    699   float cur_diff;
    700   const int halfstep = step / 2;
    701   const int max_tile_size = 1 << bits;
    702   const int tile_y_offset = tile_y * max_tile_size;
    703   const int tile_x_offset = tile_x * max_tile_size;
    704   int green_to_red;
    705   int green_to_blue;
    706   int red_to_blue;
    707   int all_x_max = tile_x_offset + max_tile_size;
    708   int all_y_max = tile_y_offset + max_tile_size;
    709   Multipliers best_tx;
    710   MultipliersClear(&best_tx);
    711   if (all_x_max > xsize) {
    712     all_x_max = xsize;
    713   }
    714   if (all_y_max > ysize) {
    715     all_y_max = ysize;
    716   }
    717   for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
    718     int histo[256] = { 0 };
    719     int all_y;
    720     Multipliers tx;
    721     MultipliersClear(&tx);
    722     tx.green_to_red_ = green_to_red & 0xff;
    723 
    724     for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
    725       uint32_t predict;
    726       int ix = all_y * xsize + tile_x_offset;
    727       int all_x;
    728       for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
    729         if (SkipRepeatedPixels(argb, ix, xsize)) {
    730           continue;
    731         }
    732         predict = TransformColor(&tx, argb[ix], 0);
    733         ++histo[(predict >> 16) & 0xff];  // red.
    734       }
    735     }
    736     cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
    737     if (tx.green_to_red_ == prevX.green_to_red_) {
    738       cur_diff -= 3;  // favor keeping the areas locally similar
    739     }
    740     if (tx.green_to_red_ == prevY.green_to_red_) {
    741       cur_diff -= 3;  // favor keeping the areas locally similar
    742     }
    743     if (tx.green_to_red_ == 0) {
    744       cur_diff -= 3;
    745     }
    746     if (cur_diff < best_diff) {
    747       best_diff = cur_diff;
    748       best_tx = tx;
    749     }
    750   }
    751   best_diff = MAX_DIFF_COST;
    752   green_to_red = best_tx.green_to_red_;
    753   for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
    754     for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
    755       int all_y;
    756       int histo[256] = { 0 };
    757       Multipliers tx;
    758       tx.green_to_red_ = green_to_red;
    759       tx.green_to_blue_ = green_to_blue;
    760       tx.red_to_blue_ = red_to_blue;
    761       for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
    762         uint32_t predict;
    763         int all_x;
    764         int ix = all_y * xsize + tile_x_offset;
    765         for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
    766           if (SkipRepeatedPixels(argb, ix, xsize)) {
    767             continue;
    768           }
    769           predict = TransformColor(&tx, argb[ix], 0);
    770           ++histo[predict & 0xff];  // blue.
    771         }
    772       }
    773       cur_diff =
    774         PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
    775       if (tx.green_to_blue_ == prevX.green_to_blue_) {
    776         cur_diff -= 3;  // favor keeping the areas locally similar
    777       }
    778       if (tx.green_to_blue_ == prevY.green_to_blue_) {
    779         cur_diff -= 3;  // favor keeping the areas locally similar
    780       }
    781       if (tx.red_to_blue_ == prevX.red_to_blue_) {
    782         cur_diff -= 3;  // favor keeping the areas locally similar
    783       }
    784       if (tx.red_to_blue_ == prevY.red_to_blue_) {
    785         cur_diff -= 3;  // favor keeping the areas locally similar
    786       }
    787       if (tx.green_to_blue_ == 0) {
    788         cur_diff -= 3;
    789       }
    790       if (tx.red_to_blue_ == 0) {
    791         cur_diff -= 3;
    792       }
    793       if (cur_diff < best_diff) {
    794         best_diff = cur_diff;
    795         best_tx = tx;
    796       }
    797     }
    798   }
    799   return best_tx;
    800 }
    801 
    802 static void CopyTileWithColorTransform(int xsize, int ysize,
    803                                        int tile_x, int tile_y, int bits,
    804                                        Multipliers color_transform,
    805                                        uint32_t* const argb) {
    806   int y;
    807   int xscan = 1 << bits;
    808   int yscan = 1 << bits;
    809   tile_x <<= bits;
    810   tile_y <<= bits;
    811   if (xscan > xsize - tile_x) {
    812     xscan = xsize - tile_x;
    813   }
    814   if (yscan > ysize - tile_y) {
    815     yscan = ysize - tile_y;
    816   }
    817   yscan += tile_y;
    818   for (y = tile_y; y < yscan; ++y) {
    819     int ix = y * xsize + tile_x;
    820     const int end_ix = ix + xscan;
    821     for (; ix < end_ix; ++ix) {
    822       argb[ix] = TransformColor(&color_transform, argb[ix], 0);
    823     }
    824   }
    825 }
    826 
    827 void VP8LColorSpaceTransform(int width, int height, int bits, int step,
    828                              uint32_t* const argb, uint32_t* image) {
    829   const int max_tile_size = 1 << bits;
    830   int tile_xsize = VP8LSubSampleSize(width, bits);
    831   int tile_ysize = VP8LSubSampleSize(height, bits);
    832   int accumulated_red_histo[256] = { 0 };
    833   int accumulated_blue_histo[256] = { 0 };
    834   int tile_y;
    835   int tile_x;
    836   Multipliers prevX;
    837   Multipliers prevY;
    838   MultipliersClear(&prevY);
    839   MultipliersClear(&prevX);
    840   for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
    841     for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
    842       Multipliers color_transform;
    843       int all_x_max;
    844       int y;
    845       const int tile_y_offset = tile_y * max_tile_size;
    846       const int tile_x_offset = tile_x * max_tile_size;
    847       if (tile_y != 0) {
    848         ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
    849         ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
    850                                &prevY);
    851       } else if (tile_x != 0) {
    852         ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
    853       }
    854       color_transform =
    855           GetBestColorTransformForTile(tile_x, tile_y, bits,
    856                                        prevX, prevY,
    857                                        step, width, height,
    858                                        &accumulated_red_histo[0],
    859                                        &accumulated_blue_histo[0],
    860                                        argb);
    861       image[tile_y * tile_xsize + tile_x] =
    862           MultipliersToColorCode(&color_transform);
    863       CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
    864                                  color_transform, argb);
    865 
    866       // Gather accumulated histogram data.
    867       all_x_max = tile_x_offset + max_tile_size;
    868       if (all_x_max > width) {
    869         all_x_max = width;
    870       }
    871       for (y = 0; y < max_tile_size; ++y) {
    872         int ix;
    873         int all_x;
    874         int all_y = tile_y_offset + y;
    875         if (all_y >= height) {
    876           break;
    877         }
    878         ix = all_y * width + tile_x_offset;
    879         for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
    880           if (ix >= 2 &&
    881               argb[ix] == argb[ix - 2] &&
    882               argb[ix] == argb[ix - 1]) {
    883             continue;  // repeated pixels are handled by backward references
    884           }
    885           if (ix >= width + 2 &&
    886               argb[ix - 2] == argb[ix - width - 2] &&
    887               argb[ix - 1] == argb[ix - width - 1] &&
    888               argb[ix] == argb[ix - width]) {
    889             continue;  // repeated pixels are handled by backward references
    890           }
    891           ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
    892           ++accumulated_blue_histo[argb[ix] & 0xff];
    893         }
    894       }
    895     }
    896   }
    897 }
    898 
    899 // Color space inverse transform.
    900 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
    901                                        int y_start, int y_end, uint32_t* data) {
    902   const int width = transform->xsize_;
    903   const int mask = (1 << transform->bits_) - 1;
    904   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    905   int y = y_start;
    906   const uint32_t* pred_row =
    907       transform->data_ + (y >> transform->bits_) * tiles_per_row;
    908 
    909   while (y < y_end) {
    910     const uint32_t* pred = pred_row;
    911     Multipliers m = { 0, 0, 0 };
    912     int x;
    913 
    914     for (x = 0; x < width; ++x) {
    915       if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
    916       data[x] = TransformColor(&m, data[x], 1);
    917     }
    918     data += width;
    919     ++y;
    920     if ((y & mask) == 0) pred_row += tiles_per_row;;
    921   }
    922 }
    923 
    924 // Separate out pixels packed together using pixel-bundling.
    925 static void ColorIndexInverseTransform(
    926     const VP8LTransform* const transform,
    927     int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
    928   int y;
    929   const int bits_per_pixel = 8 >> transform->bits_;
    930   const int width = transform->xsize_;
    931   const uint32_t* const color_map = transform->data_;
    932   if (bits_per_pixel < 8) {
    933     const int pixels_per_byte = 1 << transform->bits_;
    934     const int count_mask = pixels_per_byte - 1;
    935     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
    936     for (y = y_start; y < y_end; ++y) {
    937       uint32_t packed_pixels = 0;
    938       int x;
    939       for (x = 0; x < width; ++x) {
    940         // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
    941         // increments of x. Fortunately, pixels_per_byte is a power of 2, so
    942         // can just use a mask for that, instead of decrementing a counter.
    943         if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
    944         *dst++ = color_map[packed_pixels & bit_mask];
    945         packed_pixels >>= bits_per_pixel;
    946       }
    947     }
    948   } else {
    949     for (y = y_start; y < y_end; ++y) {
    950       int x;
    951       for (x = 0; x < width; ++x) {
    952         *dst++ = color_map[((*src++) >> 8) & 0xff];
    953       }
    954     }
    955   }
    956 }
    957 
    958 void VP8LInverseTransform(const VP8LTransform* const transform,
    959                           int row_start, int row_end,
    960                           const uint32_t* const in, uint32_t* const out) {
    961   assert(row_start < row_end);
    962   assert(row_end <= transform->ysize_);
    963   switch (transform->type_) {
    964     case SUBTRACT_GREEN:
    965       AddGreenToBlueAndRed(transform, row_start, row_end, out);
    966       break;
    967     case PREDICTOR_TRANSFORM:
    968       PredictorInverseTransform(transform, row_start, row_end, out);
    969       if (row_end != transform->ysize_) {
    970         // The last predicted row in this iteration will be the top-pred row
    971         // for the first row in next iteration.
    972         const int width = transform->xsize_;
    973         memcpy(out - width, out + (row_end - row_start - 1) * width,
    974                width * sizeof(*out));
    975       }
    976       break;
    977     case CROSS_COLOR_TRANSFORM:
    978       ColorSpaceInverseTransform(transform, row_start, row_end, out);
    979       break;
    980     case COLOR_INDEXING_TRANSFORM:
    981       if (in == out && transform->bits_ > 0) {
    982         // Move packed pixels to the end of unpacked region, so that unpacking
    983         // can occur seamlessly.
    984         // Also, note that this is the only transform that applies on
    985         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
    986         // transforms work on effective width of xsize_.
    987         const int out_stride = (row_end - row_start) * transform->xsize_;
    988         const int in_stride = (row_end - row_start) *
    989             VP8LSubSampleSize(transform->xsize_, transform->bits_);
    990         uint32_t* const src = out + out_stride - in_stride;
    991         memmove(src, out, in_stride * sizeof(*src));
    992         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
    993       } else {
    994         ColorIndexInverseTransform(transform, row_start, row_end, in, out);
    995       }
    996       break;
    997   }
    998 }
    999 
   1000 //------------------------------------------------------------------------------
   1001 // Color space conversion.
   1002 
   1003 static int is_big_endian(void) {
   1004   static const union {
   1005     uint16_t w;
   1006     uint8_t b[2];
   1007   } tmp = { 1 };
   1008   return (tmp.b[0] != 1);
   1009 }
   1010 
   1011 static void ConvertBGRAToRGB(const uint32_t* src,
   1012                              int num_pixels, uint8_t* dst) {
   1013   const uint32_t* const src_end = src + num_pixels;
   1014   while (src < src_end) {
   1015     const uint32_t argb = *src++;
   1016     *dst++ = (argb >> 16) & 0xff;
   1017     *dst++ = (argb >>  8) & 0xff;
   1018     *dst++ = (argb >>  0) & 0xff;
   1019   }
   1020 }
   1021 
   1022 static void ConvertBGRAToRGBA(const uint32_t* src,
   1023                               int num_pixels, uint8_t* dst) {
   1024   const uint32_t* const src_end = src + num_pixels;
   1025   while (src < src_end) {
   1026     const uint32_t argb = *src++;
   1027     *dst++ = (argb >> 16) & 0xff;
   1028     *dst++ = (argb >>  8) & 0xff;
   1029     *dst++ = (argb >>  0) & 0xff;
   1030     *dst++ = (argb >> 24) & 0xff;
   1031   }
   1032 }
   1033 
   1034 static void ConvertBGRAToRGBA4444(const uint32_t* src,
   1035                                   int num_pixels, uint8_t* dst) {
   1036   const uint32_t* const src_end = src + num_pixels;
   1037   while (src < src_end) {
   1038     const uint32_t argb = *src++;
   1039 #ifdef ANDROID_WEBP_RGB
   1040     *dst++ = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
   1041     *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
   1042 #else
   1043     *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
   1044     *dst++ = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
   1045 #endif
   1046   }
   1047 }
   1048 
   1049 static void ConvertBGRAToRGB565(const uint32_t* src,
   1050                                 int num_pixels, uint8_t* dst) {
   1051   const uint32_t* const src_end = src + num_pixels;
   1052   while (src < src_end) {
   1053     const uint32_t argb = *src++;
   1054 #ifdef ANDROID_WEBP_RGB
   1055     *dst++ = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
   1056     *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
   1057 #else
   1058     *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
   1059     *dst++ = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
   1060 #endif
   1061   }
   1062 }
   1063 
   1064 static void ConvertBGRAToBGR(const uint32_t* src,
   1065                              int num_pixels, uint8_t* dst) {
   1066   const uint32_t* const src_end = src + num_pixels;
   1067   while (src < src_end) {
   1068     const uint32_t argb = *src++;
   1069     *dst++ = (argb >>  0) & 0xff;
   1070     *dst++ = (argb >>  8) & 0xff;
   1071     *dst++ = (argb >> 16) & 0xff;
   1072   }
   1073 }
   1074 
   1075 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
   1076                        int swap_on_big_endian) {
   1077   if (is_big_endian() == swap_on_big_endian) {
   1078     const uint32_t* const src_end = src + num_pixels;
   1079     while (src < src_end) {
   1080       uint32_t argb = *src++;
   1081 #if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
   1082       __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
   1083       *(uint32_t*)dst = argb;
   1084       dst += sizeof(argb);
   1085 #elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
   1086       argb = _byteswap_ulong(argb);
   1087       *(uint32_t*)dst = argb;
   1088       dst += sizeof(argb);
   1089 #else
   1090       *dst++ = (argb >> 24) & 0xff;
   1091       *dst++ = (argb >> 16) & 0xff;
   1092       *dst++ = (argb >>  8) & 0xff;
   1093       *dst++ = (argb >>  0) & 0xff;
   1094 #endif
   1095     }
   1096   } else {
   1097     memcpy(dst, src, num_pixels * sizeof(*src));
   1098   }
   1099 }
   1100 
   1101 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
   1102                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
   1103   switch (out_colorspace) {
   1104     case MODE_RGB:
   1105       ConvertBGRAToRGB(in_data, num_pixels, rgba);
   1106       break;
   1107     case MODE_RGBA:
   1108       ConvertBGRAToRGBA(in_data, num_pixels, rgba);
   1109       break;
   1110     case MODE_rgbA:
   1111       ConvertBGRAToRGBA(in_data, num_pixels, rgba);
   1112       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
   1113       break;
   1114     case MODE_BGR:
   1115       ConvertBGRAToBGR(in_data, num_pixels, rgba);
   1116       break;
   1117     case MODE_BGRA:
   1118       CopyOrSwap(in_data, num_pixels, rgba, 1);
   1119       break;
   1120     case MODE_bgrA:
   1121       CopyOrSwap(in_data, num_pixels, rgba, 1);
   1122       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
   1123       break;
   1124     case MODE_ARGB:
   1125       CopyOrSwap(in_data, num_pixels, rgba, 0);
   1126       break;
   1127     case MODE_Argb:
   1128       CopyOrSwap(in_data, num_pixels, rgba, 0);
   1129       WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
   1130       break;
   1131     case MODE_RGBA_4444:
   1132       ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
   1133       break;
   1134     case MODE_rgbA_4444:
   1135       ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
   1136       WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
   1137       break;
   1138     case MODE_RGB_565:
   1139       ConvertBGRAToRGB565(in_data, num_pixels, rgba);
   1140       break;
   1141     default:
   1142       assert(0);          // Code flow should not reach here.
   1143   }
   1144 }
   1145 
   1146 //------------------------------------------------------------------------------
   1147 
   1148 #if defined(__cplusplus) || defined(c_plusplus)
   1149 }    // extern "C"
   1150 #endif
   1151