Home | History | Annotate | Download | only in enc
      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // WebPPicture utils for colorspace conversion
     11 //
     12 // Author: Skal (pascal.massimino (at) gmail.com)
     13 
     14 #include <assert.h>
     15 #include <stdlib.h>
     16 #include <math.h>
     17 
     18 #include "./vp8enci.h"
     19 #include "../utils/random.h"
     20 #include "../utils/utils.h"
     21 #include "../dsp/yuv.h"
     22 
// Gamma-compression during RGB->U/V averaging is enabled while this macro is
// defined. Comment the line out to disable it.
#define USE_GAMMA_COMPRESSION

// If defined, use table to compute x / alpha.
#define USE_INVERSE_ALPHA_TABLE
     28 
// Byte-order probe: the 32-bit value 0xff000000 is stored through 'argb' and
// can then be inspected byte by byte through 'bytes'.
static const union {
  uint32_t argb;
  uint8_t  bytes[4];
} test_endian = { 0xff000000u };
// True when the 0xff byte (top byte of the 32-bit value) sits at bytes[3],
// i.e. at the highest address.
#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
     34 
     35 //------------------------------------------------------------------------------
     36 // Detection of non-trivial transparency
     37 
     38 // Returns true if alpha[] has non-0xff values.
     39 static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
     40                           int x_step, int y_step) {
     41   if (alpha == NULL) return 0;
     42   while (height-- > 0) {
     43     int x;
     44     for (x = 0; x < width * x_step; x += x_step) {
     45       if (alpha[x] != 0xff) return 1;  // TODO(skal): check 4/8 bytes at a time.
     46     }
     47     alpha += y_step;
     48   }
     49   return 0;
     50 }
     51 
     52 // Checking for the presence of non-opaque alpha.
     53 int WebPPictureHasTransparency(const WebPPicture* picture) {
     54   if (picture == NULL) return 0;
     55   if (!picture->use_argb) {
     56     return CheckNonOpaque(picture->a, picture->width, picture->height,
     57                           1, picture->a_stride);
     58   } else {
     59     int x, y;
     60     const uint32_t* argb = picture->argb;
     61     if (argb == NULL) return 0;
     62     for (y = 0; y < picture->height; ++y) {
     63       for (x = 0; x < picture->width; ++x) {
     64         if (argb[x] < 0xff000000u) return 1;   // test any alpha values != 0xff
     65       }
     66       argb += picture->argb_stride;
     67     }
     68   }
     69   return 0;
     70 }
     71 
     72 //------------------------------------------------------------------------------
     73 // Code for gamma correction
     74 
     75 #if defined(USE_GAMMA_COMPRESSION)
     76 
// gamma-compensates loss of resolution during chroma subsampling
#define kGamma 0.80      // for now we use a different gamma value than kGammaF
#define kGammaFix 12     // fixed-point precision for linear values
// largest linear value representable with kGammaFix bits
#define kGammaScale ((1 << kGammaFix) - 1)
#define kGammaTabFix 7   // fixed-point fractional bits precision
// number of linear units covered by one step of kLinearToGammaTab[]
#define kGammaTabScale (1 << kGammaTabFix)
// rounding term used when descaling by kGammaTabFix bits
#define kGammaTabRounder (kGammaTabScale >> 1)
// number of steps in kLinearToGammaTab[]
#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))

// linear -> gamma table; the extra entry lets Interpolate() read
// position 'tab_pos + 1' for the last step.
static int kLinearToGammaTab[kGammaTabSize + 1];
// 8-bit gamma -> linear table, indexed by the sample value.
static uint16_t kGammaToLinearTab[256];
// Set to 1 by InitGammaTables() once both tables are filled.
static volatile int kGammaTablesOk = 0;
     89 
     90 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {
     91   if (!kGammaTablesOk) {
     92     int v;
     93     const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
     94     const double norm = 1. / 255.;
     95     for (v = 0; v <= 255; ++v) {
     96       kGammaToLinearTab[v] =
     97           (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
     98     }
     99     for (v = 0; v <= kGammaTabSize; ++v) {
    100       kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
    101     }
    102     kGammaTablesOk = 1;
    103   }
    104 }
    105 
    106 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
    107   return kGammaToLinearTab[v];
    108 }
    109 
    110 static WEBP_INLINE int Interpolate(int v) {
    111   const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
    112   const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
    113   const int v0 = kLinearToGammaTab[tab_pos];
    114   const int v1 = kLinearToGammaTab[tab_pos + 1];
    115   const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
    116   assert(tab_pos + 1 < kGammaTabSize + 1);
    117   return y;
    118 }
    119 
    120 // Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
    121 // U/V value, suitable for RGBToU/V calls.
    122 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
    123   const int y = Interpolate(base_value << shift);   // final uplifted value
    124   return (y + kGammaTabRounder) >> kGammaTabFix;    // descale
    125 }
    126 
    127 #else
    128 
    129 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {}
    130 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
    131 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
    132   return (int)(base_value << shift);
    133 }
    134 
    135 #endif    // USE_GAMMA_COMPRESSION
    136 
    137 //------------------------------------------------------------------------------
    138 // RGB -> YUV conversion
    139 
    140 static int RGBToY(int r, int g, int b, VP8Random* const rg) {
    141   return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF)
    142                       : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
    143 }
    144 
    145 static int RGBToU(int r, int g, int b, VP8Random* const rg) {
    146   return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2)
    147                       : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
    148 }
    149 
    150 static int RGBToV(int r, int g, int b, VP8Random* const rg) {
    151   return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2)
    152                       : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
    153 }
    154 
    155 //------------------------------------------------------------------------------
    156 // Smart RGB->YUV conversion
    157 
// Upper bound on the number of refinement passes run by PreprocessARGB().
static const int kNumIterations = 6;
// Smallest picture width/height PreprocessARGB() asserts on.
static const int kMinDimensionIterativeConversion = 4;

// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
// banding sometimes. Better use extra precision.
#define SFIX 2                // fixed-point precision of RGB and Y/W
typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W

// Half of one 8-bit unit expressed in SFIX precision.
#define SHALF (1 << SFIX >> 1)
// Largest value clip_y() lets through (8 bits plus SFIX fractional bits).
#define MAX_Y_T ((256 << SFIX) - 1)
// Rounding term for right shifts by (YUV_FIX + SFIX) bits.
#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
    170 
    171 #if defined(USE_GAMMA_COMPRESSION)
    172 
// float variant of gamma-correction
// We use tables of different size and precision, along with a 'real-world'
// Gamma value close to ~2.
#define kGammaF 2.2
// One entry per fixed_y_t value in [0, MAX_Y_T]: the size scales with SFIX.
static float kGammaToLinearTabF[MAX_Y_T + 1];
// Two extra entries: one to interpolate the last step, one guard entry
// (see InitGammaTablesF()).
static float kLinearToGammaTabF[kGammaTabSize + 2];
// Set to 1 by InitGammaTablesF() once both tables are filled.
static volatile int kGammaTablesFOk = 0;
    180 
    181 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {
    182   if (!kGammaTablesFOk) {
    183     int v;
    184     const double norm = 1. / MAX_Y_T;
    185     const double scale = 1. / kGammaTabSize;
    186     for (v = 0; v <= MAX_Y_T; ++v) {
    187       kGammaToLinearTabF[v] = (float)pow(norm * v, kGammaF);
    188     }
    189     for (v = 0; v <= kGammaTabSize; ++v) {
    190       kLinearToGammaTabF[v] = (float)(MAX_Y_T * pow(scale * v, 1. / kGammaF));
    191     }
    192     // to prevent small rounding errors to cause read-overflow:
    193     kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize];
    194     kGammaTablesFOk = 1;
    195   }
    196 }
    197 
    198 static WEBP_INLINE float GammaToLinearF(int v) {
    199   return kGammaToLinearTabF[v];
    200 }
    201 
    202 static WEBP_INLINE int LinearToGammaF(float value) {
    203   const float v = value * kGammaTabSize;
    204   const int tab_pos = (int)v;
    205   const float x = v - (float)tab_pos;      // fractional part
    206   const float v0 = kLinearToGammaTabF[tab_pos + 0];
    207   const float v1 = kLinearToGammaTabF[tab_pos + 1];
    208   const float y = v1 * x + v0 * (1.f - x);  // interpolate
    209   return (int)(y + .5);
    210 }
    211 
    212 #else
    213 
    214 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {}
    215 static WEBP_INLINE float GammaToLinearF(int v) {
    216   const float norm = 1.f / MAX_Y_T;
    217   return norm * v;
    218 }
    219 static WEBP_INLINE int LinearToGammaF(float value) {
    220   return (int)(MAX_Y_T * value + .5);
    221 }
    222 
    223 #endif    // USE_GAMMA_COMPRESSION
    224 
    225 //------------------------------------------------------------------------------
    226 
    227 static uint8_t clip_8b(fixed_t v) {
    228   return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
    229 }
    230 
    231 static fixed_y_t clip_y(int y) {
    232   return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
    233 }
    234 
    235 //------------------------------------------------------------------------------
    236 
    237 static int RGBToGray(int r, int g, int b) {
    238   const int luma = 19595 * r + 38470 * g + 7471 * b + YUV_HALF;
    239   return (luma >> YUV_FIX);
    240 }
    241 
    242 static float RGBToGrayF(float r, float g, float b) {
    243   return 0.299f * r + 0.587f * g + 0.114f * b;
    244 }
    245 
    246 static int ScaleDown(int a, int b, int c, int d) {
    247   const float A = GammaToLinearF(a);
    248   const float B = GammaToLinearF(b);
    249   const float C = GammaToLinearF(c);
    250   const float D = GammaToLinearF(d);
    251   return LinearToGammaF(0.25f * (A + B + C + D));
    252 }
    253 
    254 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) {
    255   while (len-- > 0) {
    256     const float R = GammaToLinearF(src[0]);
    257     const float G = GammaToLinearF(src[1]);
    258     const float B = GammaToLinearF(src[2]);
    259     const float Y = RGBToGrayF(R, G, B);
    260     *dst++ = (fixed_y_t)LinearToGammaF(Y);
    261     src += 3;
    262   }
    263 }
    264 
    265 static int UpdateChroma(const fixed_y_t* src1,
    266                         const fixed_y_t* src2,
    267                         fixed_t* dst, fixed_y_t* tmp, int len) {
    268   int diff = 0;
    269   while (len--> 0) {
    270     const int r = ScaleDown(src1[0], src1[3], src2[0], src2[3]);
    271     const int g = ScaleDown(src1[1], src1[4], src2[1], src2[4]);
    272     const int b = ScaleDown(src1[2], src1[5], src2[2], src2[5]);
    273     const int W = RGBToGray(r, g, b);
    274     const int r_avg = (src1[0] + src1[3] + src2[0] + src2[3] + 2) >> 2;
    275     const int g_avg = (src1[1] + src1[4] + src2[1] + src2[4] + 2) >> 2;
    276     const int b_avg = (src1[2] + src1[5] + src2[2] + src2[5] + 2) >> 2;
    277     dst[0] = (fixed_t)(r - W);
    278     dst[1] = (fixed_t)(g - W);
    279     dst[2] = (fixed_t)(b - W);
    280     dst += 3;
    281     src1 += 6;
    282     src2 += 6;
    283     if (tmp != NULL) {
    284       tmp[0] = tmp[1] = clip_y(W);
    285       tmp += 2;
    286     }
    287     diff += abs(RGBToGray(r_avg, g_avg, b_avg) - W);
    288   }
    289   return diff;
    290 }
    291 
    292 //------------------------------------------------------------------------------
    293 
    294 static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B,
    295                               int rightwise) {
    296   int v;
    297   if (!rightwise) {
    298     v = (A[0] * 9 + A[-3] * 3 + B[0] * 3 + B[-3]);
    299   } else {
    300     v = (A[0] * 9 + A[+3] * 3 + B[0] * 3 + B[+3]);
    301   }
    302   return (v + 8) >> 4;
    303 }
    304 
    305 static WEBP_INLINE int Filter2(int A, int B) { return (A * 3 + B + 2) >> 2; }
    306 
    307 //------------------------------------------------------------------------------
    308 
    309 static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
    310   return ((fixed_y_t)a << SFIX) | SHALF;
    311 }
    312 
    313 static void ImportOneRow(const uint8_t* const r_ptr,
    314                          const uint8_t* const g_ptr,
    315                          const uint8_t* const b_ptr,
    316                          int step,
    317                          int pic_width,
    318                          fixed_y_t* const dst) {
    319   int i;
    320   for (i = 0; i < pic_width; ++i) {
    321     const int off = i * step;
    322     dst[3 * i + 0] = UpLift(r_ptr[off]);
    323     dst[3 * i + 1] = UpLift(g_ptr[off]);
    324     dst[3 * i + 2] = UpLift(b_ptr[off]);
    325   }
    326   if (pic_width & 1) {  // replicate rightmost pixel
    327     memcpy(dst + 3 * pic_width, dst + 3 * (pic_width - 1), 3 * sizeof(*dst));
    328   }
    329 }
    330 
    331 static void InterpolateTwoRows(const fixed_y_t* const best_y,
    332                                const fixed_t* const prev_uv,
    333                                const fixed_t* const cur_uv,
    334                                const fixed_t* const next_uv,
    335                                int w,
    336                                fixed_y_t* const out1,
    337                                fixed_y_t* const out2) {
    338   int i, k;
    339   {  // special boundary case for i==0
    340     const int W0 = best_y[0];
    341     const int W1 = best_y[w];
    342     for (k = 0; k <= 2; ++k) {
    343       out1[k] = clip_y(Filter2(cur_uv[k], prev_uv[k]) + W0);
    344       out2[k] = clip_y(Filter2(cur_uv[k], next_uv[k]) + W1);
    345     }
    346   }
    347   for (i = 1; i < w - 1; ++i) {
    348     const int W0 = best_y[i + 0];
    349     const int W1 = best_y[i + w];
    350     const int off = 3 * (i >> 1);
    351     for (k = 0; k <= 2; ++k) {
    352       const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1);
    353       const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1);
    354       out1[3 * i + k] = clip_y(tmp0 + W0);
    355       out2[3 * i + k] = clip_y(tmp1 + W1);
    356     }
    357   }
    358   {  // special boundary case for i == w - 1
    359     const int W0 = best_y[i + 0];
    360     const int W1 = best_y[i + w];
    361     const int off = 3 * (i >> 1);
    362     for (k = 0; k <= 2; ++k) {
    363       out1[3 * i + k] = clip_y(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0);
    364       out2[3 * i + k] = clip_y(Filter2(cur_uv[off + k], next_uv[off + k]) + W1);
    365     }
    366   }
    367 }
    368 
    369 static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
    370   const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
    371   return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
    372 }
    373 
    374 static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
    375   const int u =  -9719 * r - 19081 * g + 28800 * b + SROUNDER;
    376   return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
    377 }
    378 
    379 static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
    380   const int v = +28800 * r - 24116 * g -  4684 * b + SROUNDER;
    381   return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
    382 }
    383 
    384 static int ConvertWRGBToYUV(const fixed_y_t* const best_y,
    385                             const fixed_t* const best_uv,
    386                             WebPPicture* const picture) {
    387   int i, j;
    388   const int w = (picture->width + 1) & ~1;
    389   const int h = (picture->height + 1) & ~1;
    390   const int uv_w = w >> 1;
    391   const int uv_h = h >> 1;
    392   for (j = 0; j < picture->height; ++j) {
    393     for (i = 0; i < picture->width; ++i) {
    394       const int off = 3 * ((i >> 1) + (j >> 1) * uv_w);
    395       const int off2 = i + j * picture->y_stride;
    396       const int W = best_y[i + j * w];
    397       const int r = best_uv[off + 0] + W;
    398       const int g = best_uv[off + 1] + W;
    399       const int b = best_uv[off + 2] + W;
    400       picture->y[off2] = ConvertRGBToY(r, g, b);
    401     }
    402   }
    403   for (j = 0; j < uv_h; ++j) {
    404     uint8_t* const dst_u = picture->u + j * picture->uv_stride;
    405     uint8_t* const dst_v = picture->v + j * picture->uv_stride;
    406     for (i = 0; i < uv_w; ++i) {
    407       const int off = 3 * (i + j * uv_w);
    408       const int r = best_uv[off + 0];
    409       const int g = best_uv[off + 1];
    410       const int b = best_uv[off + 2];
    411       dst_u[i] = ConvertRGBToU(r, g, b);
    412       dst_v[i] = ConvertRGBToV(r, g, b);
    413     }
    414   }
    415   return 1;
    416 }
    417 
    418 //------------------------------------------------------------------------------
    419 // Main function
    420 
    421 #define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
    422 
    423 static int PreprocessARGB(const uint8_t* const r_ptr,
    424                           const uint8_t* const g_ptr,
    425                           const uint8_t* const b_ptr,
    426                           int step, int rgb_stride,
    427                           WebPPicture* const picture) {
    428   // we expand the right/bottom border if needed
    429   const int w = (picture->width + 1) & ~1;
    430   const int h = (picture->height + 1) & ~1;
    431   const int uv_w = w >> 1;
    432   const int uv_h = h >> 1;
    433   int i, j, iter;
    434 
    435   // TODO(skal): allocate one big memory chunk. But for now, it's easier
    436   // for valgrind debugging to have several chunks.
    437   fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
    438   fixed_y_t* const best_y = SAFE_ALLOC(w, h, fixed_y_t);
    439   fixed_y_t* const target_y = SAFE_ALLOC(w, h, fixed_y_t);
    440   fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
    441   fixed_t* const best_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
    442   fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
    443   fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
    444   int ok;
    445   int diff_sum = 0;
    446   const int first_diff_threshold = (int)(2.5 * w * h);
    447   const int min_improvement = 5;   // stop if improvement is below this %
    448   const int min_first_improvement = 80;
    449 
    450   if (best_y == NULL || best_uv == NULL ||
    451       target_y == NULL || target_uv == NULL ||
    452       best_rgb_y == NULL || best_rgb_uv == NULL ||
    453       tmp_buffer == NULL) {
    454     ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
    455     goto End;
    456   }
    457   assert(picture->width >= kMinDimensionIterativeConversion);
    458   assert(picture->height >= kMinDimensionIterativeConversion);
    459 
    460   // Import RGB samples to W/RGB representation.
    461   for (j = 0; j < picture->height; j += 2) {
    462     const int is_last_row = (j == picture->height - 1);
    463     fixed_y_t* const src1 = tmp_buffer;
    464     fixed_y_t* const src2 = tmp_buffer + 3 * w;
    465     const int off1 = j * rgb_stride;
    466     const int off2 = off1 + rgb_stride;
    467     const int uv_off = (j >> 1) * 3 * uv_w;
    468     fixed_y_t* const dst_y = best_y + j * w;
    469 
    470     // prepare two rows of input
    471     ImportOneRow(r_ptr + off1, g_ptr + off1, b_ptr + off1,
    472                  step, picture->width, src1);
    473     if (!is_last_row) {
    474       ImportOneRow(r_ptr + off2, g_ptr + off2, b_ptr + off2,
    475                    step, picture->width, src2);
    476     } else {
    477       memcpy(src2, src1, 3 * w * sizeof(*src2));
    478     }
    479     UpdateW(src1, target_y + (j + 0) * w, w);
    480     UpdateW(src2, target_y + (j + 1) * w, w);
    481     diff_sum += UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w);
    482     memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv));
    483     memcpy(dst_y + w, dst_y, w * sizeof(*dst_y));
    484   }
    485 
    486   // Iterate and resolve clipping conflicts.
    487   for (iter = 0; iter < kNumIterations; ++iter) {
    488     int k;
    489     const fixed_t* cur_uv = best_uv;
    490     const fixed_t* prev_uv = best_uv;
    491     const int old_diff_sum = diff_sum;
    492     diff_sum = 0;
    493     for (j = 0; j < h; j += 2) {
    494       fixed_y_t* const src1 = tmp_buffer;
    495       fixed_y_t* const src2 = tmp_buffer + 3 * w;
    496       {
    497         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
    498         InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv,
    499                            w, src1, src2);
    500         prev_uv = cur_uv;
    501         cur_uv = next_uv;
    502       }
    503 
    504       UpdateW(src1, best_rgb_y + 0 * w, w);
    505       UpdateW(src2, best_rgb_y + 1 * w, w);
    506       diff_sum += UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w);
    507 
    508       // update two rows of Y and one row of RGB
    509       for (i = 0; i < 2 * w; ++i) {
    510         const int off = i + j * w;
    511         const int diff_y = target_y[off] - best_rgb_y[i];
    512         const int new_y = (int)best_y[off] + diff_y;
    513         best_y[off] = clip_y(new_y);
    514       }
    515       for (i = 0; i < uv_w; ++i) {
    516         const int off = 3 * (i + (j >> 1) * uv_w);
    517         int W;
    518         for (k = 0; k <= 2; ++k) {
    519           const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[3 * i + k];
    520           best_uv[off + k] += diff_uv;
    521         }
    522         W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]);
    523         for (k = 0; k <= 2; ++k) {
    524           best_uv[off + k] -= W;
    525         }
    526       }
    527     }
    528     // test exit condition
    529     if (diff_sum > 0) {
    530       const int improvement = 100 * abs(diff_sum - old_diff_sum) / diff_sum;
    531       // Check if first iteration gave good result already, without a large
    532       // jump of improvement (otherwise it means we need to try few extra
    533       // iterations, just to be sure).
    534       if (iter == 0 && diff_sum < first_diff_threshold &&
    535           improvement < min_first_improvement) {
    536         break;
    537       }
    538       // then, check if improvement is stalling.
    539       if (improvement < min_improvement) {
    540         break;
    541       }
    542     } else {
    543       break;
    544     }
    545   }
    546 
    547   // final reconstruction
    548   ok = ConvertWRGBToYUV(best_y, best_uv, picture);
    549 
    550  End:
    551   WebPSafeFree(best_y);
    552   WebPSafeFree(best_uv);
    553   WebPSafeFree(target_y);
    554   WebPSafeFree(target_uv);
    555   WebPSafeFree(best_rgb_y);
    556   WebPSafeFree(best_rgb_uv);
    557   WebPSafeFree(tmp_buffer);
    558   return ok;
    559 }
    560 #undef SAFE_ALLOC
    561 
    562 //------------------------------------------------------------------------------
    563 // "Fast" regular RGB->YUV
    564 
// The macros below all read a variable named 'rgb_stride' from the scope
// they are expanded in; it gives the distance between two rows.

// Sum of the linear values of the 2x2 block at 'ptr' (samples 'step' apart
// horizontally), converted back with LinearToGamma(). Note that the macro
// body ends with a line continuation, so the following blank line belongs
// to it.
#define SUM4(ptr, step) LinearToGamma(                     \
    GammaToLinear((ptr)[0]) +                              \
    GammaToLinear((ptr)[(step)]) +                         \
    GammaToLinear((ptr)[rgb_stride]) +                     \
    GammaToLinear((ptr)[rgb_stride + (step)]), 0)          \

// Same for a single column of two samples; the sum is shifted left by one
// bit before being converted.
#define SUM2(ptr) \
    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)

// Plain sums of two (one column) or four (2x2 block) values. SUM4ALPHA
// takes the second column at a fixed offset of 4 from 'ptr'.
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
    576 
#if defined(USE_INVERSE_ALPHA_TABLE)

// Number of fractional bits used by the entries of kInvAlpha[].
static const int kAlphaFix = 19;
// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
// formula is then equal to v / a in most (99.6%) cases. Note that this table
// and constant are adjusted very tightly to fit 32b arithmetic.
// In particular, they use the fact that the operands for 'v / a' are actually
// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
// overflow is: kGammaFix + kAlphaFix <= 31.
// The table has one entry per possible sum of four 8-bit alpha values
// (0 to 4 * 0xff); entry 0 is a placeholder set to 0.
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
  0,  /* alpha = 0 */
  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
  58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
  30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
  20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
  15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
  12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
  10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
  9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
  8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
  7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
  6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
  5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
  5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
  4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
  4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
  4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
  4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
  3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
  3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
  3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
  3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
  3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
  2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
  2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
  2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
  2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
  2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
  2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
  2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
  2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
  2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
  2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
  2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
  1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
  1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
  1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
  1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
  1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
  1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
  1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
  1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
  1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
  1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
  1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
  1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
  1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
  1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
  1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
  1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
  1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
  1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
  1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
  1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
  1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
  1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
  1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
  1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
  1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
  1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
  1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
  1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
  1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
  1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
  1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
  1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
  1006, 1004, 1002, 1000, 998, 996, 994, 992,
  991, 989, 987, 985, 983, 981, 979, 978,
  976, 974, 972, 970, 969, 967, 965, 963,
  961, 960, 958, 956, 954, 953, 951, 949,
  948, 946, 944, 942, 941, 939, 937, 936,
  934, 932, 931, 929, 927, 926, 924, 923,
  921, 919, 918, 916, 914, 913, 911, 910,
  908, 907, 905, 903, 902, 900, 899, 897,
  896, 894, 893, 891, 890, 888, 887, 885,
  884, 882, 881, 879, 878, 876, 875, 873,
  872, 870, 869, 868, 866, 865, 863, 862,
  860, 859, 858, 856, 855, 853, 852, 851,
  849, 848, 846, 845, 844, 842, 841, 840,
  838, 837, 836, 834, 833, 832, 830, 829,
  828, 826, 825, 824, 823, 821, 820, 819,
  817, 816, 815, 814, 812, 811, 810, 809,
  807, 806, 805, 804, 802, 801, 800, 799,
  798, 796, 795, 794, 793, 791, 790, 789,
  788, 787, 786, 784, 783, 782, 781, 780,
  779, 777, 776, 775, 774, 773, 772, 771,
  769, 768, 767, 766, 765, 764, 763, 762,
  760, 759, 758, 757, 756, 755, 754, 753,
  752, 751, 750, 748, 747, 746, 745, 744,
  743, 742, 741, 740, 739, 738, 737, 736,
  735, 734, 733, 732, 731, 730, 729, 728,
  727, 726, 725, 724, 723, 722, 721, 720,
  719, 718, 717, 716, 715, 714, 713, 712,
  711, 710, 709, 708, 707, 706, 705, 704,
  703, 702, 701, 700, 699, 699, 698, 697,
  696, 695, 694, 693, 692, 691, 690, 689,
  688, 688, 687, 686, 685, 684, 683, 682,
  681, 680, 680, 679, 678, 677, 676, 675,
  674, 673, 673, 672, 671, 670, 669, 668,
  667, 667, 666, 665, 664, 663, 662, 661,
  661, 660, 659, 658, 657, 657, 656, 655,
  654, 653, 652, 652, 651, 650, 649, 648,
  648, 647, 646, 645, 644, 644, 643, 642,
  641, 640, 640, 639, 638, 637, 637, 636,
  635, 634, 633, 633, 632, 631, 630, 630,
  629, 628, 627, 627, 626, 625, 624, 624,
  623, 622, 621, 621, 620, 619, 618, 618,
  617, 616, 616, 615, 614, 613, 613, 612,
  611, 611, 610, 609, 608, 608, 607, 606,
  606, 605, 604, 604, 603, 602, 601, 601,
  600, 599, 599, 598, 597, 597, 596, 595,
  595, 594, 593, 593, 592, 591, 591, 590,
  589, 589, 588, 587, 587, 586, 585, 585,
  584, 583, 583, 582, 581, 581, 580, 579,
  579, 578, 578, 577, 576, 576, 575, 574,
  574, 573, 572, 572, 571, 571, 570, 569,
  569, 568, 568, 567, 566, 566, 565, 564,
  564, 563, 563, 562, 561, 561, 560, 560,
  559, 558, 558, 557, 557, 556, 555, 555,
  554, 554, 553, 553, 552, 551, 551, 550,
  550, 549, 548, 548, 547, 547, 546, 546,
  545, 544, 544, 543, 543, 542, 542, 541,
  541, 540, 539, 539, 538, 538, 537, 537,
  536, 536, 535, 534, 534, 533, 533, 532,
  532, 531, 531, 530, 530, 529, 529, 528,
  527, 527, 526, 526, 525, 525, 524, 524,
  523, 523, 522, 522, 521, 521, 520, 520,
  519, 519, 518, 518, 517, 517, 516, 516,
  515, 515, 514, 514
};

// Note that LinearToGamma() expects the values to be premultiplied by 4,
// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
// (Shifting by kAlphaFix - 2 instead of kAlphaFix keeps two extra bits.)
#define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))

#else

// Exact variant: plain integer division, with the same factor 4 applied.
#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))

#endif  // USE_INVERSE_ALPHA_TABLE
    728 
    729 static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
    730                                              const uint8_t* a_ptr,
    731                                              uint32_t total_a, int step,
    732                                              int rgb_stride) {
    733   const uint32_t sum =
    734       a_ptr[0] * GammaToLinear(src[0]) +
    735       a_ptr[step] * GammaToLinear(src[step]) +
    736       a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) +
    737       a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]);
    738   assert(total_a > 0 && total_a <= 4 * 0xff);
    739 #if defined(USE_INVERSE_ALPHA_TABLE)
    740   assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
    741 #endif
    742   return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
    743 }
    744 
    745 static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
    746                                       const uint8_t* const g_ptr,
    747                                       const uint8_t* const b_ptr,
    748                                       int step,
    749                                       uint8_t* const dst_y,
    750                                       int width,
    751                                       VP8Random* const rg) {
    752   int i, j;
    753   for (i = 0, j = 0; i < width; i += 1, j += step) {
    754     dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
    755   }
    756 }
    757 
    758 static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
    759                                        const uint8_t* const g_ptr,
    760                                        const uint8_t* const b_ptr,
    761                                        const uint8_t* const a_ptr,
    762                                        int rgb_stride,
    763                                        uint16_t* dst, int width) {
    764   int i, j;
    765   // we loop over 2x2 blocks and produce one R/G/B/A value for each.
    766   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) {
    767     const uint32_t a = SUM4ALPHA(a_ptr + j);
    768     int r, g, b;
    769     if (a == 4 * 0xff || a == 0) {
    770       r = SUM4(r_ptr + j, 4);
    771       g = SUM4(g_ptr + j, 4);
    772       b = SUM4(b_ptr + j, 4);
    773     } else {
    774       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride);
    775       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
    776       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
    777     }
    778     dst[0] = r;
    779     dst[1] = g;
    780     dst[2] = b;
    781     dst[3] = a;
    782   }
    783   if (width & 1) {
    784     const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
    785     int r, g, b;
    786     if (a == 4 * 0xff || a == 0) {
    787       r = SUM2(r_ptr + j);
    788       g = SUM2(g_ptr + j);
    789       b = SUM2(b_ptr + j);
    790     } else {
    791       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride);
    792       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
    793       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
    794     }
    795     dst[0] = r;
    796     dst[1] = g;
    797     dst[2] = b;
    798     dst[3] = a;
    799   }
    800 }
    801 
    802 static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
    803                                       const uint8_t* const g_ptr,
    804                                       const uint8_t* const b_ptr,
    805                                       int step, int rgb_stride,
    806                                       uint16_t* dst, int width) {
    807   int i, j;
    808   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) {
    809     dst[0] = SUM4(r_ptr + j, step);
    810     dst[1] = SUM4(g_ptr + j, step);
    811     dst[2] = SUM4(b_ptr + j, step);
    812   }
    813   if (width & 1) {
    814     dst[0] = SUM2(r_ptr + j);
    815     dst[1] = SUM2(g_ptr + j);
    816     dst[2] = SUM2(b_ptr + j);
    817   }
    818 }
    819 
    820 static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
    821                                         uint8_t* const dst_u,
    822                                         uint8_t* const dst_v,
    823                                         int width,
    824                                         VP8Random* const rg) {
    825   int i;
    826   for (i = 0; i < width; i += 1, rgb += 4) {
    827     const int r = rgb[0], g = rgb[1], b = rgb[2];
    828     dst_u[i] = RGBToU(r, g, b, rg);
    829     dst_v[i] = RGBToV(r, g, b, rg);
    830   }
    831 }
    832 
    833 static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
    834                               const uint8_t* const g_ptr,
    835                               const uint8_t* const b_ptr,
    836                               const uint8_t* const a_ptr,
    837                               int step,         // bytes per pixel
    838                               int rgb_stride,   // bytes per scanline
    839                               float dithering,
    840                               int use_iterative_conversion,
    841                               WebPPicture* const picture) {
    842   int y;
    843   const int width = picture->width;
    844   const int height = picture->height;
    845   const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
    846   const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr
    847 
    848   picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
    849   picture->use_argb = 0;
    850 
    851   // disable smart conversion if source is too small (overkill).
    852   if (width < kMinDimensionIterativeConversion ||
    853       height < kMinDimensionIterativeConversion) {
    854     use_iterative_conversion = 0;
    855   }
    856 
    857   if (!WebPPictureAllocYUVA(picture, width, height)) {
    858     return 0;
    859   }
    860   if (has_alpha) {
    861     WebPInitAlphaProcessing();
    862     assert(step == 4);
    863 #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
    864     assert(kAlphaFix + kGammaFix <= 31);
    865 #endif
    866   }
    867 
    868   if (use_iterative_conversion) {
    869     InitGammaTablesF();
    870     if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
    871       return 0;
    872     }
    873     if (has_alpha) {
    874       WebPExtractAlpha(a_ptr, rgb_stride, width, height,
    875                        picture->a, picture->a_stride);
    876     }
    877   } else {
    878     const int uv_width = (width + 1) >> 1;
    879     int use_dsp = (step == 3);  // use special function in this case
    880     // temporary storage for accumulated R/G/B values during conversion to U/V
    881     uint16_t* const tmp_rgb =
    882         (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
    883     uint8_t* dst_y = picture->y;
    884     uint8_t* dst_u = picture->u;
    885     uint8_t* dst_v = picture->v;
    886     uint8_t* dst_a = picture->a;
    887 
    888     VP8Random base_rg;
    889     VP8Random* rg = NULL;
    890     if (dithering > 0.) {
    891       VP8InitRandom(&base_rg, dithering);
    892       rg = &base_rg;
    893       use_dsp = 0;   // can't use dsp in this case
    894     }
    895     WebPInitConvertARGBToYUV();
    896     InitGammaTables();
    897 
    898     if (tmp_rgb == NULL) return 0;  // malloc error
    899 
    900     // Downsample Y/U/V planes, two rows at a time
    901     for (y = 0; y < (height >> 1); ++y) {
    902       int rows_have_alpha = has_alpha;
    903       const int off1 = (2 * y + 0) * rgb_stride;
    904       const int off2 = (2 * y + 1) * rgb_stride;
    905       if (use_dsp) {
    906         if (is_rgb) {
    907           WebPConvertRGB24ToY(r_ptr + off1, dst_y, width);
    908           WebPConvertRGB24ToY(r_ptr + off2, dst_y + picture->y_stride, width);
    909         } else {
    910           WebPConvertBGR24ToY(b_ptr + off1, dst_y, width);
    911           WebPConvertBGR24ToY(b_ptr + off2, dst_y + picture->y_stride, width);
    912         }
    913       } else {
    914         ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
    915                       dst_y, width, rg);
    916         ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
    917                       dst_y + picture->y_stride, width, rg);
    918       }
    919       dst_y += 2 * picture->y_stride;
    920       if (has_alpha) {
    921         rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride,
    922                                              width, 2,
    923                                              dst_a, picture->a_stride);
    924         dst_a += 2 * picture->a_stride;
    925       }
    926       // Collect averaged R/G/B(/A)
    927       if (!rows_have_alpha) {
    928         AccumulateRGB(r_ptr + off1, g_ptr + off1, b_ptr + off1,
    929                       step, rgb_stride, tmp_rgb, width);
    930       } else {
    931         AccumulateRGBA(r_ptr + off1, g_ptr + off1, b_ptr + off1, a_ptr + off1,
    932                        rgb_stride, tmp_rgb, width);
    933       }
    934       // Convert to U/V
    935       if (rg == NULL) {
    936         WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
    937       } else {
    938         ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
    939       }
    940       dst_u += picture->uv_stride;
    941       dst_v += picture->uv_stride;
    942     }
    943     if (height & 1) {    // extra last row
    944       const int off = 2 * y * rgb_stride;
    945       int row_has_alpha = has_alpha;
    946       if (use_dsp) {
    947         if (r_ptr < b_ptr) {
    948           WebPConvertRGB24ToY(r_ptr + off, dst_y, width);
    949         } else {
    950           WebPConvertBGR24ToY(b_ptr + off, dst_y, width);
    951         }
    952       } else {
    953         ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
    954                       dst_y, width, rg);
    955       }
    956       if (row_has_alpha) {
    957         row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
    958       }
    959       // Collect averaged R/G/B(/A)
    960       if (!row_has_alpha) {
    961         // Collect averaged R/G/B
    962         AccumulateRGB(r_ptr + off, g_ptr + off, b_ptr + off,
    963                       step, /* rgb_stride = */ 0, tmp_rgb, width);
    964       } else {
    965         AccumulateRGBA(r_ptr + off, g_ptr + off, b_ptr + off, a_ptr + off,
    966                        /* rgb_stride = */ 0, tmp_rgb, width);
    967       }
    968       if (rg == NULL) {
    969         WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
    970       } else {
    971         ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
    972       }
    973     }
    974     WebPSafeFree(tmp_rgb);
    975   }
    976   return 1;
    977 }
    978 
    979 #undef SUM4
    980 #undef SUM2
    981 #undef SUM4ALPHA
    982 #undef SUM2ALPHA
    983 
    984 //------------------------------------------------------------------------------
    985 // call for ARGB->YUVA conversion
    986 
    987 static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
    988                              float dithering, int use_iterative_conversion) {
    989   if (picture == NULL) return 0;
    990   if (picture->argb == NULL) {
    991     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
    992   } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
    993     return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
    994   } else {
    995     const uint8_t* const argb = (const uint8_t*)picture->argb;
    996     const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1;
    997     const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2;
    998     const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3;
    999     const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0;
   1000 
   1001     picture->colorspace = WEBP_YUV420;
   1002     return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride,
   1003                               dithering, use_iterative_conversion, picture);
   1004   }
   1005 }
   1006 
   1007 int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
   1008                                   float dithering) {
   1009   return PictureARGBToYUVA(picture, colorspace, dithering, 0);
   1010 }
   1011 
   1012 int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
   1013   return PictureARGBToYUVA(picture, colorspace, 0.f, 0);
   1014 }
   1015 
   1016 int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
   1017   return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1);
   1018 }
   1019 
   1020 //------------------------------------------------------------------------------
   1021 // call for YUVA -> ARGB conversion
   1022 
   1023 int WebPPictureYUVAToARGB(WebPPicture* picture) {
   1024   if (picture == NULL) return 0;
   1025   if (picture->y == NULL || picture->u == NULL || picture->v == NULL) {
   1026     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
   1027   }
   1028   if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
   1029     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
   1030   }
   1031   if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
   1032     return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
   1033   }
   1034   // Allocate a new argb buffer (discarding the previous one).
   1035   if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
   1036   picture->use_argb = 1;
   1037 
   1038   // Convert
   1039   {
   1040     int y;
   1041     const int width = picture->width;
   1042     const int height = picture->height;
   1043     const int argb_stride = 4 * picture->argb_stride;
   1044     uint8_t* dst = (uint8_t*)picture->argb;
   1045     const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
   1046     WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
   1047 
   1048     // First row, with replicated top samples.
   1049     upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
   1050     cur_y += picture->y_stride;
   1051     dst += argb_stride;
   1052     // Center rows.
   1053     for (y = 1; y + 1 < height; y += 2) {
   1054       const uint8_t* const top_u = cur_u;
   1055       const uint8_t* const top_v = cur_v;
   1056       cur_u += picture->uv_stride;
   1057       cur_v += picture->uv_stride;
   1058       upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
   1059                dst, dst + argb_stride, width);
   1060       cur_y += 2 * picture->y_stride;
   1061       dst += 2 * argb_stride;
   1062     }
   1063     // Last row (if needed), with replicated bottom samples.
   1064     if (height > 1 && !(height & 1)) {
   1065       upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
   1066     }
   1067     // Insert alpha values if needed, in replacement for the default 0xff ones.
   1068     if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
   1069       for (y = 0; y < height; ++y) {
   1070         uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
   1071         const uint8_t* const src = picture->a + y * picture->a_stride;
   1072         int x;
   1073         for (x = 0; x < width; ++x) {
   1074           argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
   1075         }
   1076       }
   1077     }
   1078   }
   1079   return 1;
   1080 }
   1081 
   1082 //------------------------------------------------------------------------------
   1083 // automatic import / conversion
   1084 
   1085 static int Import(WebPPicture* const picture,
   1086                   const uint8_t* const rgb, int rgb_stride,
   1087                   int step, int swap_rb, int import_alpha) {
   1088   int y;
   1089   const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0);
   1090   const uint8_t* const g_ptr = rgb + 1;
   1091   const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2);
   1092   const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL;
   1093   const int width = picture->width;
   1094   const int height = picture->height;
   1095 
   1096   if (!picture->use_argb) {
   1097     return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
   1098                               0.f /* no dithering */, 0, picture);
   1099   }
   1100   if (!WebPPictureAlloc(picture)) return 0;
   1101 
   1102   VP8EncDspARGBInit();
   1103 
   1104   if (import_alpha) {
   1105     assert(step == 4);
   1106     for (y = 0; y < height; ++y) {
   1107       uint32_t* const dst = &picture->argb[y * picture->argb_stride];
   1108       const int offset = y * rgb_stride;
   1109       VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset,
   1110                   b_ptr + offset, width, dst);
   1111     }
   1112   } else {
   1113     assert(step >= 3);
   1114     for (y = 0; y < height; ++y) {
   1115       uint32_t* const dst = &picture->argb[y * picture->argb_stride];
   1116       const int offset = y * rgb_stride;
   1117       VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset,
   1118                  width, step, dst);
   1119     }
   1120   }
   1121   return 1;
   1122 }
   1123 
   1124 // Public API
   1125 
   1126 int WebPPictureImportRGB(WebPPicture* picture,
   1127                          const uint8_t* rgb, int rgb_stride) {
   1128   return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 0, 0) : 0;
   1129 }
   1130 
   1131 int WebPPictureImportBGR(WebPPicture* picture,
   1132                          const uint8_t* rgb, int rgb_stride) {
   1133   return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 1, 0) : 0;
   1134 }
   1135 
   1136 int WebPPictureImportRGBA(WebPPicture* picture,
   1137                           const uint8_t* rgba, int rgba_stride) {
   1138   return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0;
   1139 }
   1140 
   1141 int WebPPictureImportBGRA(WebPPicture* picture,
   1142                           const uint8_t* rgba, int rgba_stride) {
   1143   return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0;
   1144 }
   1145 
   1146 int WebPPictureImportRGBX(WebPPicture* picture,
   1147                           const uint8_t* rgba, int rgba_stride) {
   1148   return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0;
   1149 }
   1150 
   1151 int WebPPictureImportBGRX(WebPPicture* picture,
   1152                           const uint8_t* rgba, int rgba_stride) {
   1153   return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0;
   1154 }
   1155 
   1156 //------------------------------------------------------------------------------
   1157