Home | History | Annotate | Download | only in enc
      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // WebPPicture utils for colorspace conversion
     11 //
     12 // Author: Skal (pascal.massimino (at) gmail.com)
     13 
     14 #include <assert.h>
     15 #include <stdlib.h>
     16 #include <math.h>
     17 
     18 #include "src/enc/vp8i_enc.h"
     19 #include "src/utils/random_utils.h"
     20 #include "src/utils/utils.h"
     21 #include "src/dsp/dsp.h"
     22 #include "src/dsp/lossless.h"
     23 #include "src/dsp/yuv.h"
     24 
// Comment out to disable gamma-compression during RGB->U/V averaging
     26 #define USE_GAMMA_COMPRESSION
     27 
     28 // If defined, use table to compute x / alpha.
     29 #define USE_INVERSE_ALPHA_TABLE
     30 
// Endianness probe: 'argb' is initialized to 0xff000000 and is then inspected
// byte-by-byte through 'bytes'.
static const union {
  uint32_t argb;
  uint8_t  bytes[4];
} test_endian = { 0xff000000u };
// True when the 0xff byte of the probe is stored at byte offset 3, i.e. when
// the top byte of a 32b word comes last in memory.
#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
     36 
     37 //------------------------------------------------------------------------------
     38 // Detection of non-trivial transparency
     39 
     40 // Returns true if alpha[] has non-0xff values.
     41 static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
     42                           int x_step, int y_step) {
     43   if (alpha == NULL) return 0;
     44   WebPInitAlphaProcessing();
     45   if (x_step == 1) {
     46     for (; height-- > 0; alpha += y_step) {
     47       if (WebPHasAlpha8b(alpha, width)) return 1;
     48     }
     49   } else {
     50     for (; height-- > 0; alpha += y_step) {
     51       if (WebPHasAlpha32b(alpha, width)) return 1;
     52     }
     53   }
     54   return 0;
     55 }
     56 
     57 // Checking for the presence of non-opaque alpha.
     58 int WebPPictureHasTransparency(const WebPPicture* picture) {
     59   if (picture == NULL) return 0;
     60   if (!picture->use_argb) {
     61     return CheckNonOpaque(picture->a, picture->width, picture->height,
     62                           1, picture->a_stride);
     63   } else {
     64     const int alpha_offset = ALPHA_IS_LAST ? 3 : 0;
     65     return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
     66                           picture->width, picture->height,
     67                           4, picture->argb_stride * sizeof(*picture->argb));
     68   }
     69   return 0;
     70 }
     71 
     72 //------------------------------------------------------------------------------
     73 // Code for gamma correction
     74 
     75 #if defined(USE_GAMMA_COMPRESSION)
     76 
// gamma-compensates loss of resolution during chroma subsampling
#define kGamma 0.80      // for now we use a different gamma value than kGammaF
#define kGammaFix 12     // fixed-point precision for linear values
#define kGammaScale ((1 << kGammaFix) - 1)   // largest linear value
#define kGammaTabFix 7   // fixed-point fractional bits precision
#define kGammaTabScale (1 << kGammaTabFix)
#define kGammaTabRounder (kGammaTabScale >> 1)   // rounding term for descaling
#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))

// linear -> gamma table. It has one extra entry because Interpolate() reads
// both 'tab_pos' and 'tab_pos + 1'.
static int kLinearToGammaTab[kGammaTabSize + 1];
// 8b gamma -> linear table, filled with values in [0, kGammaScale].
static uint16_t kGammaToLinearTab[256];
// Set to 1 by InitGammaTables() once both tables above are filled.
static volatile int kGammaTablesOk = 0;
     89 
     90 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {
     91   if (!kGammaTablesOk) {
     92     int v;
     93     const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
     94     const double norm = 1. / 255.;
     95     for (v = 0; v <= 255; ++v) {
     96       kGammaToLinearTab[v] =
     97           (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
     98     }
     99     for (v = 0; v <= kGammaTabSize; ++v) {
    100       kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
    101     }
    102     kGammaTablesOk = 1;
    103   }
    104 }
    105 
    106 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
    107   return kGammaToLinearTab[v];
    108 }
    109 
    110 static WEBP_INLINE int Interpolate(int v) {
    111   const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
    112   const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
    113   const int v0 = kLinearToGammaTab[tab_pos];
    114   const int v1 = kLinearToGammaTab[tab_pos + 1];
    115   const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
    116   assert(tab_pos + 1 < kGammaTabSize + 1);
    117   return y;
    118 }
    119 
    120 // Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
    121 // U/V value, suitable for RGBToU/V calls.
    122 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
    123   const int y = Interpolate(base_value << shift);   // final uplifted value
    124   return (y + kGammaTabRounder) >> kGammaTabFix;    // descale
    125 }
    126 
    127 #else
    128 
// Variants compiled when USE_GAMMA_COMPRESSION is not defined: there is no
// table to initialize and samples are used as-is.
static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {}
static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
// Only applies 'shift'; no table lookup is involved.
static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
  return (int)(base_value << shift);
}
    134 
    135 #endif    // USE_GAMMA_COMPRESSION
    136 
    137 //------------------------------------------------------------------------------
    138 // RGB -> YUV conversion
    139 
    140 static int RGBToY(int r, int g, int b, VP8Random* const rg) {
    141   return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF)
    142                       : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
    143 }
    144 
    145 static int RGBToU(int r, int g, int b, VP8Random* const rg) {
    146   return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2)
    147                       : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
    148 }
    149 
    150 static int RGBToV(int r, int g, int b, VP8Random* const rg) {
    151   return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2)
    152                       : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
    153 }
    154 
    155 //------------------------------------------------------------------------------
    156 // Sharp RGB->YUV conversion
    157 
// Upper bound on the number of refinement passes run by PreprocessARGB().
static const int kNumIterations = 4;
// Minimum picture width / height asserted by PreprocessARGB().
static const int kMinDimensionIterativeConversion = 4;

// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
// banding sometimes. Better use extra precision.
#define SFIX 2                // fixed-point precision of RGB and Y/W
typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W

#define SHALF (1 << SFIX >> 1)                 // half a unit at SFIX precision
#define MAX_Y_T ((256 << SFIX) - 1)            // largest fixed_y_t sample value
#define SROUNDER (1 << (YUV_FIX + SFIX - 1))   // rounder for '>> (YUV_FIX + SFIX)'
    170 
    171 #if defined(USE_GAMMA_COMPRESSION)
    172 
// float variant of gamma-correction
// We use tables of different size and precision for the Rec709 / BT2020
// transfer function.
#define kGammaF (1./0.45)
// One entry per possible fixed_y_t sample: the size scales with SFIX (through
// MAX_Y_T).
static float kGammaToLinearTabF[MAX_Y_T + 1];
// Two extra entries: one for the 'tab_pos + 1' read in LinearToGammaF(), and
// one guard entry duplicated by InitGammaTablesF().
static float kLinearToGammaTabF[kGammaTabSize + 2];
// Set to 1 by InitGammaTablesF() once both tables above are filled.
static volatile int kGammaTablesFOk = 0;
    180 
    181 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {
    182   if (!kGammaTablesFOk) {
    183     int v;
    184     const double norm = 1. / MAX_Y_T;
    185     const double scale = 1. / kGammaTabSize;
    186     const double a = 0.09929682680944;
    187     const double thresh = 0.018053968510807;
    188     for (v = 0; v <= MAX_Y_T; ++v) {
    189       const double g = norm * v;
    190       if (g <= thresh * 4.5) {
    191         kGammaToLinearTabF[v] = (float)(g / 4.5);
    192       } else {
    193         const double a_rec = 1. / (1. + a);
    194         kGammaToLinearTabF[v] = (float)pow(a_rec * (g + a), kGammaF);
    195       }
    196     }
    197     for (v = 0; v <= kGammaTabSize; ++v) {
    198       const double g = scale * v;
    199       double value;
    200       if (g <= thresh) {
    201         value = 4.5 * g;
    202       } else {
    203         value = (1. + a) * pow(g, 1. / kGammaF) - a;
    204       }
    205       kLinearToGammaTabF[v] = (float)(MAX_Y_T * value);
    206     }
    207     // to prevent small rounding errors to cause read-overflow:
    208     kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize];
    209     kGammaTablesFOk = 1;
    210   }
    211 }
    212 
    213 static WEBP_INLINE float GammaToLinearF(int v) {
    214   return kGammaToLinearTabF[v];
    215 }
    216 
    217 static WEBP_INLINE int LinearToGammaF(float value) {
    218   const float v = value * kGammaTabSize;
    219   const int tab_pos = (int)v;
    220   const float x = v - (float)tab_pos;      // fractional part
    221   const float v0 = kLinearToGammaTabF[tab_pos + 0];
    222   const float v1 = kLinearToGammaTabF[tab_pos + 1];
    223   const float y = v1 * x + v0 * (1.f - x);  // interpolate
    224   return (int)(y + .5);
    225 }
    226 
    227 #else
    228 
// Variants compiled when USE_GAMMA_COMPRESSION is not defined: no tables, the
// conversion is a plain scaling by MAX_Y_T.
static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {}
// Scales 'v' by 1 / MAX_Y_T.
static WEBP_INLINE float GammaToLinearF(int v) {
  const float norm = 1.f / MAX_Y_T;
  return norm * v;
}
// Scales 'value' by MAX_Y_T and rounds to the nearest int.
static WEBP_INLINE int LinearToGammaF(float value) {
  return (int)(MAX_Y_T * value + .5);
}
    237 
    238 #endif    // USE_GAMMA_COMPRESSION
    239 
    240 //------------------------------------------------------------------------------
    241 
    242 static uint8_t clip_8b(fixed_t v) {
    243   return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
    244 }
    245 
    246 static fixed_y_t clip_y(int y) {
    247   return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
    248 }
    249 
    250 //------------------------------------------------------------------------------
    251 
    252 static int RGBToGray(int r, int g, int b) {
    253   const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
    254   return (luma >> YUV_FIX);
    255 }
    256 
    257 static float RGBToGrayF(float r, float g, float b) {
    258   return (float)(0.2126 * r + 0.7152 * g + 0.0722 * b);
    259 }
    260 
    261 static int ScaleDown(int a, int b, int c, int d) {
    262   const float A = GammaToLinearF(a);
    263   const float B = GammaToLinearF(b);
    264   const float C = GammaToLinearF(c);
    265   const float D = GammaToLinearF(d);
    266   return LinearToGammaF(0.25f * (A + B + C + D));
    267 }
    268 
    269 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
    270   int i;
    271   for (i = 0; i < w; ++i) {
    272     const float R = GammaToLinearF(src[0 * w + i]);
    273     const float G = GammaToLinearF(src[1 * w + i]);
    274     const float B = GammaToLinearF(src[2 * w + i]);
    275     const float Y = RGBToGrayF(R, G, B);
    276     dst[i] = (fixed_y_t)LinearToGammaF(Y);
    277   }
    278 }
    279 
    280 static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
    281                          fixed_t* dst, int uv_w) {
    282   int i;
    283   for (i = 0; i < uv_w; ++i) {
    284     const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1],
    285                             src2[0 * uv_w + 0], src2[0 * uv_w + 1]);
    286     const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1],
    287                             src2[2 * uv_w + 0], src2[2 * uv_w + 1]);
    288     const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1],
    289                             src2[4 * uv_w + 0], src2[4 * uv_w + 1]);
    290     const int W = RGBToGray(r, g, b);
    291     dst[0 * uv_w] = (fixed_t)(r - W);
    292     dst[1 * uv_w] = (fixed_t)(g - W);
    293     dst[2 * uv_w] = (fixed_t)(b - W);
    294     dst  += 1;
    295     src1 += 2;
    296     src2 += 2;
    297   }
    298 }
    299 
    300 static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
    301   int i;
    302   for (i = 0; i < w; ++i) {
    303     y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
    304   }
    305 }
    306 
    307 //------------------------------------------------------------------------------
    308 
    309 static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
    310   const int v0 = (A * 3 + B + 2) >> 2;
    311   return clip_y(v0 + W0);
    312 }
    313 
    314 //------------------------------------------------------------------------------
    315 
    316 static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
    317   return ((fixed_y_t)a << SFIX) | SHALF;
    318 }
    319 
    320 static void ImportOneRow(const uint8_t* const r_ptr,
    321                          const uint8_t* const g_ptr,
    322                          const uint8_t* const b_ptr,
    323                          int step,
    324                          int pic_width,
    325                          fixed_y_t* const dst) {
    326   int i;
    327   const int w = (pic_width + 1) & ~1;
    328   for (i = 0; i < pic_width; ++i) {
    329     const int off = i * step;
    330     dst[i + 0 * w] = UpLift(r_ptr[off]);
    331     dst[i + 1 * w] = UpLift(g_ptr[off]);
    332     dst[i + 2 * w] = UpLift(b_ptr[off]);
    333   }
    334   if (pic_width & 1) {  // replicate rightmost pixel
    335     dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
    336     dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
    337     dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
    338   }
    339 }
    340 
    341 static void InterpolateTwoRows(const fixed_y_t* const best_y,
    342                                const fixed_t* prev_uv,
    343                                const fixed_t* cur_uv,
    344                                const fixed_t* next_uv,
    345                                int w,
    346                                fixed_y_t* out1,
    347                                fixed_y_t* out2) {
    348   const int uv_w = w >> 1;
    349   const int len = (w - 1) >> 1;   // length to filter
    350   int k = 3;
    351   while (k-- > 0) {   // process each R/G/B segments in turn
    352     // special boundary case for i==0
    353     out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
    354     out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);
    355 
    356     WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
    357     WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);
    358 
    359     // special boundary case for i == w - 1 when w is even
    360     if (!(w & 1)) {
    361       out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
    362                             best_y[w - 1 + 0]);
    363       out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
    364                             best_y[w - 1 + w]);
    365     }
    366     out1 += w;
    367     out2 += w;
    368     prev_uv += uv_w;
    369     cur_uv  += uv_w;
    370     next_uv += uv_w;
    371   }
    372 }
    373 
    374 static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
    375   const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
    376   return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
    377 }
    378 
    379 static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
    380   const int u =  -9719 * r - 19081 * g + 28800 * b + SROUNDER;
    381   return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
    382 }
    383 
    384 static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
    385   const int v = +28800 * r - 24116 * g -  4684 * b + SROUNDER;
    386   return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
    387 }
    388 
    389 static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
    390                             WebPPicture* const picture) {
    391   int i, j;
    392   uint8_t* dst_y = picture->y;
    393   uint8_t* dst_u = picture->u;
    394   uint8_t* dst_v = picture->v;
    395   const fixed_t* const best_uv_base = best_uv;
    396   const int w = (picture->width + 1) & ~1;
    397   const int h = (picture->height + 1) & ~1;
    398   const int uv_w = w >> 1;
    399   const int uv_h = h >> 1;
    400   for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) {
    401     for (i = 0; i < picture->width; ++i) {
    402       const int off = (i >> 1);
    403       const int W = best_y[i];
    404       const int r = best_uv[off + 0 * uv_w] + W;
    405       const int g = best_uv[off + 1 * uv_w] + W;
    406       const int b = best_uv[off + 2 * uv_w] + W;
    407       dst_y[i] = ConvertRGBToY(r, g, b);
    408     }
    409     best_y += w;
    410     best_uv += (j & 1) * 3 * uv_w;
    411     dst_y += picture->y_stride;
    412   }
    413   for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
    414     for (i = 0; i < uv_w; ++i) {
    415       const int off = i;
    416       const int r = best_uv[off + 0 * uv_w];
    417       const int g = best_uv[off + 1 * uv_w];
    418       const int b = best_uv[off + 2 * uv_w];
    419       dst_u[i] = ConvertRGBToU(r, g, b);
    420       dst_v[i] = ConvertRGBToV(r, g, b);
    421     }
    422     best_uv += 3 * uv_w;
    423     dst_u += picture->uv_stride;
    424     dst_v += picture->uv_stride;
    425   }
    426   return 1;
    427 }
    428 
    429 //------------------------------------------------------------------------------
    430 // Main function
    431 
    432 #define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
    433 
    434 static int PreprocessARGB(const uint8_t* r_ptr,
    435                           const uint8_t* g_ptr,
    436                           const uint8_t* b_ptr,
    437                           int step, int rgb_stride,
    438                           WebPPicture* const picture) {
    439   // we expand the right/bottom border if needed
    440   const int w = (picture->width + 1) & ~1;
    441   const int h = (picture->height + 1) & ~1;
    442   const int uv_w = w >> 1;
    443   const int uv_h = h >> 1;
    444   uint64_t prev_diff_y_sum = ~0;
    445   int j, iter;
    446 
    447   // TODO(skal): allocate one big memory chunk. But for now, it's easier
    448   // for valgrind debugging to have several chunks.
    449   fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
    450   fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
    451   fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
    452   fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
    453   fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
    454   fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
    455   fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
    456   fixed_y_t* best_y = best_y_base;
    457   fixed_y_t* target_y = target_y_base;
    458   fixed_t* best_uv = best_uv_base;
    459   fixed_t* target_uv = target_uv_base;
    460   const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
    461   int ok;
    462 
    463   if (best_y_base == NULL || best_uv_base == NULL ||
    464       target_y_base == NULL || target_uv_base == NULL ||
    465       best_rgb_y == NULL || best_rgb_uv == NULL ||
    466       tmp_buffer == NULL) {
    467     ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
    468     goto End;
    469   }
    470   assert(picture->width >= kMinDimensionIterativeConversion);
    471   assert(picture->height >= kMinDimensionIterativeConversion);
    472 
    473   WebPInitConvertARGBToYUV();
    474 
    475   // Import RGB samples to W/RGB representation.
    476   for (j = 0; j < picture->height; j += 2) {
    477     const int is_last_row = (j == picture->height - 1);
    478     fixed_y_t* const src1 = tmp_buffer + 0 * w;
    479     fixed_y_t* const src2 = tmp_buffer + 3 * w;
    480 
    481     // prepare two rows of input
    482     ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
    483     if (!is_last_row) {
    484       ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
    485                    step, picture->width, src2);
    486     } else {
    487       memcpy(src2, src1, 3 * w * sizeof(*src2));
    488     }
    489     StoreGray(src1, best_y + 0, w);
    490     StoreGray(src2, best_y + w, w);
    491 
    492     UpdateW(src1, target_y, w);
    493     UpdateW(src2, target_y + w, w);
    494     UpdateChroma(src1, src2, target_uv, uv_w);
    495     memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
    496     best_y += 2 * w;
    497     best_uv += 3 * uv_w;
    498     target_y += 2 * w;
    499     target_uv += 3 * uv_w;
    500     r_ptr += 2 * rgb_stride;
    501     g_ptr += 2 * rgb_stride;
    502     b_ptr += 2 * rgb_stride;
    503   }
    504 
    505   // Iterate and resolve clipping conflicts.
    506   for (iter = 0; iter < kNumIterations; ++iter) {
    507     const fixed_t* cur_uv = best_uv_base;
    508     const fixed_t* prev_uv = best_uv_base;
    509     uint64_t diff_y_sum = 0;
    510 
    511     best_y = best_y_base;
    512     best_uv = best_uv_base;
    513     target_y = target_y_base;
    514     target_uv = target_uv_base;
    515     for (j = 0; j < h; j += 2) {
    516       fixed_y_t* const src1 = tmp_buffer + 0 * w;
    517       fixed_y_t* const src2 = tmp_buffer + 3 * w;
    518       {
    519         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
    520         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
    521         prev_uv = cur_uv;
    522         cur_uv = next_uv;
    523       }
    524 
    525       UpdateW(src1, best_rgb_y + 0 * w, w);
    526       UpdateW(src2, best_rgb_y + 1 * w, w);
    527       UpdateChroma(src1, src2, best_rgb_uv, uv_w);
    528 
    529       // update two rows of Y and one row of RGB
    530       diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
    531       WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
    532 
    533       best_y += 2 * w;
    534       best_uv += 3 * uv_w;
    535       target_y += 2 * w;
    536       target_uv += 3 * uv_w;
    537     }
    538     // test exit condition
    539     if (iter > 0) {
    540       if (diff_y_sum < diff_y_threshold) break;
    541       if (diff_y_sum > prev_diff_y_sum) break;
    542     }
    543     prev_diff_y_sum = diff_y_sum;
    544   }
    545   // final reconstruction
    546   ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);
    547 
    548  End:
    549   WebPSafeFree(best_y_base);
    550   WebPSafeFree(best_uv_base);
    551   WebPSafeFree(target_y_base);
    552   WebPSafeFree(target_uv_base);
    553   WebPSafeFree(best_rgb_y);
    554   WebPSafeFree(best_rgb_uv);
    555   WebPSafeFree(tmp_buffer);
    556   return ok;
    557 }
    558 #undef SAFE_ALLOC
    559 
    560 //------------------------------------------------------------------------------
    561 // "Fast" regular RGB->YUV
    562 
    563 #define SUM4(ptr, step) LinearToGamma(                     \
    564     GammaToLinear((ptr)[0]) +                              \
    565     GammaToLinear((ptr)[(step)]) +                         \
    566     GammaToLinear((ptr)[rgb_stride]) +                     \
    567     GammaToLinear((ptr)[rgb_stride + (step)]), 0)          \
    568 
    569 #define SUM2(ptr) \
    570     LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
    571 
    572 #define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
    573 #define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
    574 
    575 #if defined(USE_INVERSE_ALPHA_TABLE)
    576 
// Number of fractional bits of the kInvAlpha[] entries.
static const int kAlphaFix = 19;
// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
// formula is then equal to v / a in most (99.6%) cases. Note that this table
// and constant are adjusted very tightly to fit 32b arithmetic.
// In particular, they use the fact that the operands for 'v / a' are actually
// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
// overflow is: kGammaFix + kAlphaFix <= 31.
// The table is indexed by the sum of four 8b alpha values, hence its
// 4 * 0xff + 1 entries. Entry 0 is a placeholder: LinearToGammaWeighted()
// asserts that the alpha sum is > 0 before using the table.
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
  0,  /* alpha = 0 */
  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
  58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
  30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
  20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
  15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
  12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
  10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
  9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
  8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
  7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
  6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
  5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
  5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
  4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
  4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
  4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
  4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
  3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
  3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
  3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
  3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
  3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
  2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
  2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
  2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
  2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
  2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
  2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
  2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
  2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
  2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
  2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
  2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
  1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
  1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
  1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
  1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
  1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
  1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
  1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
  1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
  1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
  1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
  1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
  1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
  1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
  1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
  1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
  1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
  1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
  1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
  1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
  1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
  1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
  1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
  1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
  1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
  1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
  1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
  1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
  1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
  1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
  1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
  1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
  1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
  1006, 1004, 1002, 1000, 998, 996, 994, 992,
  991, 989, 987, 985, 983, 981, 979, 978,
  976, 974, 972, 970, 969, 967, 965, 963,
  961, 960, 958, 956, 954, 953, 951, 949,
  948, 946, 944, 942, 941, 939, 937, 936,
  934, 932, 931, 929, 927, 926, 924, 923,
  921, 919, 918, 916, 914, 913, 911, 910,
  908, 907, 905, 903, 902, 900, 899, 897,
  896, 894, 893, 891, 890, 888, 887, 885,
  884, 882, 881, 879, 878, 876, 875, 873,
  872, 870, 869, 868, 866, 865, 863, 862,
  860, 859, 858, 856, 855, 853, 852, 851,
  849, 848, 846, 845, 844, 842, 841, 840,
  838, 837, 836, 834, 833, 832, 830, 829,
  828, 826, 825, 824, 823, 821, 820, 819,
  817, 816, 815, 814, 812, 811, 810, 809,
  807, 806, 805, 804, 802, 801, 800, 799,
  798, 796, 795, 794, 793, 791, 790, 789,
  788, 787, 786, 784, 783, 782, 781, 780,
  779, 777, 776, 775, 774, 773, 772, 771,
  769, 768, 767, 766, 765, 764, 763, 762,
  760, 759, 758, 757, 756, 755, 754, 753,
  752, 751, 750, 748, 747, 746, 745, 744,
  743, 742, 741, 740, 739, 738, 737, 736,
  735, 734, 733, 732, 731, 730, 729, 728,
  727, 726, 725, 724, 723, 722, 721, 720,
  719, 718, 717, 716, 715, 714, 713, 712,
  711, 710, 709, 708, 707, 706, 705, 704,
  703, 702, 701, 700, 699, 699, 698, 697,
  696, 695, 694, 693, 692, 691, 690, 689,
  688, 688, 687, 686, 685, 684, 683, 682,
  681, 680, 680, 679, 678, 677, 676, 675,
  674, 673, 673, 672, 671, 670, 669, 668,
  667, 667, 666, 665, 664, 663, 662, 661,
  661, 660, 659, 658, 657, 657, 656, 655,
  654, 653, 652, 652, 651, 650, 649, 648,
  648, 647, 646, 645, 644, 644, 643, 642,
  641, 640, 640, 639, 638, 637, 637, 636,
  635, 634, 633, 633, 632, 631, 630, 630,
  629, 628, 627, 627, 626, 625, 624, 624,
  623, 622, 621, 621, 620, 619, 618, 618,
  617, 616, 616, 615, 614, 613, 613, 612,
  611, 611, 610, 609, 608, 608, 607, 606,
  606, 605, 604, 604, 603, 602, 601, 601,
  600, 599, 599, 598, 597, 597, 596, 595,
  595, 594, 593, 593, 592, 591, 591, 590,
  589, 589, 588, 587, 587, 586, 585, 585,
  584, 583, 583, 582, 581, 581, 580, 579,
  579, 578, 578, 577, 576, 576, 575, 574,
  574, 573, 572, 572, 571, 571, 570, 569,
  569, 568, 568, 567, 566, 566, 565, 564,
  564, 563, 563, 562, 561, 561, 560, 560,
  559, 558, 558, 557, 557, 556, 555, 555,
  554, 554, 553, 553, 552, 551, 551, 550,
  550, 549, 548, 548, 547, 547, 546, 546,
  545, 544, 544, 543, 543, 542, 542, 541,
  541, 540, 539, 539, 538, 538, 537, 537,
  536, 536, 535, 534, 534, 533, 533, 532,
  532, 531, 531, 530, 530, 529, 529, 528,
  527, 527, 526, 526, 525, 525, 524, 524,
  523, 523, 522, 522, 521, 521, 520, 520,
  519, 519, 518, 518, 517, 517, 516, 516,
  515, 515, 514, 514
};
    716 
    717 // Note that LinearToGamma() expects the values to be premultiplied by 4,
    718 // so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
    719 #define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
    720 
    721 #else
    722 
    723 #define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
    724 
    725 #endif  // USE_INVERSE_ALPHA_TABLE
    726 
    727 static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
    728                                              const uint8_t* a_ptr,
    729                                              uint32_t total_a, int step,
    730                                              int rgb_stride) {
    731   const uint32_t sum =
    732       a_ptr[0] * GammaToLinear(src[0]) +
    733       a_ptr[step] * GammaToLinear(src[step]) +
    734       a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) +
    735       a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]);
    736   assert(total_a > 0 && total_a <= 4 * 0xff);
    737 #if defined(USE_INVERSE_ALPHA_TABLE)
    738   assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
    739 #endif
    740   return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
    741 }
    742 
    743 static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
    744                                       const uint8_t* const g_ptr,
    745                                       const uint8_t* const b_ptr,
    746                                       int step,
    747                                       uint8_t* const dst_y,
    748                                       int width,
    749                                       VP8Random* const rg) {
    750   int i, j;
    751   for (i = 0, j = 0; i < width; i += 1, j += step) {
    752     dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
    753   }
    754 }
    755 
    756 static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
    757                                        const uint8_t* const g_ptr,
    758                                        const uint8_t* const b_ptr,
    759                                        const uint8_t* const a_ptr,
    760                                        int rgb_stride,
    761                                        uint16_t* dst, int width) {
    762   int i, j;
    763   // we loop over 2x2 blocks and produce one R/G/B/A value for each.
    764   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) {
    765     const uint32_t a = SUM4ALPHA(a_ptr + j);
    766     int r, g, b;
    767     if (a == 4 * 0xff || a == 0) {
    768       r = SUM4(r_ptr + j, 4);
    769       g = SUM4(g_ptr + j, 4);
    770       b = SUM4(b_ptr + j, 4);
    771     } else {
    772       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride);
    773       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
    774       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
    775     }
    776     dst[0] = r;
    777     dst[1] = g;
    778     dst[2] = b;
    779     dst[3] = a;
    780   }
    781   if (width & 1) {
    782     const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
    783     int r, g, b;
    784     if (a == 4 * 0xff || a == 0) {
    785       r = SUM2(r_ptr + j);
    786       g = SUM2(g_ptr + j);
    787       b = SUM2(b_ptr + j);
    788     } else {
    789       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride);
    790       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
    791       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
    792     }
    793     dst[0] = r;
    794     dst[1] = g;
    795     dst[2] = b;
    796     dst[3] = a;
    797   }
    798 }
    799 
    800 static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
    801                                       const uint8_t* const g_ptr,
    802                                       const uint8_t* const b_ptr,
    803                                       int step, int rgb_stride,
    804                                       uint16_t* dst, int width) {
    805   int i, j;
    806   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) {
    807     dst[0] = SUM4(r_ptr + j, step);
    808     dst[1] = SUM4(g_ptr + j, step);
    809     dst[2] = SUM4(b_ptr + j, step);
    810   }
    811   if (width & 1) {
    812     dst[0] = SUM2(r_ptr + j);
    813     dst[1] = SUM2(g_ptr + j);
    814     dst[2] = SUM2(b_ptr + j);
    815   }
    816 }
    817 
    818 static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
    819                                         uint8_t* const dst_u,
    820                                         uint8_t* const dst_v,
    821                                         int width,
    822                                         VP8Random* const rg) {
    823   int i;
    824   for (i = 0; i < width; i += 1, rgb += 4) {
    825     const int r = rgb[0], g = rgb[1], b = rgb[2];
    826     dst_u[i] = RGBToU(r, g, b, rg);
    827     dst_v[i] = RGBToV(r, g, b, rg);
    828   }
    829 }
    830 
    831 static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
    832                               const uint8_t* g_ptr,
    833                               const uint8_t* b_ptr,
    834                               const uint8_t* a_ptr,
    835                               int step,         // bytes per pixel
    836                               int rgb_stride,   // bytes per scanline
    837                               float dithering,
    838                               int use_iterative_conversion,
    839                               WebPPicture* const picture) {
    840   int y;
    841   const int width = picture->width;
    842   const int height = picture->height;
    843   const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
    844   const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr
    845 
    846   picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
    847   picture->use_argb = 0;
    848 
    849   // disable smart conversion if source is too small (overkill).
    850   if (width < kMinDimensionIterativeConversion ||
    851       height < kMinDimensionIterativeConversion) {
    852     use_iterative_conversion = 0;
    853   }
    854 
    855   if (!WebPPictureAllocYUVA(picture, width, height)) {
    856     return 0;
    857   }
    858   if (has_alpha) {
    859     assert(step == 4);
    860 #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
    861     assert(kAlphaFix + kGammaFix <= 31);
    862 #endif
    863   }
    864 
    865   if (use_iterative_conversion) {
    866     InitGammaTablesF();
    867     if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
    868       return 0;
    869     }
    870     if (has_alpha) {
    871       WebPExtractAlpha(a_ptr, rgb_stride, width, height,
    872                        picture->a, picture->a_stride);
    873     }
    874   } else {
    875     const int uv_width = (width + 1) >> 1;
    876     int use_dsp = (step == 3);  // use special function in this case
    877     // temporary storage for accumulated R/G/B values during conversion to U/V
    878     uint16_t* const tmp_rgb =
    879         (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
    880     uint8_t* dst_y = picture->y;
    881     uint8_t* dst_u = picture->u;
    882     uint8_t* dst_v = picture->v;
    883     uint8_t* dst_a = picture->a;
    884 
    885     VP8Random base_rg;
    886     VP8Random* rg = NULL;
    887     if (dithering > 0.) {
    888       VP8InitRandom(&base_rg, dithering);
    889       rg = &base_rg;
    890       use_dsp = 0;   // can't use dsp in this case
    891     }
    892     WebPInitConvertARGBToYUV();
    893     InitGammaTables();
    894 
    895     if (tmp_rgb == NULL) return 0;  // malloc error
    896 
    897     // Downsample Y/U/V planes, two rows at a time
    898     for (y = 0; y < (height >> 1); ++y) {
    899       int rows_have_alpha = has_alpha;
    900       if (use_dsp) {
    901         if (is_rgb) {
    902           WebPConvertRGB24ToY(r_ptr, dst_y, width);
    903           WebPConvertRGB24ToY(r_ptr + rgb_stride,
    904                               dst_y + picture->y_stride, width);
    905         } else {
    906           WebPConvertBGR24ToY(b_ptr, dst_y, width);
    907           WebPConvertBGR24ToY(b_ptr + rgb_stride,
    908                               dst_y + picture->y_stride, width);
    909         }
    910       } else {
    911         ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
    912         ConvertRowToY(r_ptr + rgb_stride,
    913                       g_ptr + rgb_stride,
    914                       b_ptr + rgb_stride, step,
    915                       dst_y + picture->y_stride, width, rg);
    916       }
    917       dst_y += 2 * picture->y_stride;
    918       if (has_alpha) {
    919         rows_have_alpha &= !WebPExtractAlpha(a_ptr, rgb_stride, width, 2,
    920                                              dst_a, picture->a_stride);
    921         dst_a += 2 * picture->a_stride;
    922       }
    923       // Collect averaged R/G/B(/A)
    924       if (!rows_have_alpha) {
    925         AccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width);
    926       } else {
    927         AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb, width);
    928       }
    929       // Convert to U/V
    930       if (rg == NULL) {
    931         WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
    932       } else {
    933         ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
    934       }
    935       dst_u += picture->uv_stride;
    936       dst_v += picture->uv_stride;
    937       r_ptr += 2 * rgb_stride;
    938       b_ptr += 2 * rgb_stride;
    939       g_ptr += 2 * rgb_stride;
    940       if (has_alpha) a_ptr += 2 * rgb_stride;
    941     }
    942     if (height & 1) {    // extra last row
    943       int row_has_alpha = has_alpha;
    944       if (use_dsp) {
    945         if (r_ptr < b_ptr) {
    946           WebPConvertRGB24ToY(r_ptr, dst_y, width);
    947         } else {
    948           WebPConvertBGR24ToY(b_ptr, dst_y, width);
    949         }
    950       } else {
    951         ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
    952       }
    953       if (row_has_alpha) {
    954         row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0);
    955       }
    956       // Collect averaged R/G/B(/A)
    957       if (!row_has_alpha) {
    958         // Collect averaged R/G/B
    959         AccumulateRGB(r_ptr, g_ptr, b_ptr, step, /* rgb_stride = */ 0,
    960                       tmp_rgb, width);
    961       } else {
    962         AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /* rgb_stride = */ 0,
    963                        tmp_rgb, width);
    964       }
    965       if (rg == NULL) {
    966         WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
    967       } else {
    968         ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
    969       }
    970     }
    971     WebPSafeFree(tmp_rgb);
    972   }
    973   return 1;
    974 }
    975 
    976 #undef SUM4
    977 #undef SUM2
    978 #undef SUM4ALPHA
    979 #undef SUM2ALPHA
    980 
    981 //------------------------------------------------------------------------------
    982 // call for ARGB->YUVA conversion
    983 
    984 static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
    985                              float dithering, int use_iterative_conversion) {
    986   if (picture == NULL) return 0;
    987   if (picture->argb == NULL) {
    988     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
    989   } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
    990     return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
    991   } else {
    992     const uint8_t* const argb = (const uint8_t*)picture->argb;
    993     const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1;
    994     const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2;
    995     const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3;
    996     const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0;
    997 
    998     picture->colorspace = WEBP_YUV420;
    999     return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride,
   1000                               dithering, use_iterative_conversion, picture);
   1001   }
   1002 }
   1003 
   1004 int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
   1005                                   float dithering) {
   1006   return PictureARGBToYUVA(picture, colorspace, dithering, 0);
   1007 }
   1008 
   1009 int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
   1010   return PictureARGBToYUVA(picture, colorspace, 0.f, 0);
   1011 }
   1012 
   1013 int WebPPictureSharpARGBToYUVA(WebPPicture* picture) {
   1014   return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1);
   1015 }
   1016 // for backward compatibility
   1017 int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
   1018   return WebPPictureSharpARGBToYUVA(picture);
   1019 }
   1020 
   1021 //------------------------------------------------------------------------------
   1022 // call for YUVA -> ARGB conversion
   1023 
   1024 int WebPPictureYUVAToARGB(WebPPicture* picture) {
   1025   if (picture == NULL) return 0;
   1026   if (picture->y == NULL || picture->u == NULL || picture->v == NULL) {
   1027     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
   1028   }
   1029   if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
   1030     return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
   1031   }
   1032   if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
   1033     return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
   1034   }
   1035   // Allocate a new argb buffer (discarding the previous one).
   1036   if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
   1037   picture->use_argb = 1;
   1038 
   1039   // Convert
   1040   {
   1041     int y;
   1042     const int width = picture->width;
   1043     const int height = picture->height;
   1044     const int argb_stride = 4 * picture->argb_stride;
   1045     uint8_t* dst = (uint8_t*)picture->argb;
   1046     const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
   1047     WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
   1048 
   1049     // First row, with replicated top samples.
   1050     upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
   1051     cur_y += picture->y_stride;
   1052     dst += argb_stride;
   1053     // Center rows.
   1054     for (y = 1; y + 1 < height; y += 2) {
   1055       const uint8_t* const top_u = cur_u;
   1056       const uint8_t* const top_v = cur_v;
   1057       cur_u += picture->uv_stride;
   1058       cur_v += picture->uv_stride;
   1059       upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
   1060                dst, dst + argb_stride, width);
   1061       cur_y += 2 * picture->y_stride;
   1062       dst += 2 * argb_stride;
   1063     }
   1064     // Last row (if needed), with replicated bottom samples.
   1065     if (height > 1 && !(height & 1)) {
   1066       upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
   1067     }
   1068     // Insert alpha values if needed, in replacement for the default 0xff ones.
   1069     if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
   1070       for (y = 0; y < height; ++y) {
   1071         uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
   1072         const uint8_t* const src = picture->a + y * picture->a_stride;
   1073         int x;
   1074         for (x = 0; x < width; ++x) {
   1075           argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
   1076         }
   1077       }
   1078     }
   1079   }
   1080   return 1;
   1081 }
   1082 
   1083 //------------------------------------------------------------------------------
   1084 // automatic import / conversion
   1085 
   1086 static int Import(WebPPicture* const picture,
   1087                   const uint8_t* rgb, int rgb_stride,
   1088                   int step, int swap_rb, int import_alpha) {
   1089   int y;
   1090   const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0);
   1091   const uint8_t* g_ptr = rgb + 1;
   1092   const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2);
   1093   const int width = picture->width;
   1094   const int height = picture->height;
   1095 
   1096   if (!picture->use_argb) {
   1097     const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
   1098     return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
   1099                               0.f /* no dithering */, 0, picture);
   1100   }
   1101   if (!WebPPictureAlloc(picture)) return 0;
   1102 
   1103   VP8LDspInit();
   1104   WebPInitAlphaProcessing();
   1105 
   1106   if (import_alpha) {
   1107     uint32_t* dst = picture->argb;
   1108     const int do_copy =
   1109         (!swap_rb && !ALPHA_IS_LAST) || (swap_rb && ALPHA_IS_LAST);
   1110     assert(step == 4);
   1111     for (y = 0; y < height; ++y) {
   1112       if (do_copy) {
   1113         memcpy(dst, rgb, width * 4);
   1114       } else {
   1115         // RGBA input order. Need to swap R and B.
   1116         VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst);
   1117       }
   1118       rgb += rgb_stride;
   1119       dst += picture->argb_stride;
   1120     }
   1121   } else {
   1122     uint32_t* dst = picture->argb;
   1123     assert(step >= 3);
   1124     for (y = 0; y < height; ++y) {
   1125       WebPPackRGB(r_ptr, g_ptr, b_ptr, width, step, dst);
   1126       r_ptr += rgb_stride;
   1127       g_ptr += rgb_stride;
   1128       b_ptr += rgb_stride;
   1129       dst += picture->argb_stride;
   1130     }
   1131   }
   1132   return 1;
   1133 }
   1134 
   1135 // Public API
   1136 
   1137 #if !defined(WEBP_REDUCE_CSP)
   1138 
   1139 int WebPPictureImportBGR(WebPPicture* picture,
   1140                          const uint8_t* rgb, int rgb_stride) {
   1141   return (picture != NULL && rgb != NULL)
   1142              ? Import(picture, rgb, rgb_stride, 3, 1, 0)
   1143              : 0;
   1144 }
   1145 
   1146 int WebPPictureImportBGRA(WebPPicture* picture,
   1147                           const uint8_t* rgba, int rgba_stride) {
   1148   return (picture != NULL && rgba != NULL)
   1149              ? Import(picture, rgba, rgba_stride, 4, 1, 1)
   1150              : 0;
   1151 }
   1152 
   1153 
   1154 int WebPPictureImportBGRX(WebPPicture* picture,
   1155                           const uint8_t* rgba, int rgba_stride) {
   1156   return (picture != NULL && rgba != NULL)
   1157              ? Import(picture, rgba, rgba_stride, 4, 1, 0)
   1158              : 0;
   1159 }
   1160 
   1161 #endif   // WEBP_REDUCE_CSP
   1162 
   1163 int WebPPictureImportRGB(WebPPicture* picture,
   1164                          const uint8_t* rgb, int rgb_stride) {
   1165   return (picture != NULL && rgb != NULL)
   1166              ? Import(picture, rgb, rgb_stride, 3, 0, 0)
   1167              : 0;
   1168 }
   1169 
   1170 int WebPPictureImportRGBA(WebPPicture* picture,
   1171                           const uint8_t* rgba, int rgba_stride) {
   1172   return (picture != NULL && rgba != NULL)
   1173              ? Import(picture, rgba, rgba_stride, 4, 0, 1)
   1174              : 0;
   1175 }
   1176 
   1177 int WebPPictureImportRGBX(WebPPicture* picture,
   1178                           const uint8_t* rgba, int rgba_stride) {
   1179   return (picture != NULL && rgba != NULL)
   1180              ? Import(picture, rgba, rgba_stride, 4, 0, 0)
   1181              : 0;
   1182 }
   1183 
   1184 //------------------------------------------------------------------------------
   1185