Home | History | Annotate | Download | only in dec
      1 // Copyright 2011 Google Inc.
      2 //
      3 // This code is licensed under the same terms as WebM:
      4 //  Software License Agreement:  http://www.webmproject.org/license/software/
      5 //  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
      6 // -----------------------------------------------------------------------------
      7 //
      8 // functions for sample output.
      9 //
     10 // Author: Skal (pascal.massimino (at) gmail.com)
     11 
     12 #include <assert.h>
     13 #include <stdlib.h>
     14 #include "vp8i.h"
     15 #include "webpi.h"
     16 #include "yuv.h"
     17 
     18 #if defined(__cplusplus) || defined(c_plusplus)
     19 extern "C" {
     20 #endif
     21 
     22 #define FANCY_UPSAMPLING   // undefined to remove fancy upsampling support
     23 
     24 //------------------------------------------------------------------------------
     25 // Fancy upsampler
     26 
     27 #ifdef FANCY_UPSAMPLING
     28 
     29 // Given samples laid out in a square as:
     30 //  [a b]
     31 //  [c d]
     32 // we interpolate u/v as:
     33 //  ([9*a + 3*b + 3*c +   d    3*a + 9*b + 3*c +   d] + [8 8]) / 16
     34 //  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d] + [8 8]) / 16
     35 
     36 // We process u and v together stashed into 32bit (16bit each).
     37 #define LOAD_UV(u,v) ((u) | ((v) << 16))
     38 
     39 #define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                  \
     40 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
     41                       const uint8_t* top_u, const uint8_t* top_v,              \
     42                       const uint8_t* cur_u, const uint8_t* cur_v,              \
     43                       uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
     44   int x;                                                                       \
     45   const int last_pixel_pair = (len - 1) >> 1;                                  \
     46   uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]);   /* top-left sample */        \
     47   uint32_t l_uv  = LOAD_UV(cur_u[0], cur_v[0]);   /* left-sample */            \
     48   if (top_y) {                                                                 \
     49     const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                \
     50     FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                          \
     51   }                                                                            \
     52   if (bottom_y) {                                                              \
     53     const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                \
     54     FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst);                    \
     55   }                                                                            \
     56   for (x = 1; x <= last_pixel_pair; ++x) {                                     \
     57     const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]);  /* top sample */       \
     58     const uint32_t uv   = LOAD_UV(cur_u[x], cur_v[x]);  /* sample */           \
     59     /* precompute invariant values associated with first and second diagonals*/\
     60     const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u;               \
     61     const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3;                   \
     62     const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3;                    \
     63     if (top_y) {                                                               \
     64       const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                             \
     65       const uint32_t uv1 = (diag_03 + t_uv) >> 1;                              \
     66       FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                          \
     67            top_dst + (2 * x - 1) * XSTEP);                                     \
     68       FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16),                          \
     69            top_dst + (2 * x - 0) * XSTEP);                                     \
     70     }                                                                          \
     71     if (bottom_y) {                                                            \
     72       const uint32_t uv0 = (diag_03 + l_uv) >> 1;                              \
     73       const uint32_t uv1 = (diag_12 + uv) >> 1;                                \
     74       FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                       \
     75            bottom_dst + (2 * x - 1) * XSTEP);                                  \
     76       FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16),                       \
     77            bottom_dst + (2 * x + 0) * XSTEP);                                  \
     78     }                                                                          \
     79     tl_uv = t_uv;                                                              \
     80     l_uv = uv;                                                                 \
     81   }                                                                            \
     82   if (!(len & 1)) {                                                            \
     83     if (top_y) {                                                               \
     84       const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;              \
     85       FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16),                            \
     86            top_dst + (len - 1) * XSTEP);                                       \
     87     }                                                                          \
     88     if (bottom_y) {                                                            \
     89       const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;              \
     90       FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16),                         \
     91            bottom_dst + (len - 1) * XSTEP);                                    \
     92     }                                                                          \
     93   }                                                                            \
     94 }
     95 
     96 // All variants implemented.
     97 UPSAMPLE_FUNC(UpsampleRgbLinePair,  VP8YuvToRgb,  3)
     98 UPSAMPLE_FUNC(UpsampleBgrLinePair,  VP8YuvToBgr,  3)
     99 UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
    100 UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
    101 UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
    102 UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
    103 UPSAMPLE_FUNC(UpsampleRgb565LinePair,  VP8YuvToRgb565,  2)
    104 // These two don't erase the alpha value
    105 UPSAMPLE_FUNC(UpsampleRgbKeepAlphaLinePair, VP8YuvToRgb, 4)
    106 UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePair, VP8YuvToBgr, 4)
    107 UPSAMPLE_FUNC(UpsampleArgbKeepAlphaLinePair, VP8YuvToArgbKeepA, 4)
    108 UPSAMPLE_FUNC(UpsampleRgba4444KeepAlphaLinePair, VP8YuvToRgba4444KeepA, 2)
    109 
    110 #undef LOAD_UV
    111 #undef UPSAMPLE_FUNC
    112 
    113 // Fancy upsampling functions to convert YUV to RGB
    114 WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
    115 WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[MODE_LAST];
    116 
    117 static void InitUpsamplers(void) {
    118   WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
    119   WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
    120   WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
    121   WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
    122   WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
    123   WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
    124   WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
    125 
    126   WebPUpsamplersKeepAlpha[MODE_RGB]       = UpsampleRgbLinePair;
    127   WebPUpsamplersKeepAlpha[MODE_RGBA]      = UpsampleRgbKeepAlphaLinePair;
    128   WebPUpsamplersKeepAlpha[MODE_BGR]       = UpsampleBgrLinePair;
    129   WebPUpsamplersKeepAlpha[MODE_BGRA]      = UpsampleBgrKeepAlphaLinePair;
    130   WebPUpsamplersKeepAlpha[MODE_ARGB]      = UpsampleArgbKeepAlphaLinePair;
    131   WebPUpsamplersKeepAlpha[MODE_RGBA_4444] = UpsampleRgba4444KeepAlphaLinePair;
    132   WebPUpsamplersKeepAlpha[MODE_RGB_565]   = UpsampleRgb565LinePair;
    133 
    134   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
    135   if (VP8DecGetCPUInfo) {
    136     if (VP8DecGetCPUInfo(kSSE2)) {
    137 #if defined(__SSE2__) || defined(_MSC_VER)
    138       WebPInitUpsamplersSSE2();
    139 #endif
    140     }
    141   }
    142 }
    143 
    144 #endif  // FANCY_UPSAMPLING
    145 
    146 //------------------------------------------------------------------------------
    147 // simple point-sampling
    148 
    149 #define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                    \
    150 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
    151                       const uint8_t* u, const uint8_t* v,                      \
    152                       uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
    153   int i;                                                                       \
    154   for (i = 0; i < len - 1; i += 2) {                                           \
    155     FUNC(top_y[0], u[0], v[0], top_dst);                                       \
    156     FUNC(top_y[1], u[0], v[0], top_dst + XSTEP);                               \
    157     FUNC(bottom_y[0], u[0], v[0], bottom_dst);                                 \
    158     FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP);                         \
    159     top_y += 2;                                                                \
    160     bottom_y += 2;                                                             \
    161     u++;                                                                       \
    162     v++;                                                                       \
    163     top_dst += 2 * XSTEP;                                                      \
    164     bottom_dst += 2 * XSTEP;                                                   \
    165   }                                                                            \
    166   if (i == len - 1) {    /* last one */                                        \
    167     FUNC(top_y[0], u[0], v[0], top_dst);                                       \
    168     FUNC(bottom_y[0], u[0], v[0], bottom_dst);                                 \
    169   }                                                                            \
    170 }
    171 
    172 // All variants implemented.
    173 SAMPLE_FUNC(SampleRgbLinePair,      VP8YuvToRgb,  3)
    174 SAMPLE_FUNC(SampleBgrLinePair,      VP8YuvToBgr,  3)
    175 SAMPLE_FUNC(SampleRgbaLinePair,     VP8YuvToRgba, 4)
    176 SAMPLE_FUNC(SampleBgraLinePair,     VP8YuvToBgra, 4)
    177 SAMPLE_FUNC(SampleArgbLinePair,     VP8YuvToArgb, 4)
    178 SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2)
    179 SAMPLE_FUNC(SampleRgb565LinePair,   VP8YuvToRgb565, 2)
    180 
    181 #undef SAMPLE_FUNC
    182 
    183 // Main methods.
    184 typedef void (*SampleLinePairFunc)(
    185   const uint8_t* top_y, const uint8_t* bottom_y,
    186   const uint8_t* u, const uint8_t* v,
    187   uint8_t* top_dst, uint8_t* bottom_dst, int len);
    188 
    189 static const SampleLinePairFunc kSamplers[MODE_LAST] = {
    190   SampleRgbLinePair,       // MODE_RGB
    191   SampleRgbaLinePair,      // MODE_RGBA
    192   SampleBgrLinePair,       // MODE_BGR
    193   SampleBgraLinePair,      // MODE_BGRA
    194   SampleArgbLinePair,      // MODE_ARGB
    195   SampleRgba4444LinePair,  // MODE_RGBA_4444
    196   SampleRgb565LinePair     // MODE_RGB_565
    197 };
    198 
    199 //------------------------------------------------------------------------------
    200 // YUV444 converter
    201 
    202 #define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP)                                    \
    203 static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
    204                       uint8_t* dst, int len) {                                 \
    205   int i;                                                                       \
    206   for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);           \
    207 }
    208 
    209 YUV444_FUNC(Yuv444ToRgb,      VP8YuvToRgb,  3)
    210 YUV444_FUNC(Yuv444ToBgr,      VP8YuvToBgr,  3)
    211 YUV444_FUNC(Yuv444ToRgba,     VP8YuvToRgba, 4)
    212 YUV444_FUNC(Yuv444ToBgra,     VP8YuvToBgra, 4)
    213 YUV444_FUNC(Yuv444ToArgb,     VP8YuvToArgb, 4)
    214 YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2)
    215 YUV444_FUNC(Yuv444ToRgb565,   VP8YuvToRgb565, 2)
    216 
    217 #undef YUV444_FUNC
    218 
    219 typedef void (*YUV444Func)(const uint8_t* y, const uint8_t* u, const uint8_t* v,
    220                            uint8_t* dst, int len);
    221 
    222 static const YUV444Func kYUV444Converters[MODE_LAST] = {
    223   Yuv444ToRgb,       // MODE_RGB
    224   Yuv444ToRgba,      // MODE_RGBA
    225   Yuv444ToBgr,       // MODE_BGR
    226   Yuv444ToBgra,      // MODE_BGRA
    227   Yuv444ToArgb,      // MODE_ARGB
    228   Yuv444ToRgba4444,  // MODE_RGBA_4444
    229   Yuv444ToRgb565     // MODE_RGB_565
    230 };
    231 
    232 //------------------------------------------------------------------------------
    233 // Main YUV<->RGB conversion functions
    234 
    235 static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
    236   WebPDecBuffer* output = p->output;
    237   const WebPYUVABuffer* const buf = &output->u.YUVA;
    238   uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride;
    239   uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride;
    240   uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride;
    241   const int mb_w = io->mb_w;
    242   const int mb_h = io->mb_h;
    243   const int uv_w = (mb_w + 1) / 2;
    244   int j;
    245   for (j = 0; j < mb_h; ++j) {
    246     memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w);
    247   }
    248   for (j = 0; j < (mb_h + 1) / 2; ++j) {
    249     memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w);
    250     memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w);
    251   }
    252   return io->mb_h;
    253 }
    254 
    255 // Point-sampling U/V sampler.
    256 static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
    257   WebPDecBuffer* output = p->output;
    258   const WebPRGBABuffer* const buf = &output->u.RGBA;
    259   uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
    260   const uint8_t* y_src = io->y;
    261   const uint8_t* u_src = io->u;
    262   const uint8_t* v_src = io->v;
    263   const SampleLinePairFunc sample = kSamplers[output->colorspace];
    264   const int mb_w = io->mb_w;
    265   const int last = io->mb_h - 1;
    266   int j;
    267   for (j = 0; j < last; j += 2) {
    268     sample(y_src, y_src + io->y_stride, u_src, v_src,
    269            dst, dst + buf->stride, mb_w);
    270     y_src += 2 * io->y_stride;
    271     u_src += io->uv_stride;
    272     v_src += io->uv_stride;
    273     dst += 2 * buf->stride;
    274   }
    275   if (j == last) {  // Just do the last line twice
    276     sample(y_src, y_src, u_src, v_src, dst, dst, mb_w);
    277   }
    278   return io->mb_h;
    279 }
    280 
    281 //------------------------------------------------------------------------------
    282 // YUV444 -> RGB conversion
    283 
    284 #if 0   // TODO(skal): this is for future rescaling.
    285 static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) {
    286   WebPDecBuffer* output = p->output;
    287   const WebPRGBABuffer* const buf = &output->u.RGBA;
    288   uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
    289   const uint8_t* y_src = io->y;
    290   const uint8_t* u_src = io->u;
    291   const uint8_t* v_src = io->v;
    292   const YUV444Func convert = kYUV444Converters[output->colorspace];
    293   const int mb_w = io->mb_w;
    294   const int last = io->mb_h;
    295   int j;
    296   for (j = 0; j < last; ++j) {
    297     convert(y_src, u_src, v_src, dst, mb_w);
    298     y_src += io->y_stride;
    299     u_src += io->uv_stride;
    300     v_src += io->uv_stride;
    301     dst += buf->stride;
    302   }
    303   return io->mb_h;
    304 }
    305 #endif
    306 
    307 //------------------------------------------------------------------------------
    308 // Fancy upsampling
    309 
    310 #ifdef FANCY_UPSAMPLING
    311 static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
    312   int num_lines_out = io->mb_h;   // a priori guess
    313   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
    314   uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
    315   const WebPUpsampleLinePairFunc upsample =
    316       io->a ? WebPUpsamplersKeepAlpha[p->output->colorspace]
    317             : WebPUpsamplers[p->output->colorspace];
    318   const uint8_t* cur_y = io->y;
    319   const uint8_t* cur_u = io->u;
    320   const uint8_t* cur_v = io->v;
    321   const uint8_t* top_u = p->tmp_u;
    322   const uint8_t* top_v = p->tmp_v;
    323   int y = io->mb_y;
    324   int y_end = io->mb_y + io->mb_h;
    325   const int mb_w = io->mb_w;
    326   const int uv_w = (mb_w + 1) / 2;
    327 
    328   if (y == 0) {
    329     // First line is special cased. We mirror the u/v samples at boundary.
    330     upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w);
    331   } else {
    332     // We can finish the left-over line from previous call.
    333     // Warning! Don't overwrite the alpha values (if any), as they
    334     // are not lagging one line behind but are already written.
    335     upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
    336              dst - buf->stride, dst, mb_w);
    337     num_lines_out++;
    338   }
    339   // Loop over each output pairs of row.
    340   for (; y + 2 < y_end; y += 2) {
    341     top_u = cur_u;
    342     top_v = cur_v;
    343     cur_u += io->uv_stride;
    344     cur_v += io->uv_stride;
    345     dst += 2 * buf->stride;
    346     cur_y += 2 * io->y_stride;
    347     upsample(cur_y - io->y_stride, cur_y,
    348              top_u, top_v, cur_u, cur_v,
    349              dst - buf->stride, dst, mb_w);
    350   }
    351   // move to last row
    352   cur_y += io->y_stride;
    353   if (io->crop_top + y_end < io->crop_bottom) {
    354     // Save the unfinished samples for next call (as we're not done yet).
    355     memcpy(p->tmp_y, cur_y, mb_w * sizeof(*p->tmp_y));
    356     memcpy(p->tmp_u, cur_u, uv_w * sizeof(*p->tmp_u));
    357     memcpy(p->tmp_v, cur_v, uv_w * sizeof(*p->tmp_v));
    358     // The fancy upsampler leaves a row unfinished behind
    359     // (except for the very last row)
    360     num_lines_out--;
    361   } else {
    362     // Process the very last row of even-sized picture
    363     if (!(y_end & 1)) {
    364       upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
    365               dst + buf->stride, NULL, mb_w);
    366     }
    367   }
    368   return num_lines_out;
    369 }
    370 
    371 #endif    /* FANCY_UPSAMPLING */
    372 
    373 //------------------------------------------------------------------------------
    374 
    375 #ifdef WEBP_EXPERIMENTAL_FEATURES
    376 static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
    377   const int mb_w = io->mb_w;
    378   const int mb_h = io->mb_h;
    379   int j;
    380   const WebPYUVABuffer* const buf = &p->output->u.YUVA;
    381   uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
    382   const uint8_t* alpha = io->a;
    383   if (alpha) {
    384     for (j = 0; j < mb_h; ++j) {
    385       memcpy(dst, alpha, mb_w * sizeof(*dst));
    386       alpha += io->width;
    387       dst += buf->a_stride;
    388     }
    389   }
    390   return 0;
    391 }
    392 
    393 static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
    394   const int mb_w = io->mb_w;
    395   const int mb_h = io->mb_h;
    396   int i, j;
    397   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
    398   uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
    399   const uint8_t* alpha = io->a;
    400   if (alpha) {
    401     for (j = 0; j < mb_h; ++j) {
    402       for (i = 0; i < mb_w; ++i) {
    403         dst[4 * i + 3] = alpha[i];
    404       }
    405       alpha += io->width;
    406       dst += buf->stride;
    407     }
    408   }
    409   return 0;
    410 }
    411 
    412 #endif    /* WEBP_EXPERIMENTAL_FEATURES */
    413 
    414 //------------------------------------------------------------------------------
    415 // Simple picture rescaler
    416 
    417 // TODO(skal): start a common library for encoder and decoder, and factorize
    418 // this code in.
    419 
    420 #define RFIX 30
    421 #define MULT(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
    422 
    423 static void InitRescaler(WebPRescaler* const wrk,
    424                          int src_width, int src_height,
    425                          uint8_t* dst,
    426                          int dst_width, int dst_height, int dst_stride,
    427                          int x_add, int x_sub, int y_add, int y_sub,
    428                          int32_t* work) {
    429   wrk->x_expand = (src_width < dst_width);
    430   wrk->src_width = src_width;
    431   wrk->src_height = src_height;
    432   wrk->dst_width = dst_width;
    433   wrk->dst_height = dst_height;
    434   wrk->dst = dst;
    435   wrk->dst_stride = dst_stride;
    436   // for 'x_expand', we use bilinear interpolation
    437   wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
    438   wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
    439   wrk->y_accum = y_add;
    440   wrk->y_add = y_add;
    441   wrk->y_sub = y_sub;
    442   wrk->fx_scale = (1 << RFIX) / x_sub;
    443   wrk->fy_scale = (1 << RFIX) / y_sub;
    444   wrk->fxy_scale = wrk->x_expand ?
    445       ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
    446       ((int64_t)dst_height << RFIX) / (x_add * src_height);
    447   wrk->irow = work;
    448   wrk->frow = work + dst_width;
    449 }
    450 
    451 static inline void ImportRow(const uint8_t* const src,
    452                              WebPRescaler* const wrk) {
    453   int x_in = 0;
    454   int x_out;
    455   int accum = 0;
    456   if (!wrk->x_expand) {
    457     int sum = 0;
    458     for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
    459       accum += wrk->x_add;
    460       for (; accum > 0; accum -= wrk->x_sub) {
    461         sum += src[x_in++];
    462       }
    463       {        // Emit next horizontal pixel.
    464         const int32_t base = src[x_in++];
    465         const int32_t frac = base * (-accum);
    466         wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
    467         // fresh fractional start for next pixel
    468         sum = MULT(frac, wrk->fx_scale);
    469       }
    470     }
    471   } else {        // simple bilinear interpolation
    472     int left = src[0], right = src[0];
    473     for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
    474       if (accum < 0) {
    475         left = right;
    476         right = src[++x_in];
    477         accum += wrk->x_add;
    478       }
    479       wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
    480       accum -= wrk->x_sub;
    481     }
    482   }
    483   // Accumulate the new row's contribution
    484   for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
    485     wrk->irow[x_out] += wrk->frow[x_out];
    486   }
    487 }
    488 
    489 static void ExportRow(WebPRescaler* const wrk) {
    490   int x_out;
    491   const int yscale = wrk->fy_scale * (-wrk->y_accum);
    492   assert(wrk->y_accum <= 0);
    493   for (x_out = 0; x_out < wrk->dst_width; ++x_out) {
    494     const int frac = MULT(wrk->frow[x_out], yscale);
    495     const int v = MULT(wrk->irow[x_out] - frac, wrk->fxy_scale);
    496     wrk->dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
    497     wrk->irow[x_out] = frac;   // new fractional start
    498   }
    499   wrk->y_accum += wrk->y_add;
    500   wrk->dst += wrk->dst_stride;
    501 }
    502 
    503 #undef MULT
    504 #undef RFIX
    505 
    506 //------------------------------------------------------------------------------
    507 // YUV rescaling (no final RGB conversion needed)
    508 
    509 static int Rescale(const uint8_t* src, int src_stride,
    510                    int new_lines, WebPRescaler* const wrk) {
    511   int num_lines_out = 0;
    512   while (new_lines-- > 0) {    // import new contribution of one source row.
    513     ImportRow(src, wrk);
    514     src += src_stride;
    515     wrk->y_accum -= wrk->y_sub;
    516     while (wrk->y_accum <= 0) {      // emit output row(s)
    517       ExportRow(wrk);
    518       num_lines_out++;
    519     }
    520   }
    521   return num_lines_out;
    522 }
    523 
    524 static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
    525   const int mb_h = io->mb_h;
    526   const int uv_mb_h = (mb_h + 1) >> 1;
    527   const int num_lines_out = Rescale(io->y, io->y_stride, mb_h, &p->scaler_y);
    528   Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u);
    529   Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v);
    530   return num_lines_out;
    531 }
    532 
    533 static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
    534   if (io->a) {
    535     Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
    536   }
    537   return 0;
    538 }
    539 
    540 static int IsAlphaMode(WEBP_CSP_MODE mode) {
    541   return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB ||
    542           mode == MODE_RGBA_4444 || mode == MODE_YUVA);
    543 }
    544 
    545 static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
    546   const int has_alpha = IsAlphaMode(p->output->colorspace);
    547   const WebPYUVABuffer* const buf = &p->output->u.YUVA;
    548   const int out_width  = io->scaled_width;
    549   const int out_height = io->scaled_height;
    550   const int uv_out_width  = (out_width + 1) >> 1;
    551   const int uv_out_height = (out_height + 1) >> 1;
    552   const int uv_in_width  = (io->mb_w + 1) >> 1;
    553   const int uv_in_height = (io->mb_h + 1) >> 1;
    554   const size_t work_size = 2 * out_width;   // scratch memory for luma rescaler
    555   const size_t uv_work_size = 2 * uv_out_width;  // and for each u/v ones
    556   size_t tmp_size;
    557   int32_t* work;
    558 
    559   tmp_size = work_size + 2 * uv_work_size;
    560   if (has_alpha) {
    561     tmp_size += work_size;
    562   }
    563   p->memory = calloc(1, tmp_size * sizeof(*work));
    564   if (p->memory == NULL) {
    565     return 0;   // memory error
    566   }
    567   work = (int32_t*)p->memory;
    568   InitRescaler(&p->scaler_y, io->mb_w, io->mb_h,
    569                buf->y, out_width, out_height, buf->y_stride,
    570                io->mb_w, out_width, io->mb_h, out_height,
    571                work);
    572   InitRescaler(&p->scaler_u, uv_in_width, uv_in_height,
    573                buf->u, uv_out_width, uv_out_height, buf->u_stride,
    574                uv_in_width, uv_out_width,
    575                uv_in_height, uv_out_height,
    576                work + work_size);
    577   InitRescaler(&p->scaler_v, uv_in_width, uv_in_height,
    578                buf->v, uv_out_width, uv_out_height, buf->v_stride,
    579                uv_in_width, uv_out_width,
    580                uv_in_height, uv_out_height,
    581                work + work_size + uv_work_size);
    582   p->emit = EmitRescaledYUV;
    583   if (has_alpha) {
    584     InitRescaler(&p->scaler_a, io->mb_w, io->mb_h,
    585                  buf->a, out_width, out_height, buf->a_stride,
    586                  io->mb_w, out_width, io->mb_h, out_height,
    587                  work + work_size + 2 * uv_work_size);
    588     p->emit_alpha = EmitRescaledAlphaYUV;
    589   }
    590   return 1;
    591 }
    592 
    593 //------------------------------------------------------------------------------
    594 // RGBA rescaling
    595 
    596 // import new contributions until one row is ready to be output, or all input
    597 // is consumed.
    598 static int Import(const uint8_t* src, int src_stride,
    599                   int new_lines, WebPRescaler* const wrk) {
    600   int num_lines_in = 0;
    601   while (num_lines_in < new_lines && wrk->y_accum > 0) {
    602     ImportRow(src, wrk);
    603     src += src_stride;
    604     ++num_lines_in;
    605     wrk->y_accum -= wrk->y_sub;
    606   }
    607   return num_lines_in;
    608 }
    609 
    610 static int ExportRGB(WebPDecParams* const p, int y_pos) {
    611   const YUV444Func convert = kYUV444Converters[p->output->colorspace];
    612   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
    613   uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;
    614   int num_lines_out = 0;
    615   // For RGB rescaling, because of the YUV420, current scan position
    616   // U/V can be +1/-1 line from the Y one.  Hence the double test.
    617   while (p->scaler_y.y_accum <= 0 && p->scaler_u.y_accum <= 0) {
    618     assert(p->last_y + y_pos + num_lines_out < p->output->height);
    619     assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
    620     ExportRow(&p->scaler_y);
    621     ExportRow(&p->scaler_u);
    622     ExportRow(&p->scaler_v);
    623     convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst,
    624             dst, p->scaler_y.dst_width);
    625     dst += buf->stride;
    626     num_lines_out++;
    627   }
    628   return num_lines_out;
    629 }
    630 
    631 static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
    632   const int mb_h = io->mb_h;
    633   const int uv_mb_h = (mb_h + 1) >> 1;
    634   int j = 0, uv_j = 0;
    635   int num_lines_out = 0;
    636   while (j < mb_h) {
    637     const int y_lines_in = Import(io->y + j * io->y_stride, io->y_stride,
    638                                   mb_h - j, &p->scaler_y);
    639     const int u_lines_in = Import(io->u + uv_j * io->uv_stride, io->uv_stride,
    640                                   uv_mb_h - uv_j, &p->scaler_u);
    641     const int v_lines_in = Import(io->v + uv_j * io->uv_stride, io->uv_stride,
    642                                   uv_mb_h - uv_j, &p->scaler_v);
    643     (void)v_lines_in;   // remove a gcc warning
    644     assert(u_lines_in == v_lines_in);
    645     j += y_lines_in;
    646     uv_j += u_lines_in;
    647     num_lines_out += ExportRGB(p, num_lines_out);
    648   }
    649   return num_lines_out;
    650 }
    651 
    652 static int ExportAlpha(WebPDecParams* const p, int y_pos) {
    653   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
    654   uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;
    655   int num_lines_out = 0;
    656   while (p->scaler_a.y_accum <= 0) {
    657     int i;
    658     assert(p->last_y + y_pos + num_lines_out < p->output->height);
    659     ExportRow(&p->scaler_a);
    660     for (i = 0; i < p->scaler_a.dst_width; ++i) {
    661       dst[4 * i + 3] = p->scaler_a.dst[i];
    662     }
    663     dst += buf->stride;
    664     num_lines_out++;
    665   }
    666   return num_lines_out;
    667 }
    668 
    669 static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
    670   if (io->a) {
    671     int j = 0, pos = 0;
    672     while (j < io->mb_h) {
    673       j += Import(io->a + j * io->width, io->width, io->mb_h - j, &p->scaler_a);
    674       pos += ExportAlpha(p, pos);
    675     }
    676   }
    677   return 0;
    678 }
    679 
    680 static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
    681   const int has_alpha = IsAlphaMode(p->output->colorspace);
    682   const int out_width  = io->scaled_width;
    683   const int out_height = io->scaled_height;
    684   const int uv_in_width  = (io->mb_w + 1) >> 1;
    685   const int uv_in_height = (io->mb_h + 1) >> 1;
    686   const size_t work_size = 2 * out_width;   // scratch memory for one rescaler
    687   int32_t* work;  // rescalers work area
    688   uint8_t* tmp;   // tmp storage for scaled YUV444 samples before RGB conversion
    689   size_t tmp_size1, tmp_size2;
    690 
    691   tmp_size1 = 3 * work_size;
    692   tmp_size2 = 3 * out_width;
    693   if (has_alpha) {
    694     tmp_size1 += work_size;
    695     tmp_size2 += out_width;
    696   }
    697   p->memory =
    698       calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp));
    699   if (p->memory == NULL) {
    700     return 0;   // memory error
    701   }
    702   work = (int32_t*)p->memory;
    703   tmp = (uint8_t*)(work + tmp_size1);
    704   InitRescaler(&p->scaler_y, io->mb_w, io->mb_h,
    705                tmp + 0 * out_width, out_width, out_height, 0,
    706                io->mb_w, out_width, io->mb_h, out_height,
    707                work + 0 * work_size);
    708   InitRescaler(&p->scaler_u, uv_in_width, uv_in_height,
    709                tmp + 1 * out_width, out_width, out_height, 0,
    710                io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
    711                work + 1 * work_size);
    712   InitRescaler(&p->scaler_v, uv_in_width, uv_in_height,
    713                tmp + 2 * out_width, out_width, out_height, 0,
    714                io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
    715                work + 2 * work_size);
    716   p->emit = EmitRescaledRGB;
    717 
    718   if (has_alpha) {
    719     InitRescaler(&p->scaler_a, io->mb_w, io->mb_h,
    720                  tmp + 3 * out_width, out_width, out_height, 0,
    721                  io->mb_w, out_width, io->mb_h, out_height,
    722                  work + 3 * work_size);
    723     p->emit_alpha = EmitRescaledAlphaRGB;
    724   }
    725   return 1;
    726 }
    727 
    728 //------------------------------------------------------------------------------
    729 // Default custom functions
    730 
    731 // Setup crop_xxx fields, mb_w and mb_h
    732 static int InitFromOptions(const WebPDecoderOptions* const options,
    733                            VP8Io* const io) {
    734   const int W = io->width;
    735   const int H = io->height;
    736   int x = 0, y = 0, w = W, h = H;
    737 
    738   // Cropping
    739   io->use_cropping = (options != NULL) && (options->use_cropping > 0);
    740   if (io->use_cropping) {
    741     w = options->crop_width;
    742     h = options->crop_height;
    743     // TODO(skal): take colorspace into account. Don't assume YUV420.
    744     x = options->crop_left & ~1;
    745     y = options->crop_top & ~1;
    746     if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
    747       return 0;  // out of frame boundary error
    748     }
    749   }
    750   io->crop_left   = x;
    751   io->crop_top    = y;
    752   io->crop_right  = x + w;
    753   io->crop_bottom = y + h;
    754   io->mb_w = w;
    755   io->mb_h = h;
    756 
    757   // Scaling
    758   io->use_scaling = (options != NULL) && (options->use_scaling > 0);
    759   if (io->use_scaling) {
    760     if (options->scaled_width <= 0 || options->scaled_height <= 0) {
    761       return 0;
    762     }
    763     io->scaled_width = options->scaled_width;
    764     io->scaled_height = options->scaled_height;
    765   }
    766 
    767   // Filter
    768   io->bypass_filtering = options && options->bypass_filtering;
    769 
    770   // Fancy upsampler
    771 #ifdef FANCY_UPSAMPLING
    772   io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling);
    773 #endif
    774 
    775   if (io->use_scaling) {
    776     // disable filter (only for large downscaling ratio).
    777     io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
    778                            (io->scaled_height < H * 3 / 4);
    779     io->fancy_upsampling = 0;
    780   }
    781   return 1;
    782 }
    783 
    784 static int CustomSetup(VP8Io* io) {
    785   WebPDecParams* const p = (WebPDecParams*)io->opaque;
    786   const int is_rgb = (p->output->colorspace < MODE_YUV);
    787 
    788   p->memory = NULL;
    789   p->emit = NULL;
    790   p->emit_alpha = NULL;
    791   if (!InitFromOptions(p->options, io)) {
    792     return 0;
    793   }
    794 
    795   if (io->use_scaling) {
    796     const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
    797     if (!ok) {
    798       return 0;    // memory error
    799     }
    800   } else {
    801     if (is_rgb) {
    802       p->emit = EmitSampledRGB;   // default
    803 #ifdef FANCY_UPSAMPLING
    804       if (io->fancy_upsampling) {
    805         const int uv_width = (io->mb_w + 1) >> 1;
    806         p->memory = malloc(io->mb_w + 2 * uv_width);
    807         if (p->memory == NULL) {
    808           return 0;   // memory error.
    809         }
    810         p->tmp_y = (uint8_t*)p->memory;
    811         p->tmp_u = p->tmp_y + io->mb_w;
    812         p->tmp_v = p->tmp_u + uv_width;
    813         p->emit = EmitFancyRGB;
    814         InitUpsamplers();
    815       }
    816 #endif
    817     } else {
    818       p->emit = EmitYUV;
    819     }
    820 #ifdef WEBP_EXPERIMENTAL_FEATURES
    821     if (IsAlphaMode(p->output->colorspace)) {
    822       // We need transparency output
    823       p->emit_alpha = is_rgb ? EmitAlphaRGB : EmitAlphaYUV;
    824     }
    825 #endif
    826   }
    827 
    828   if (is_rgb) {
    829     VP8YUVInit();
    830   }
    831   return 1;
    832 }
    833 
    834 //------------------------------------------------------------------------------
    835 
    836 static int CustomPut(const VP8Io* io) {
    837   WebPDecParams* p = (WebPDecParams*)io->opaque;
    838   const int mb_w = io->mb_w;
    839   const int mb_h = io->mb_h;
    840   int num_lines_out;
    841   assert(!(io->mb_y & 1));
    842 
    843   if (mb_w <= 0 || mb_h <= 0) {
    844     return 0;
    845   }
    846   num_lines_out = p->emit(io, p);
    847   if (p->emit_alpha) {
    848     p->emit_alpha(io, p);
    849   }
    850   p->last_y += num_lines_out;
    851   return 1;
    852 }
    853 
    854 //------------------------------------------------------------------------------
    855 
    856 static void CustomTeardown(const VP8Io* io) {
    857   WebPDecParams* const p = (WebPDecParams*)io->opaque;
    858   free(p->memory);
    859   p->memory = NULL;
    860 }
    861 
    862 //------------------------------------------------------------------------------
    863 // Main entry point
    864 
    865 void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
    866   io->put      = CustomPut;
    867   io->setup    = CustomSetup;
    868   io->teardown = CustomTeardown;
    869   io->opaque   = params;
    870 }
    871 
    872 //------------------------------------------------------------------------------
    873 
    874 #if defined(__cplusplus) || defined(c_plusplus)
    875 }    // extern "C"
    876 #endif
    877