Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/scale.h"
     12 
     13 #include <assert.h>
     14 #include <string.h>
     15 
     16 #include "libyuv/cpu_id.h"
     17 #include "libyuv/planar_functions.h"  // For CopyARGB
     18 #include "libyuv/row.h"
     19 #include "libyuv/scale_row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 static __inline int Abs(int v) {
     27   return v >= 0 ? v : -v;
     28 }
     29 
     30 // CPU agnostic row functions
     31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
     32                      uint8* dst, int dst_width) {
     33   int x;
     34   for (x = 0; x < dst_width - 1; x += 2) {
     35     dst[0] = src_ptr[1];
     36     dst[1] = src_ptr[3];
     37     dst += 2;
     38     src_ptr += 4;
     39   }
     40   if (dst_width & 1) {
     41     dst[0] = src_ptr[1];
     42   }
     43 }
     44 
     45 void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
     46                         uint16* dst, int dst_width) {
     47   int x;
     48   for (x = 0; x < dst_width - 1; x += 2) {
     49     dst[0] = src_ptr[1];
     50     dst[1] = src_ptr[3];
     51     dst += 2;
     52     src_ptr += 4;
     53   }
     54   if (dst_width & 1) {
     55     dst[0] = src_ptr[1];
     56   }
     57 }
     58 
     59 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
     60                            uint8* dst, int dst_width) {
     61   const uint8* s = src_ptr;
     62   int x;
     63   for (x = 0; x < dst_width - 1; x += 2) {
     64     dst[0] = (s[0] + s[1] + 1) >> 1;
     65     dst[1] = (s[2] + s[3] + 1) >> 1;
     66     dst += 2;
     67     s += 4;
     68   }
     69   if (dst_width & 1) {
     70     dst[0] = (s[0] + s[1] + 1) >> 1;
     71   }
     72 }
     73 
     74 void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
     75                               uint16* dst, int dst_width) {
     76   const uint16* s = src_ptr;
     77   int x;
     78   for (x = 0; x < dst_width - 1; x += 2) {
     79     dst[0] = (s[0] + s[1] + 1) >> 1;
     80     dst[1] = (s[2] + s[3] + 1) >> 1;
     81     dst += 2;
     82     s += 4;
     83   }
     84   if (dst_width & 1) {
     85     dst[0] = (s[0] + s[1] + 1) >> 1;
     86   }
     87 }
     88 
     89 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
     90                         uint8* dst, int dst_width) {
     91   const uint8* s = src_ptr;
     92   const uint8* t = src_ptr + src_stride;
     93   int x;
     94   for (x = 0; x < dst_width - 1; x += 2) {
     95     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
     96     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
     97     dst += 2;
     98     s += 4;
     99     t += 4;
    100   }
    101   if (dst_width & 1) {
    102     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    103   }
    104 }
    105 
    106 void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    107                            uint16* dst, int dst_width) {
    108   const uint16* s = src_ptr;
    109   const uint16* t = src_ptr + src_stride;
    110   int x;
    111   for (x = 0; x < dst_width - 1; x += 2) {
    112     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    113     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    114     dst += 2;
    115     s += 4;
    116     t += 4;
    117   }
    118   if (dst_width & 1) {
    119     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    120   }
    121 }
    122 
    123 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
    124                      uint8* dst, int dst_width) {
    125   int x;
    126   for (x = 0; x < dst_width - 1; x += 2) {
    127     dst[0] = src_ptr[2];
    128     dst[1] = src_ptr[6];
    129     dst += 2;
    130     src_ptr += 8;
    131   }
    132   if (dst_width & 1) {
    133     dst[0] = src_ptr[2];
    134   }
    135 }
    136 
    137 void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    138                         uint16* dst, int dst_width) {
    139   int x;
    140   for (x = 0; x < dst_width - 1; x += 2) {
    141     dst[0] = src_ptr[2];
    142     dst[1] = src_ptr[6];
    143     dst += 2;
    144     src_ptr += 8;
    145   }
    146   if (dst_width & 1) {
    147     dst[0] = src_ptr[2];
    148   }
    149 }
    150 
    151 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
    152                         uint8* dst, int dst_width) {
    153   intptr_t stride = src_stride;
    154   int x;
    155   for (x = 0; x < dst_width - 1; x += 2) {
    156     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    157              src_ptr[stride + 0] + src_ptr[stride + 1] +
    158              src_ptr[stride + 2] + src_ptr[stride + 3] +
    159              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
    160              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
    161              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
    162              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
    163              8) >> 4;
    164     dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
    165              src_ptr[stride + 4] + src_ptr[stride + 5] +
    166              src_ptr[stride + 6] + src_ptr[stride + 7] +
    167              src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
    168              src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
    169              src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
    170              src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
    171              8) >> 4;
    172     dst += 2;
    173     src_ptr += 8;
    174   }
    175   if (dst_width & 1) {
    176     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    177              src_ptr[stride + 0] + src_ptr[stride + 1] +
    178              src_ptr[stride + 2] + src_ptr[stride + 3] +
    179              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
    180              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
    181              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
    182              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
    183              8) >> 4;
    184   }
    185 }
    186 
    187 void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    188                            uint16* dst, int dst_width) {
    189   intptr_t stride = src_stride;
    190   int x;
    191   for (x = 0; x < dst_width - 1; x += 2) {
    192     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    193              src_ptr[stride + 0] + src_ptr[stride + 1] +
    194              src_ptr[stride + 2] + src_ptr[stride + 3] +
    195              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
    196              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
    197              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
    198              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
    199              8) >> 4;
    200     dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
    201              src_ptr[stride + 4] + src_ptr[stride + 5] +
    202              src_ptr[stride + 6] + src_ptr[stride + 7] +
    203              src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
    204              src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
    205              src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
    206              src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
    207              8) >> 4;
    208     dst += 2;
    209     src_ptr += 8;
    210   }
    211   if (dst_width & 1) {
    212     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    213              src_ptr[stride + 0] + src_ptr[stride + 1] +
    214              src_ptr[stride + 2] + src_ptr[stride + 3] +
    215              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
    216              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
    217              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
    218              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
    219              8) >> 4;
    220   }
    221 }
    222 
    223 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
    224                       uint8* dst, int dst_width) {
    225   int x;
    226   assert((dst_width % 3 == 0) && (dst_width > 0));
    227   for (x = 0; x < dst_width; x += 3) {
    228     dst[0] = src_ptr[0];
    229     dst[1] = src_ptr[1];
    230     dst[2] = src_ptr[3];
    231     dst += 3;
    232     src_ptr += 4;
    233   }
    234 }
    235 
    236 void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    237                          uint16* dst, int dst_width) {
    238   int x;
    239   assert((dst_width % 3 == 0) && (dst_width > 0));
    240   for (x = 0; x < dst_width; x += 3) {
    241     dst[0] = src_ptr[0];
    242     dst[1] = src_ptr[1];
    243     dst[2] = src_ptr[3];
    244     dst += 3;
    245     src_ptr += 4;
    246   }
    247 }
    248 
    249 // Filter rows 0 and 1 together, 3 : 1
    250 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
    251                             uint8* d, int dst_width) {
    252   const uint8* s = src_ptr;
    253   const uint8* t = src_ptr + src_stride;
    254   int x;
    255   assert((dst_width % 3 == 0) && (dst_width > 0));
    256   for (x = 0; x < dst_width; x += 3) {
    257     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    258     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    259     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    260     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    261     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    262     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    263     d[0] = (a0 * 3 + b0 + 2) >> 2;
    264     d[1] = (a1 * 3 + b1 + 2) >> 2;
    265     d[2] = (a2 * 3 + b2 + 2) >> 2;
    266     d += 3;
    267     s += 4;
    268     t += 4;
    269   }
    270 }
    271 
    272 void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    273                                uint16* d, int dst_width) {
    274   const uint16* s = src_ptr;
    275   const uint16* t = src_ptr + src_stride;
    276   int x;
    277   assert((dst_width % 3 == 0) && (dst_width > 0));
    278   for (x = 0; x < dst_width; x += 3) {
    279     uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    280     uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    281     uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    282     uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    283     uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    284     uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    285     d[0] = (a0 * 3 + b0 + 2) >> 2;
    286     d[1] = (a1 * 3 + b1 + 2) >> 2;
    287     d[2] = (a2 * 3 + b2 + 2) >> 2;
    288     d += 3;
    289     s += 4;
    290     t += 4;
    291   }
    292 }
    293 
    294 // Filter rows 1 and 2 together, 1 : 1
    295 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
    296                             uint8* d, int dst_width) {
    297   const uint8* s = src_ptr;
    298   const uint8* t = src_ptr + src_stride;
    299   int x;
    300   assert((dst_width % 3 == 0) && (dst_width > 0));
    301   for (x = 0; x < dst_width; x += 3) {
    302     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    303     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    304     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    305     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    306     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    307     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    308     d[0] = (a0 + b0 + 1) >> 1;
    309     d[1] = (a1 + b1 + 1) >> 1;
    310     d[2] = (a2 + b2 + 1) >> 1;
    311     d += 3;
    312     s += 4;
    313     t += 4;
    314   }
    315 }
    316 
    317 void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    318                                uint16* d, int dst_width) {
    319   const uint16* s = src_ptr;
    320   const uint16* t = src_ptr + src_stride;
    321   int x;
    322   assert((dst_width % 3 == 0) && (dst_width > 0));
    323   for (x = 0; x < dst_width; x += 3) {
    324     uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    325     uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    326     uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    327     uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    328     uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    329     uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    330     d[0] = (a0 + b0 + 1) >> 1;
    331     d[1] = (a1 + b1 + 1) >> 1;
    332     d[2] = (a2 + b2 + 1) >> 1;
    333     d += 3;
    334     s += 4;
    335     t += 4;
    336   }
    337 }
    338 
    339 // Scales a single row of pixels using point sampling.
    340 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
    341                  int dst_width, int x, int dx) {
    342   int j;
    343   for (j = 0; j < dst_width - 1; j += 2) {
    344     dst_ptr[0] = src_ptr[x >> 16];
    345     x += dx;
    346     dst_ptr[1] = src_ptr[x >> 16];
    347     x += dx;
    348     dst_ptr += 2;
    349   }
    350   if (dst_width & 1) {
    351     dst_ptr[0] = src_ptr[x >> 16];
    352   }
    353 }
    354 
    355 void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
    356                     int dst_width, int x, int dx) {
    357   int j;
    358   for (j = 0; j < dst_width - 1; j += 2) {
    359     dst_ptr[0] = src_ptr[x >> 16];
    360     x += dx;
    361     dst_ptr[1] = src_ptr[x >> 16];
    362     x += dx;
    363     dst_ptr += 2;
    364   }
    365   if (dst_width & 1) {
    366     dst_ptr[0] = src_ptr[x >> 16];
    367   }
    368 }
    369 
    370 // Scales a single row of pixels up by 2x using point sampling.
    371 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
    372                     int dst_width, int x, int dx) {
    373   int j;
    374   for (j = 0; j < dst_width - 1; j += 2) {
    375     dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    376     src_ptr += 1;
    377     dst_ptr += 2;
    378   }
    379   if (dst_width & 1) {
    380     dst_ptr[0] = src_ptr[0];
    381   }
    382 }
    383 
    384 void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
    385                        int dst_width, int x, int dx) {
    386   int j;
    387   for (j = 0; j < dst_width - 1; j += 2) {
    388     dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    389     src_ptr += 1;
    390     dst_ptr += 2;
    391   }
    392   if (dst_width & 1) {
    393     dst_ptr[0] = src_ptr[0];
    394   }
    395 }
    396 
    397 // (1-f)a + fb can be replaced with a + f(b-a)
    398 #define BLENDER(a, b, f) (uint8)((int)(a) + \
    399     ((int)(f) * ((int)(b) - (int)(a)) >> 16))
    400 
    401 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
    402                        int dst_width, int x, int dx) {
    403   int j;
    404   for (j = 0; j < dst_width - 1; j += 2) {
    405     int xi = x >> 16;
    406     int a = src_ptr[xi];
    407     int b = src_ptr[xi + 1];
    408     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    409     x += dx;
    410     xi = x >> 16;
    411     a = src_ptr[xi];
    412     b = src_ptr[xi + 1];
    413     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    414     x += dx;
    415     dst_ptr += 2;
    416   }
    417   if (dst_width & 1) {
    418     int xi = x >> 16;
    419     int a = src_ptr[xi];
    420     int b = src_ptr[xi + 1];
    421     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    422   }
    423 }
    424 
    425 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
    426                          int dst_width, int x32, int dx) {
    427   int64 x = (int64)(x32);
    428   int j;
    429   for (j = 0; j < dst_width - 1; j += 2) {
    430     int64 xi = x >> 16;
    431     int a = src_ptr[xi];
    432     int b = src_ptr[xi + 1];
    433     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    434     x += dx;
    435     xi = x >> 16;
    436     a = src_ptr[xi];
    437     b = src_ptr[xi + 1];
    438     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    439     x += dx;
    440     dst_ptr += 2;
    441   }
    442   if (dst_width & 1) {
    443     int64 xi = x >> 16;
    444     int a = src_ptr[xi];
    445     int b = src_ptr[xi + 1];
    446     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    447   }
    448 }
    449 #undef BLENDER
    450 
    451 #define BLENDER(a, b, f) (uint16)((int)(a) + \
    452     ((int)(f) * ((int)(b) - (int)(a)) >> 16))
    453 
    454 void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
    455                        int dst_width, int x, int dx) {
    456   int j;
    457   for (j = 0; j < dst_width - 1; j += 2) {
    458     int xi = x >> 16;
    459     int a = src_ptr[xi];
    460     int b = src_ptr[xi + 1];
    461     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    462     x += dx;
    463     xi = x >> 16;
    464     a = src_ptr[xi];
    465     b = src_ptr[xi + 1];
    466     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    467     x += dx;
    468     dst_ptr += 2;
    469   }
    470   if (dst_width & 1) {
    471     int xi = x >> 16;
    472     int a = src_ptr[xi];
    473     int b = src_ptr[xi + 1];
    474     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    475   }
    476 }
    477 
    478 void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
    479                          int dst_width, int x32, int dx) {
    480   int64 x = (int64)(x32);
    481   int j;
    482   for (j = 0; j < dst_width - 1; j += 2) {
    483     int64 xi = x >> 16;
    484     int a = src_ptr[xi];
    485     int b = src_ptr[xi + 1];
    486     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    487     x += dx;
    488     xi = x >> 16;
    489     a = src_ptr[xi];
    490     b = src_ptr[xi + 1];
    491     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    492     x += dx;
    493     dst_ptr += 2;
    494   }
    495   if (dst_width & 1) {
    496     int64 xi = x >> 16;
    497     int a = src_ptr[xi];
    498     int b = src_ptr[xi + 1];
    499     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    500   }
    501 }
    502 #undef BLENDER
    503 
    504 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
    505                       uint8* dst, int dst_width) {
    506   int x;
    507   assert(dst_width % 3 == 0);
    508   for (x = 0; x < dst_width; x += 3) {
    509     dst[0] = src_ptr[0];
    510     dst[1] = src_ptr[3];
    511     dst[2] = src_ptr[6];
    512     dst += 3;
    513     src_ptr += 8;
    514   }
    515 }
    516 
    517 void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    518                          uint16* dst, int dst_width) {
    519   int x;
    520   assert(dst_width % 3 == 0);
    521   for (x = 0; x < dst_width; x += 3) {
    522     dst[0] = src_ptr[0];
    523     dst[1] = src_ptr[3];
    524     dst[2] = src_ptr[6];
    525     dst += 3;
    526     src_ptr += 8;
    527   }
    528 }
    529 
    530 // 8x3 -> 3x1
    531 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
    532                             ptrdiff_t src_stride,
    533                             uint8* dst_ptr, int dst_width) {
    534   intptr_t stride = src_stride;
    535   int i;
    536   assert((dst_width % 3 == 0) && (dst_width > 0));
    537   for (i = 0; i < dst_width; i += 3) {
    538     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
    539         src_ptr[stride + 0] + src_ptr[stride + 1] +
    540         src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
    541         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
    542         (65536 / 9) >> 16;
    543     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
    544         src_ptr[stride + 3] + src_ptr[stride + 4] +
    545         src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
    546         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
    547         (65536 / 9) >> 16;
    548     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
    549         src_ptr[stride + 6] + src_ptr[stride + 7] +
    550         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
    551         (65536 / 6) >> 16;
    552     src_ptr += 8;
    553     dst_ptr += 3;
    554   }
    555 }
    556 
    557 void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
    558                                ptrdiff_t src_stride,
    559                                uint16* dst_ptr, int dst_width) {
    560   intptr_t stride = src_stride;
    561   int i;
    562   assert((dst_width % 3 == 0) && (dst_width > 0));
    563   for (i = 0; i < dst_width; i += 3) {
    564     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
    565         src_ptr[stride + 0] + src_ptr[stride + 1] +
    566         src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
    567         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
    568         (65536 / 9) >> 16;
    569     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
    570         src_ptr[stride + 3] + src_ptr[stride + 4] +
    571         src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
    572         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
    573         (65536 / 9) >> 16;
    574     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
    575         src_ptr[stride + 6] + src_ptr[stride + 7] +
    576         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
    577         (65536 / 6) >> 16;
    578     src_ptr += 8;
    579     dst_ptr += 3;
    580   }
    581 }
    582 
    583 // 8x2 -> 3x1
    584 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
    585                             uint8* dst_ptr, int dst_width) {
    586   intptr_t stride = src_stride;
    587   int i;
    588   assert((dst_width % 3 == 0) && (dst_width > 0));
    589   for (i = 0; i < dst_width; i += 3) {
    590     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
    591         src_ptr[stride + 0] + src_ptr[stride + 1] +
    592         src_ptr[stride + 2]) * (65536 / 6) >> 16;
    593     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
    594         src_ptr[stride + 3] + src_ptr[stride + 4] +
    595         src_ptr[stride + 5]) * (65536 / 6) >> 16;
    596     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
    597         src_ptr[stride + 6] + src_ptr[stride + 7]) *
    598         (65536 / 4) >> 16;
    599     src_ptr += 8;
    600     dst_ptr += 3;
    601   }
    602 }
    603 
    604 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    605                                uint16* dst_ptr, int dst_width) {
    606   intptr_t stride = src_stride;
    607   int i;
    608   assert((dst_width % 3 == 0) && (dst_width > 0));
    609   for (i = 0; i < dst_width; i += 3) {
    610     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
    611         src_ptr[stride + 0] + src_ptr[stride + 1] +
    612         src_ptr[stride + 2]) * (65536 / 6) >> 16;
    613     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
    614         src_ptr[stride + 3] + src_ptr[stride + 4] +
    615         src_ptr[stride + 5]) * (65536 / 6) >> 16;
    616     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
    617         src_ptr[stride + 6] + src_ptr[stride + 7]) *
    618         (65536 / 4) >> 16;
    619     src_ptr += 8;
    620     dst_ptr += 3;
    621   }
    622 }
    623 
    624 void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
    625                     uint16* dst_ptr, int src_width, int src_height) {
    626   int x;
    627   assert(src_width > 0);
    628   assert(src_height > 0);
    629   for (x = 0; x < src_width; ++x) {
    630     const uint8* s = src_ptr + x;
    631     unsigned int sum = 0u;
    632     int y;
    633     for (y = 0; y < src_height; ++y) {
    634       sum += s[0];
    635       s += src_stride;
    636     }
    637     // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
    638     dst_ptr[x] = sum < 65535u ? sum : 65535u;
    639   }
    640 }
    641 
    642 void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
    643                        uint32* dst_ptr, int src_width, int src_height) {
    644   int x;
    645   assert(src_width > 0);
    646   assert(src_height > 0);
    647   for (x = 0; x < src_width; ++x) {
    648     const uint16* s = src_ptr + x;
    649     unsigned int sum = 0u;
    650     int y;
    651     for (y = 0; y < src_height; ++y) {
    652       sum += s[0];
    653       s += src_stride;
    654     }
    655     // No risk of overflow here now
    656     dst_ptr[x] = sum;
    657   }
    658 }
    659 
    660 void ScaleARGBRowDown2_C(const uint8* src_argb,
    661                          ptrdiff_t src_stride,
    662                          uint8* dst_argb, int dst_width) {
    663   const uint32* src = (const uint32*)(src_argb);
    664   uint32* dst = (uint32*)(dst_argb);
    665 
    666   int x;
    667   for (x = 0; x < dst_width - 1; x += 2) {
    668     dst[0] = src[1];
    669     dst[1] = src[3];
    670     src += 4;
    671     dst += 2;
    672   }
    673   if (dst_width & 1) {
    674     dst[0] = src[1];
    675   }
    676 }
    677 
    678 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
    679                                ptrdiff_t src_stride,
    680                                uint8* dst_argb, int dst_width) {
    681   int x;
    682   for (x = 0; x < dst_width; ++x) {
    683     dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
    684     dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
    685     dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
    686     dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
    687     src_argb += 8;
    688     dst_argb += 4;
    689   }
    690 }
    691 
    692 void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
    693                             uint8* dst_argb, int dst_width) {
    694   int x;
    695   for (x = 0; x < dst_width; ++x) {
    696     dst_argb[0] = (src_argb[0] + src_argb[4] +
    697                   src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
    698     dst_argb[1] = (src_argb[1] + src_argb[5] +
    699                   src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
    700     dst_argb[2] = (src_argb[2] + src_argb[6] +
    701                   src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
    702     dst_argb[3] = (src_argb[3] + src_argb[7] +
    703                   src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
    704     src_argb += 8;
    705     dst_argb += 4;
    706   }
    707 }
    708 
    709 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
    710                             int src_stepx,
    711                             uint8* dst_argb, int dst_width) {
    712   const uint32* src = (const uint32*)(src_argb);
    713   uint32* dst = (uint32*)(dst_argb);
    714 
    715   int x;
    716   for (x = 0; x < dst_width - 1; x += 2) {
    717     dst[0] = src[0];
    718     dst[1] = src[src_stepx];
    719     src += src_stepx * 2;
    720     dst += 2;
    721   }
    722   if (dst_width & 1) {
    723     dst[0] = src[0];
    724   }
    725 }
    726 
    727 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
    728                                ptrdiff_t src_stride,
    729                                int src_stepx,
    730                                uint8* dst_argb, int dst_width) {
    731   int x;
    732   for (x = 0; x < dst_width; ++x) {
    733     dst_argb[0] = (src_argb[0] + src_argb[4] +
    734                   src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
    735     dst_argb[1] = (src_argb[1] + src_argb[5] +
    736                   src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
    737     dst_argb[2] = (src_argb[2] + src_argb[6] +
    738                   src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
    739     dst_argb[3] = (src_argb[3] + src_argb[7] +
    740                   src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
    741     src_argb += src_stepx * 4;
    742     dst_argb += 4;
    743   }
    744 }
    745 
    746 // Scales a single row of pixels using point sampling.
    747 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
    748                      int dst_width, int x, int dx) {
    749   const uint32* src = (const uint32*)(src_argb);
    750   uint32* dst = (uint32*)(dst_argb);
    751   int j;
    752   for (j = 0; j < dst_width - 1; j += 2) {
    753     dst[0] = src[x >> 16];
    754     x += dx;
    755     dst[1] = src[x >> 16];
    756     x += dx;
    757     dst += 2;
    758   }
    759   if (dst_width & 1) {
    760     dst[0] = src[x >> 16];
    761   }
    762 }
    763 
    764 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
    765                        int dst_width, int x32, int dx) {
    766   int64 x = (int64)(x32);
    767   const uint32* src = (const uint32*)(src_argb);
    768   uint32* dst = (uint32*)(dst_argb);
    769   int j;
    770   for (j = 0; j < dst_width - 1; j += 2) {
    771     dst[0] = src[x >> 16];
    772     x += dx;
    773     dst[1] = src[x >> 16];
    774     x += dx;
    775     dst += 2;
    776   }
    777   if (dst_width & 1) {
    778     dst[0] = src[x >> 16];
    779   }
    780 }
    781 
    782 // Scales a single row of pixels up by 2x using point sampling.
    783 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
    784                         int dst_width, int x, int dx) {
    785   const uint32* src = (const uint32*)(src_argb);
    786   uint32* dst = (uint32*)(dst_argb);
    787   int j;
    788   for (j = 0; j < dst_width - 1; j += 2) {
    789     dst[1] = dst[0] = src[0];
    790     src += 1;
    791     dst += 2;
    792   }
    793   if (dst_width & 1) {
    794     dst[0] = src[0];
    795   }
    796 }
    797 
    798 // Mimics SSSE3 blender
    799 #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
    800 #define BLENDERC(a, b, f, s) (uint32)( \
    801     BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
    802 #define BLENDER(a, b, f) \
    803     BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
    804     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
    805 
    806 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
    807                            int dst_width, int x, int dx) {
    808   const uint32* src = (const uint32*)(src_argb);
    809   uint32* dst = (uint32*)(dst_argb);
    810   int j;
    811   for (j = 0; j < dst_width - 1; j += 2) {
    812     int xi = x >> 16;
    813     int xf = (x >> 9) & 0x7f;
    814     uint32 a = src[xi];
    815     uint32 b = src[xi + 1];
    816     dst[0] = BLENDER(a, b, xf);
    817     x += dx;
    818     xi = x >> 16;
    819     xf = (x >> 9) & 0x7f;
    820     a = src[xi];
    821     b = src[xi + 1];
    822     dst[1] = BLENDER(a, b, xf);
    823     x += dx;
    824     dst += 2;
    825   }
    826   if (dst_width & 1) {
    827     int xi = x >> 16;
    828     int xf = (x >> 9) & 0x7f;
    829     uint32 a = src[xi];
    830     uint32 b = src[xi + 1];
    831     dst[0] = BLENDER(a, b, xf);
    832   }
    833 }
    834 
    835 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
    836                              int dst_width, int x32, int dx) {
    837   int64 x = (int64)(x32);
    838   const uint32* src = (const uint32*)(src_argb);
    839   uint32* dst = (uint32*)(dst_argb);
    840   int j;
    841   for (j = 0; j < dst_width - 1; j += 2) {
    842     int64 xi = x >> 16;
    843     int xf = (x >> 9) & 0x7f;
    844     uint32 a = src[xi];
    845     uint32 b = src[xi + 1];
    846     dst[0] = BLENDER(a, b, xf);
    847     x += dx;
    848     xi = x >> 16;
    849     xf = (x >> 9) & 0x7f;
    850     a = src[xi];
    851     b = src[xi + 1];
    852     dst[1] = BLENDER(a, b, xf);
    853     x += dx;
    854     dst += 2;
    855   }
    856   if (dst_width & 1) {
    857     int64 xi = x >> 16;
    858     int xf = (x >> 9) & 0x7f;
    859     uint32 a = src[xi];
    860     uint32 b = src[xi + 1];
    861     dst[0] = BLENDER(a, b, xf);
    862   }
    863 }
    864 #undef BLENDER1
    865 #undef BLENDERC
    866 #undef BLENDER
    867 
    868 // Scale plane vertically with bilinear interpolation.
    869 void ScalePlaneVertical(int src_height,
    870                         int dst_width, int dst_height,
    871                         int src_stride, int dst_stride,
    872                         const uint8* src_argb, uint8* dst_argb,
    873                         int x, int y, int dy,
    874                         int bpp, enum FilterMode filtering) {
    875   // TODO(fbarchard): Allow higher bpp.
    876   int dst_width_bytes = dst_width * bpp;
    877   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
    878       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
    879       InterpolateRow_C;
    880   const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
    881   int j;
    882   assert(bpp >= 1 && bpp <= 4);
    883   assert(src_height != 0);
    884   assert(dst_width > 0);
    885   assert(dst_height > 0);
    886   src_argb += (x >> 16) * bpp;
    887 #if defined(HAS_INTERPOLATEROW_SSE2)
    888   if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
    889     InterpolateRow = InterpolateRow_Any_SSE2;
    890     if (IS_ALIGNED(dst_width_bytes, 16)) {
    891       InterpolateRow = InterpolateRow_Unaligned_SSE2;
    892       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
    893           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    894         InterpolateRow = InterpolateRow_SSE2;
    895       }
    896     }
    897   }
    898 #endif
    899 #if defined(HAS_INTERPOLATEROW_SSSE3)
    900   if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
    901     InterpolateRow = InterpolateRow_Any_SSSE3;
    902     if (IS_ALIGNED(dst_width_bytes, 16)) {
    903       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
    904       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
    905           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    906         InterpolateRow = InterpolateRow_SSSE3;
    907       }
    908     }
    909   }
    910 #endif
    911 #if defined(HAS_INTERPOLATEROW_AVX2)
    912   if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
    913     InterpolateRow = InterpolateRow_Any_AVX2;
    914     if (IS_ALIGNED(dst_width_bytes, 32)) {
    915       InterpolateRow = InterpolateRow_AVX2;
    916     }
    917   }
    918 #endif
    919 #if defined(HAS_INTERPOLATEROW_NEON)
    920   if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
    921     InterpolateRow = InterpolateRow_Any_NEON;
    922     if (IS_ALIGNED(dst_width_bytes, 16)) {
    923       InterpolateRow = InterpolateRow_NEON;
    924     }
    925   }
    926 #endif
    927 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
    928   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
    929       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
    930       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
    931     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    932     if (IS_ALIGNED(dst_width_bytes, 4)) {
    933       InterpolateRow = InterpolateRow_MIPS_DSPR2;
    934     }
    935   }
    936 #endif
    937   for (j = 0; j < dst_height; ++j) {
    938     int yi;
    939     int yf;
    940     if (y > max_y) {
    941       y = max_y;
    942     }
    943     yi = y >> 16;
    944     yf = filtering ? ((y >> 8) & 255) : 0;
    945     InterpolateRow(dst_argb, src_argb + yi * src_stride,
    946                    src_stride, dst_width_bytes, yf);
    947     dst_argb += dst_stride;
    948     y += dy;
    949   }
    950 }
    951 void ScalePlaneVertical_16(int src_height,
    952                            int dst_width, int dst_height,
    953                            int src_stride, int dst_stride,
    954                            const uint16* src_argb, uint16* dst_argb,
    955                            int x, int y, int dy,
    956                            int wpp, enum FilterMode filtering) {
    957   // TODO(fbarchard): Allow higher wpp.
    958   int dst_width_words = dst_width * wpp;
    959   void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
    960       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
    961       InterpolateRow_16_C;
    962   const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
    963   int j;
    964   assert(wpp >= 1 && wpp <= 2);
    965   assert(src_height != 0);
    966   assert(dst_width > 0);
    967   assert(dst_height > 0);
    968   src_argb += (x >> 16) * wpp;
    969 #if defined(HAS_INTERPOLATEROW_16_SSE2)
    970   if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
    971     InterpolateRow = InterpolateRow_Any_16_SSE2;
    972     if (IS_ALIGNED(dst_width_bytes, 16)) {
    973       InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
    974       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
    975           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    976         InterpolateRow = InterpolateRow_16_SSE2;
    977       }
    978     }
    979   }
    980 #endif
    981 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
    982   if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
    983     InterpolateRow = InterpolateRow_Any_16_SSSE3;
    984     if (IS_ALIGNED(dst_width_bytes, 16)) {
    985       InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
    986       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
    987           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    988         InterpolateRow = InterpolateRow_16_SSSE3;
    989       }
    990     }
    991   }
    992 #endif
    993 #if defined(HAS_INTERPOLATEROW_16_AVX2)
    994   if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
    995     InterpolateRow = InterpolateRow_Any_16_AVX2;
    996     if (IS_ALIGNED(dst_width_bytes, 32)) {
    997       InterpolateRow = InterpolateRow_16_AVX2;
    998     }
    999   }
   1000 #endif
   1001 #if defined(HAS_INTERPOLATEROW_16_NEON)
   1002   if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
   1003     InterpolateRow = InterpolateRow_Any_16_NEON;
   1004     if (IS_ALIGNED(dst_width_bytes, 16)) {
   1005       InterpolateRow = InterpolateRow_16_NEON;
   1006     }
   1007   }
   1008 #endif
   1009 #if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2)
   1010   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
   1011       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
   1012       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
   1013     InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
   1014     if (IS_ALIGNED(dst_width_bytes, 4)) {
   1015       InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
   1016     }
   1017   }
   1018 #endif
   1019   for (j = 0; j < dst_height; ++j) {
   1020     int yi;
   1021     int yf;
   1022     if (y > max_y) {
   1023       y = max_y;
   1024     }
   1025     yi = y >> 16;
   1026     yf = filtering ? ((y >> 8) & 255) : 0;
   1027     InterpolateRow(dst_argb, src_argb + yi * src_stride,
   1028                    src_stride, dst_width_words, yf);
   1029     dst_argb += dst_stride;
   1030     y += dy;
   1031   }
   1032 }
   1033 
   1034 // Simplify the filtering based on scale factors.
   1035 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
   1036                                   int dst_width, int dst_height,
   1037                                   enum FilterMode filtering) {
   1038   if (src_width < 0) {
   1039     src_width = -src_width;
   1040   }
   1041   if (src_height < 0) {
   1042     src_height = -src_height;
   1043   }
   1044   if (filtering == kFilterBox) {
   1045     // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
   1046     if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
   1047       filtering = kFilterBilinear;
   1048     }
   1049     // If scaling to larger, switch from Box to Bilinear.
   1050     if (dst_width >= src_width || dst_height >= src_height) {
   1051       filtering = kFilterBilinear;
   1052     }
   1053   }
   1054   if (filtering == kFilterBilinear) {
   1055     if (src_height == 1) {
   1056       filtering = kFilterLinear;
   1057     }
   1058     // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
   1059     if (dst_height == src_height || dst_height * 3 == src_height) {
   1060       filtering = kFilterLinear;
   1061     }
   1062     // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
   1063     // avoid reading 2 pixels horizontally that causes memory exception.
   1064     if (src_width == 1) {
   1065       filtering = kFilterNone;
   1066     }
   1067   }
   1068   if (filtering == kFilterLinear) {
   1069     if (src_width == 1) {
   1070       filtering = kFilterNone;
   1071     }
   1072     // TODO(fbarchard): Detect any odd scale factor and reduce to None.
   1073     if (dst_width == src_width || dst_width * 3 == src_width) {
   1074       filtering = kFilterNone;
   1075     }
   1076   }
   1077   return filtering;
   1078 }
   1079 
   1080 // Divide num by div and return as 16.16 fixed point result.
   1081 int FixedDiv_C(int num, int div) {
   1082   return (int)(((int64)(num) << 16) / div);
   1083 }
   1084 
   1085 // Divide num by div and return as 16.16 fixed point result.
   1086 int FixedDiv1_C(int num, int div) {
   1087   return (int)((((int64)(num) << 16) - 0x00010001) /
   1088                           (div - 1));
   1089 }
   1090 
   1091 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
   1092 
   1093 // Compute slope values for stepping.
   1094 void ScaleSlope(int src_width, int src_height,
   1095                 int dst_width, int dst_height,
   1096                 enum FilterMode filtering,
   1097                 int* x, int* y, int* dx, int* dy) {
   1098   assert(x != NULL);
   1099   assert(y != NULL);
   1100   assert(dx != NULL);
   1101   assert(dy != NULL);
   1102   assert(src_width != 0);
   1103   assert(src_height != 0);
   1104   assert(dst_width > 0);
   1105   assert(dst_height > 0);
   1106   // Check for 1 pixel and avoid FixedDiv overflow.
   1107   if (dst_width == 1 && src_width >= 32768) {
   1108     dst_width = src_width;
   1109   }
   1110   if (dst_height == 1 && src_height >= 32768) {
   1111     dst_height = src_height;
   1112   }
   1113   if (filtering == kFilterBox) {
   1114     // Scale step for point sampling duplicates all pixels equally.
   1115     *dx = FixedDiv(Abs(src_width), dst_width);
   1116     *dy = FixedDiv(src_height, dst_height);
   1117     *x = 0;
   1118     *y = 0;
   1119   } else if (filtering == kFilterBilinear) {
   1120     // Scale step for bilinear sampling renders last pixel once for upsample.
   1121     if (dst_width <= Abs(src_width)) {
   1122       *dx = FixedDiv(Abs(src_width), dst_width);
   1123       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
   1124     } else if (dst_width > 1) {
   1125       *dx = FixedDiv1(Abs(src_width), dst_width);
   1126       *x = 0;
   1127     }
   1128     if (dst_height <= src_height) {
   1129       *dy = FixedDiv(src_height,  dst_height);
   1130       *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
   1131     } else if (dst_height > 1) {
   1132       *dy = FixedDiv1(src_height, dst_height);
   1133       *y = 0;
   1134     }
   1135   } else if (filtering == kFilterLinear) {
   1136     // Scale step for bilinear sampling renders last pixel once for upsample.
   1137     if (dst_width <= Abs(src_width)) {
   1138       *dx = FixedDiv(Abs(src_width), dst_width);
   1139       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
   1140     } else if (dst_width > 1) {
   1141       *dx = FixedDiv1(Abs(src_width), dst_width);
   1142       *x = 0;
   1143     }
   1144     *dy = FixedDiv(src_height, dst_height);
   1145     *y = *dy >> 1;
   1146   } else {
   1147     // Scale step for point sampling duplicates all pixels equally.
   1148     *dx = FixedDiv(Abs(src_width), dst_width);
   1149     *dy = FixedDiv(src_height, dst_height);
   1150     *x = CENTERSTART(*dx, 0);
   1151     *y = CENTERSTART(*dy, 0);
   1152   }
   1153   // Negative src_width means horizontally mirror.
   1154   if (src_width < 0) {
   1155     *x += (dst_width - 1) * *dx;
   1156     *dx = -*dx;
   1157     // src_width = -src_width;   // Caller must do this.
   1158   }
   1159 }
   1160 #undef CENTERSTART
   1161 
   1162 #ifdef __cplusplus
   1163 }  // extern "C"
   1164 }  // namespace libyuv
   1165 #endif
   1166