Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/scale.h"
     12 
     13 #include <assert.h>
     14 #include <string.h>
     15 
     16 #include "libyuv/cpu_id.h"
     17 #include "libyuv/planar_functions.h"  // For CopyARGB
     18 #include "libyuv/row.h"
     19 #include "libyuv/scale_row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 static __inline int Abs(int v) {
     27   return v >= 0 ? v : -v;
     28 }
     29 
     30 // CPU agnostic row functions
     31 void ScaleRowDown2_C(const uint8* src_ptr,
     32                      ptrdiff_t src_stride,
     33                      uint8* dst,
     34                      int dst_width) {
     35   int x;
     36   (void)src_stride;
     37   for (x = 0; x < dst_width - 1; x += 2) {
     38     dst[0] = src_ptr[1];
     39     dst[1] = src_ptr[3];
     40     dst += 2;
     41     src_ptr += 4;
     42   }
     43   if (dst_width & 1) {
     44     dst[0] = src_ptr[1];
     45   }
     46 }
     47 
     48 void ScaleRowDown2_16_C(const uint16* src_ptr,
     49                         ptrdiff_t src_stride,
     50                         uint16* dst,
     51                         int dst_width) {
     52   int x;
     53   (void)src_stride;
     54   for (x = 0; x < dst_width - 1; x += 2) {
     55     dst[0] = src_ptr[1];
     56     dst[1] = src_ptr[3];
     57     dst += 2;
     58     src_ptr += 4;
     59   }
     60   if (dst_width & 1) {
     61     dst[0] = src_ptr[1];
     62   }
     63 }
     64 
     65 void ScaleRowDown2Linear_C(const uint8* src_ptr,
     66                            ptrdiff_t src_stride,
     67                            uint8* dst,
     68                            int dst_width) {
     69   const uint8* s = src_ptr;
     70   int x;
     71   (void)src_stride;
     72   for (x = 0; x < dst_width - 1; x += 2) {
     73     dst[0] = (s[0] + s[1] + 1) >> 1;
     74     dst[1] = (s[2] + s[3] + 1) >> 1;
     75     dst += 2;
     76     s += 4;
     77   }
     78   if (dst_width & 1) {
     79     dst[0] = (s[0] + s[1] + 1) >> 1;
     80   }
     81 }
     82 
     83 void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
     84                               ptrdiff_t src_stride,
     85                               uint16* dst,
     86                               int dst_width) {
     87   const uint16* s = src_ptr;
     88   int x;
     89   (void)src_stride;
     90   for (x = 0; x < dst_width - 1; x += 2) {
     91     dst[0] = (s[0] + s[1] + 1) >> 1;
     92     dst[1] = (s[2] + s[3] + 1) >> 1;
     93     dst += 2;
     94     s += 4;
     95   }
     96   if (dst_width & 1) {
     97     dst[0] = (s[0] + s[1] + 1) >> 1;
     98   }
     99 }
    100 
    101 void ScaleRowDown2Box_C(const uint8* src_ptr,
    102                         ptrdiff_t src_stride,
    103                         uint8* dst,
    104                         int dst_width) {
    105   const uint8* s = src_ptr;
    106   const uint8* t = src_ptr + src_stride;
    107   int x;
    108   for (x = 0; x < dst_width - 1; x += 2) {
    109     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    110     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    111     dst += 2;
    112     s += 4;
    113     t += 4;
    114   }
    115   if (dst_width & 1) {
    116     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    117   }
    118 }
    119 
    120 void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
    121                             ptrdiff_t src_stride,
    122                             uint8* dst,
    123                             int dst_width) {
    124   const uint8* s = src_ptr;
    125   const uint8* t = src_ptr + src_stride;
    126   int x;
    127   dst_width -= 1;
    128   for (x = 0; x < dst_width - 1; x += 2) {
    129     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    130     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    131     dst += 2;
    132     s += 4;
    133     t += 4;
    134   }
    135   if (dst_width & 1) {
    136     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    137     dst += 1;
    138     s += 2;
    139     t += 2;
    140   }
    141   dst[0] = (s[0] + t[0] + 1) >> 1;
    142 }
    143 
    144 void ScaleRowDown2Box_16_C(const uint16* src_ptr,
    145                            ptrdiff_t src_stride,
    146                            uint16* dst,
    147                            int dst_width) {
    148   const uint16* s = src_ptr;
    149   const uint16* t = src_ptr + src_stride;
    150   int x;
    151   for (x = 0; x < dst_width - 1; x += 2) {
    152     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    153     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    154     dst += 2;
    155     s += 4;
    156     t += 4;
    157   }
    158   if (dst_width & 1) {
    159     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    160   }
    161 }
    162 
    163 void ScaleRowDown4_C(const uint8* src_ptr,
    164                      ptrdiff_t src_stride,
    165                      uint8* dst,
    166                      int dst_width) {
    167   int x;
    168   (void)src_stride;
    169   for (x = 0; x < dst_width - 1; x += 2) {
    170     dst[0] = src_ptr[2];
    171     dst[1] = src_ptr[6];
    172     dst += 2;
    173     src_ptr += 8;
    174   }
    175   if (dst_width & 1) {
    176     dst[0] = src_ptr[2];
    177   }
    178 }
    179 
    180 void ScaleRowDown4_16_C(const uint16* src_ptr,
    181                         ptrdiff_t src_stride,
    182                         uint16* dst,
    183                         int dst_width) {
    184   int x;
    185   (void)src_stride;
    186   for (x = 0; x < dst_width - 1; x += 2) {
    187     dst[0] = src_ptr[2];
    188     dst[1] = src_ptr[6];
    189     dst += 2;
    190     src_ptr += 8;
    191   }
    192   if (dst_width & 1) {
    193     dst[0] = src_ptr[2];
    194   }
    195 }
    196 
    197 void ScaleRowDown4Box_C(const uint8* src_ptr,
    198                         ptrdiff_t src_stride,
    199                         uint8* dst,
    200                         int dst_width) {
    201   intptr_t stride = src_stride;
    202   int x;
    203   for (x = 0; x < dst_width - 1; x += 2) {
    204     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    205               src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
    206               src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
    207               src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
    208               src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
    209               src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
    210               src_ptr[stride * 3 + 3] + 8) >>
    211              4;
    212     dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
    213               src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
    214               src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
    215               src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
    216               src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
    217               src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
    218               src_ptr[stride * 3 + 7] + 8) >>
    219              4;
    220     dst += 2;
    221     src_ptr += 8;
    222   }
    223   if (dst_width & 1) {
    224     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    225               src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
    226               src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
    227               src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
    228               src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
    229               src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
    230               src_ptr[stride * 3 + 3] + 8) >>
    231              4;
    232   }
    233 }
    234 
    235 void ScaleRowDown4Box_16_C(const uint16* src_ptr,
    236                            ptrdiff_t src_stride,
    237                            uint16* dst,
    238                            int dst_width) {
    239   intptr_t stride = src_stride;
    240   int x;
    241   for (x = 0; x < dst_width - 1; x += 2) {
    242     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    243               src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
    244               src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
    245               src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
    246               src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
    247               src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
    248               src_ptr[stride * 3 + 3] + 8) >>
    249              4;
    250     dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
    251               src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
    252               src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
    253               src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
    254               src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
    255               src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
    256               src_ptr[stride * 3 + 7] + 8) >>
    257              4;
    258     dst += 2;
    259     src_ptr += 8;
    260   }
    261   if (dst_width & 1) {
    262     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    263               src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
    264               src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
    265               src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
    266               src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
    267               src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
    268               src_ptr[stride * 3 + 3] + 8) >>
    269              4;
    270   }
    271 }
    272 
    273 void ScaleRowDown34_C(const uint8* src_ptr,
    274                       ptrdiff_t src_stride,
    275                       uint8* dst,
    276                       int dst_width) {
    277   int x;
    278   (void)src_stride;
    279   assert((dst_width % 3 == 0) && (dst_width > 0));
    280   for (x = 0; x < dst_width; x += 3) {
    281     dst[0] = src_ptr[0];
    282     dst[1] = src_ptr[1];
    283     dst[2] = src_ptr[3];
    284     dst += 3;
    285     src_ptr += 4;
    286   }
    287 }
    288 
    289 void ScaleRowDown34_16_C(const uint16* src_ptr,
    290                          ptrdiff_t src_stride,
    291                          uint16* dst,
    292                          int dst_width) {
    293   int x;
    294   (void)src_stride;
    295   assert((dst_width % 3 == 0) && (dst_width > 0));
    296   for (x = 0; x < dst_width; x += 3) {
    297     dst[0] = src_ptr[0];
    298     dst[1] = src_ptr[1];
    299     dst[2] = src_ptr[3];
    300     dst += 3;
    301     src_ptr += 4;
    302   }
    303 }
    304 
    305 // Filter rows 0 and 1 together, 3 : 1
    306 void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
    307                             ptrdiff_t src_stride,
    308                             uint8* d,
    309                             int dst_width) {
    310   const uint8* s = src_ptr;
    311   const uint8* t = src_ptr + src_stride;
    312   int x;
    313   assert((dst_width % 3 == 0) && (dst_width > 0));
    314   for (x = 0; x < dst_width; x += 3) {
    315     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    316     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    317     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    318     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    319     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    320     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    321     d[0] = (a0 * 3 + b0 + 2) >> 2;
    322     d[1] = (a1 * 3 + b1 + 2) >> 2;
    323     d[2] = (a2 * 3 + b2 + 2) >> 2;
    324     d += 3;
    325     s += 4;
    326     t += 4;
    327   }
    328 }
    329 
    330 void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
    331                                ptrdiff_t src_stride,
    332                                uint16* d,
    333                                int dst_width) {
    334   const uint16* s = src_ptr;
    335   const uint16* t = src_ptr + src_stride;
    336   int x;
    337   assert((dst_width % 3 == 0) && (dst_width > 0));
    338   for (x = 0; x < dst_width; x += 3) {
    339     uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    340     uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    341     uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    342     uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    343     uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    344     uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    345     d[0] = (a0 * 3 + b0 + 2) >> 2;
    346     d[1] = (a1 * 3 + b1 + 2) >> 2;
    347     d[2] = (a2 * 3 + b2 + 2) >> 2;
    348     d += 3;
    349     s += 4;
    350     t += 4;
    351   }
    352 }
    353 
    354 // Filter rows 1 and 2 together, 1 : 1
    355 void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
    356                             ptrdiff_t src_stride,
    357                             uint8* d,
    358                             int dst_width) {
    359   const uint8* s = src_ptr;
    360   const uint8* t = src_ptr + src_stride;
    361   int x;
    362   assert((dst_width % 3 == 0) && (dst_width > 0));
    363   for (x = 0; x < dst_width; x += 3) {
    364     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    365     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    366     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    367     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    368     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    369     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    370     d[0] = (a0 + b0 + 1) >> 1;
    371     d[1] = (a1 + b1 + 1) >> 1;
    372     d[2] = (a2 + b2 + 1) >> 1;
    373     d += 3;
    374     s += 4;
    375     t += 4;
    376   }
    377 }
    378 
    379 void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
    380                                ptrdiff_t src_stride,
    381                                uint16* d,
    382                                int dst_width) {
    383   const uint16* s = src_ptr;
    384   const uint16* t = src_ptr + src_stride;
    385   int x;
    386   assert((dst_width % 3 == 0) && (dst_width > 0));
    387   for (x = 0; x < dst_width; x += 3) {
    388     uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    389     uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    390     uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    391     uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    392     uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    393     uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    394     d[0] = (a0 + b0 + 1) >> 1;
    395     d[1] = (a1 + b1 + 1) >> 1;
    396     d[2] = (a2 + b2 + 1) >> 1;
    397     d += 3;
    398     s += 4;
    399     t += 4;
    400   }
    401 }
    402 
    403 // Scales a single row of pixels using point sampling.
    404 void ScaleCols_C(uint8* dst_ptr,
    405                  const uint8* src_ptr,
    406                  int dst_width,
    407                  int x,
    408                  int dx) {
    409   int j;
    410   for (j = 0; j < dst_width - 1; j += 2) {
    411     dst_ptr[0] = src_ptr[x >> 16];
    412     x += dx;
    413     dst_ptr[1] = src_ptr[x >> 16];
    414     x += dx;
    415     dst_ptr += 2;
    416   }
    417   if (dst_width & 1) {
    418     dst_ptr[0] = src_ptr[x >> 16];
    419   }
    420 }
    421 
    422 void ScaleCols_16_C(uint16* dst_ptr,
    423                     const uint16* src_ptr,
    424                     int dst_width,
    425                     int x,
    426                     int dx) {
    427   int j;
    428   for (j = 0; j < dst_width - 1; j += 2) {
    429     dst_ptr[0] = src_ptr[x >> 16];
    430     x += dx;
    431     dst_ptr[1] = src_ptr[x >> 16];
    432     x += dx;
    433     dst_ptr += 2;
    434   }
    435   if (dst_width & 1) {
    436     dst_ptr[0] = src_ptr[x >> 16];
    437   }
    438 }
    439 
    440 // Scales a single row of pixels up by 2x using point sampling.
    441 void ScaleColsUp2_C(uint8* dst_ptr,
    442                     const uint8* src_ptr,
    443                     int dst_width,
    444                     int x,
    445                     int dx) {
    446   int j;
    447   (void)x;
    448   (void)dx;
    449   for (j = 0; j < dst_width - 1; j += 2) {
    450     dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    451     src_ptr += 1;
    452     dst_ptr += 2;
    453   }
    454   if (dst_width & 1) {
    455     dst_ptr[0] = src_ptr[0];
    456   }
    457 }
    458 
    459 void ScaleColsUp2_16_C(uint16* dst_ptr,
    460                        const uint16* src_ptr,
    461                        int dst_width,
    462                        int x,
    463                        int dx) {
    464   int j;
    465   (void)x;
    466   (void)dx;
    467   for (j = 0; j < dst_width - 1; j += 2) {
    468     dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    469     src_ptr += 1;
    470     dst_ptr += 2;
    471   }
    472   if (dst_width & 1) {
    473     dst_ptr[0] = src_ptr[0];
    474   }
    475 }
    476 
    477 // (1-f)a + fb can be replaced with a + f(b-a)
    478 #if defined(__arm__) || defined(__aarch64__)
    479 #define BLENDER(a, b, f) \
    480   (uint8)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
    481 #else
    482 // Intel uses 7 bit math with rounding.
    483 #define BLENDER(a, b, f) \
    484   (uint8)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
    485 #endif
    486 
    487 void ScaleFilterCols_C(uint8* dst_ptr,
    488                        const uint8* src_ptr,
    489                        int dst_width,
    490                        int x,
    491                        int dx) {
    492   int j;
    493   for (j = 0; j < dst_width - 1; j += 2) {
    494     int xi = x >> 16;
    495     int a = src_ptr[xi];
    496     int b = src_ptr[xi + 1];
    497     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    498     x += dx;
    499     xi = x >> 16;
    500     a = src_ptr[xi];
    501     b = src_ptr[xi + 1];
    502     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    503     x += dx;
    504     dst_ptr += 2;
    505   }
    506   if (dst_width & 1) {
    507     int xi = x >> 16;
    508     int a = src_ptr[xi];
    509     int b = src_ptr[xi + 1];
    510     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    511   }
    512 }
    513 
    514 void ScaleFilterCols64_C(uint8* dst_ptr,
    515                          const uint8* src_ptr,
    516                          int dst_width,
    517                          int x32,
    518                          int dx) {
    519   int64 x = (int64)(x32);
    520   int j;
    521   for (j = 0; j < dst_width - 1; j += 2) {
    522     int64 xi = x >> 16;
    523     int a = src_ptr[xi];
    524     int b = src_ptr[xi + 1];
    525     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    526     x += dx;
    527     xi = x >> 16;
    528     a = src_ptr[xi];
    529     b = src_ptr[xi + 1];
    530     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    531     x += dx;
    532     dst_ptr += 2;
    533   }
    534   if (dst_width & 1) {
    535     int64 xi = x >> 16;
    536     int a = src_ptr[xi];
    537     int b = src_ptr[xi + 1];
    538     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    539   }
    540 }
    541 #undef BLENDER
    542 
    543 // Same as 8 bit arm blender but return is cast to uint16
    544 #define BLENDER(a, b, f) \
    545   (uint16)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
    546 
    547 void ScaleFilterCols_16_C(uint16* dst_ptr,
    548                           const uint16* src_ptr,
    549                           int dst_width,
    550                           int x,
    551                           int dx) {
    552   int j;
    553   for (j = 0; j < dst_width - 1; j += 2) {
    554     int xi = x >> 16;
    555     int a = src_ptr[xi];
    556     int b = src_ptr[xi + 1];
    557     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    558     x += dx;
    559     xi = x >> 16;
    560     a = src_ptr[xi];
    561     b = src_ptr[xi + 1];
    562     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    563     x += dx;
    564     dst_ptr += 2;
    565   }
    566   if (dst_width & 1) {
    567     int xi = x >> 16;
    568     int a = src_ptr[xi];
    569     int b = src_ptr[xi + 1];
    570     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    571   }
    572 }
    573 
    574 void ScaleFilterCols64_16_C(uint16* dst_ptr,
    575                             const uint16* src_ptr,
    576                             int dst_width,
    577                             int x32,
    578                             int dx) {
    579   int64 x = (int64)(x32);
    580   int j;
    581   for (j = 0; j < dst_width - 1; j += 2) {
    582     int64 xi = x >> 16;
    583     int a = src_ptr[xi];
    584     int b = src_ptr[xi + 1];
    585     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    586     x += dx;
    587     xi = x >> 16;
    588     a = src_ptr[xi];
    589     b = src_ptr[xi + 1];
    590     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    591     x += dx;
    592     dst_ptr += 2;
    593   }
    594   if (dst_width & 1) {
    595     int64 xi = x >> 16;
    596     int a = src_ptr[xi];
    597     int b = src_ptr[xi + 1];
    598     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    599   }
    600 }
    601 #undef BLENDER
    602 
    603 void ScaleRowDown38_C(const uint8* src_ptr,
    604                       ptrdiff_t src_stride,
    605                       uint8* dst,
    606                       int dst_width) {
    607   int x;
    608   (void)src_stride;
    609   assert(dst_width % 3 == 0);
    610   for (x = 0; x < dst_width; x += 3) {
    611     dst[0] = src_ptr[0];
    612     dst[1] = src_ptr[3];
    613     dst[2] = src_ptr[6];
    614     dst += 3;
    615     src_ptr += 8;
    616   }
    617 }
    618 
    619 void ScaleRowDown38_16_C(const uint16* src_ptr,
    620                          ptrdiff_t src_stride,
    621                          uint16* dst,
    622                          int dst_width) {
    623   int x;
    624   (void)src_stride;
    625   assert(dst_width % 3 == 0);
    626   for (x = 0; x < dst_width; x += 3) {
    627     dst[0] = src_ptr[0];
    628     dst[1] = src_ptr[3];
    629     dst[2] = src_ptr[6];
    630     dst += 3;
    631     src_ptr += 8;
    632   }
    633 }
    634 
    635 // 8x3 -> 3x1
    636 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
    637                             ptrdiff_t src_stride,
    638                             uint8* dst_ptr,
    639                             int dst_width) {
    640   intptr_t stride = src_stride;
    641   int i;
    642   assert((dst_width % 3 == 0) && (dst_width > 0));
    643   for (i = 0; i < dst_width; i += 3) {
    644     dst_ptr[0] =
    645         (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
    646          src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
    647          src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
    648             (65536 / 9) >>
    649         16;
    650     dst_ptr[1] =
    651         (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
    652          src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
    653          src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
    654             (65536 / 9) >>
    655         16;
    656     dst_ptr[2] =
    657         (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
    658          src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
    659             (65536 / 6) >>
    660         16;
    661     src_ptr += 8;
    662     dst_ptr += 3;
    663   }
    664 }
    665 
    666 void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
    667                                ptrdiff_t src_stride,
    668                                uint16* dst_ptr,
    669                                int dst_width) {
    670   intptr_t stride = src_stride;
    671   int i;
    672   assert((dst_width % 3 == 0) && (dst_width > 0));
    673   for (i = 0; i < dst_width; i += 3) {
    674     dst_ptr[0] =
    675         (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
    676          src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
    677          src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
    678             (65536 / 9) >>
    679         16;
    680     dst_ptr[1] =
    681         (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
    682          src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
    683          src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
    684             (65536 / 9) >>
    685         16;
    686     dst_ptr[2] =
    687         (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
    688          src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
    689             (65536 / 6) >>
    690         16;
    691     src_ptr += 8;
    692     dst_ptr += 3;
    693   }
    694 }
    695 
    696 // 8x2 -> 3x1
    697 void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
    698                             ptrdiff_t src_stride,
    699                             uint8* dst_ptr,
    700                             int dst_width) {
    701   intptr_t stride = src_stride;
    702   int i;
    703   assert((dst_width % 3 == 0) && (dst_width > 0));
    704   for (i = 0; i < dst_width; i += 3) {
    705     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
    706                   src_ptr[stride + 1] + src_ptr[stride + 2]) *
    707                      (65536 / 6) >>
    708                  16;
    709     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
    710                   src_ptr[stride + 4] + src_ptr[stride + 5]) *
    711                      (65536 / 6) >>
    712                  16;
    713     dst_ptr[2] =
    714         (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
    715             (65536 / 4) >>
    716         16;
    717     src_ptr += 8;
    718     dst_ptr += 3;
    719   }
    720 }
    721 
    722 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
    723                                ptrdiff_t src_stride,
    724                                uint16* dst_ptr,
    725                                int dst_width) {
    726   intptr_t stride = src_stride;
    727   int i;
    728   assert((dst_width % 3 == 0) && (dst_width > 0));
    729   for (i = 0; i < dst_width; i += 3) {
    730     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
    731                   src_ptr[stride + 1] + src_ptr[stride + 2]) *
    732                      (65536 / 6) >>
    733                  16;
    734     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
    735                   src_ptr[stride + 4] + src_ptr[stride + 5]) *
    736                      (65536 / 6) >>
    737                  16;
    738     dst_ptr[2] =
    739         (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
    740             (65536 / 4) >>
    741         16;
    742     src_ptr += 8;
    743     dst_ptr += 3;
    744   }
    745 }
    746 
    747 void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
    748   int x;
    749   assert(src_width > 0);
    750   for (x = 0; x < src_width - 1; x += 2) {
    751     dst_ptr[0] += src_ptr[0];
    752     dst_ptr[1] += src_ptr[1];
    753     src_ptr += 2;
    754     dst_ptr += 2;
    755   }
    756   if (src_width & 1) {
    757     dst_ptr[0] += src_ptr[0];
    758   }
    759 }
    760 
    761 void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
    762   int x;
    763   assert(src_width > 0);
    764   for (x = 0; x < src_width - 1; x += 2) {
    765     dst_ptr[0] += src_ptr[0];
    766     dst_ptr[1] += src_ptr[1];
    767     src_ptr += 2;
    768     dst_ptr += 2;
    769   }
    770   if (src_width & 1) {
    771     dst_ptr[0] += src_ptr[0];
    772   }
    773 }
    774 
    775 void ScaleARGBRowDown2_C(const uint8* src_argb,
    776                          ptrdiff_t src_stride,
    777                          uint8* dst_argb,
    778                          int dst_width) {
    779   const uint32* src = (const uint32*)(src_argb);
    780   uint32* dst = (uint32*)(dst_argb);
    781   int x;
    782   (void)src_stride;
    783   for (x = 0; x < dst_width - 1; x += 2) {
    784     dst[0] = src[1];
    785     dst[1] = src[3];
    786     src += 4;
    787     dst += 2;
    788   }
    789   if (dst_width & 1) {
    790     dst[0] = src[1];
    791   }
    792 }
    793 
    794 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
    795                                ptrdiff_t src_stride,
    796                                uint8* dst_argb,
    797                                int dst_width) {
    798   int x;
    799   (void)src_stride;
    800   for (x = 0; x < dst_width; ++x) {
    801     dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
    802     dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
    803     dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
    804     dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
    805     src_argb += 8;
    806     dst_argb += 4;
    807   }
    808 }
    809 
    810 void ScaleARGBRowDown2Box_C(const uint8* src_argb,
    811                             ptrdiff_t src_stride,
    812                             uint8* dst_argb,
    813                             int dst_width) {
    814   int x;
    815   for (x = 0; x < dst_width; ++x) {
    816     dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
    817                    src_argb[src_stride + 4] + 2) >>
    818                   2;
    819     dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
    820                    src_argb[src_stride + 5] + 2) >>
    821                   2;
    822     dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
    823                    src_argb[src_stride + 6] + 2) >>
    824                   2;
    825     dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
    826                    src_argb[src_stride + 7] + 2) >>
    827                   2;
    828     src_argb += 8;
    829     dst_argb += 4;
    830   }
    831 }
    832 
    833 void ScaleARGBRowDownEven_C(const uint8* src_argb,
    834                             ptrdiff_t src_stride,
    835                             int src_stepx,
    836                             uint8* dst_argb,
    837                             int dst_width) {
    838   const uint32* src = (const uint32*)(src_argb);
    839   uint32* dst = (uint32*)(dst_argb);
    840   (void)src_stride;
    841   int x;
    842   for (x = 0; x < dst_width - 1; x += 2) {
    843     dst[0] = src[0];
    844     dst[1] = src[src_stepx];
    845     src += src_stepx * 2;
    846     dst += 2;
    847   }
    848   if (dst_width & 1) {
    849     dst[0] = src[0];
    850   }
    851 }
    852 
    853 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
    854                                ptrdiff_t src_stride,
    855                                int src_stepx,
    856                                uint8* dst_argb,
    857                                int dst_width) {
    858   int x;
    859   for (x = 0; x < dst_width; ++x) {
    860     dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
    861                    src_argb[src_stride + 4] + 2) >>
    862                   2;
    863     dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
    864                    src_argb[src_stride + 5] + 2) >>
    865                   2;
    866     dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
    867                    src_argb[src_stride + 6] + 2) >>
    868                   2;
    869     dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
    870                    src_argb[src_stride + 7] + 2) >>
    871                   2;
    872     src_argb += src_stepx * 4;
    873     dst_argb += 4;
    874   }
    875 }
    876 
    877 // Scales a single row of pixels using point sampling.
    878 void ScaleARGBCols_C(uint8* dst_argb,
    879                      const uint8* src_argb,
    880                      int dst_width,
    881                      int x,
    882                      int dx) {
    883   const uint32* src = (const uint32*)(src_argb);
    884   uint32* dst = (uint32*)(dst_argb);
    885   int j;
    886   for (j = 0; j < dst_width - 1; j += 2) {
    887     dst[0] = src[x >> 16];
    888     x += dx;
    889     dst[1] = src[x >> 16];
    890     x += dx;
    891     dst += 2;
    892   }
    893   if (dst_width & 1) {
    894     dst[0] = src[x >> 16];
    895   }
    896 }
    897 
    898 void ScaleARGBCols64_C(uint8* dst_argb,
    899                        const uint8* src_argb,
    900                        int dst_width,
    901                        int x32,
    902                        int dx) {
    903   int64 x = (int64)(x32);
    904   const uint32* src = (const uint32*)(src_argb);
    905   uint32* dst = (uint32*)(dst_argb);
    906   int j;
    907   for (j = 0; j < dst_width - 1; j += 2) {
    908     dst[0] = src[x >> 16];
    909     x += dx;
    910     dst[1] = src[x >> 16];
    911     x += dx;
    912     dst += 2;
    913   }
    914   if (dst_width & 1) {
    915     dst[0] = src[x >> 16];
    916   }
    917 }
    918 
    919 // Scales a single row of pixels up by 2x using point sampling.
    920 void ScaleARGBColsUp2_C(uint8* dst_argb,
    921                         const uint8* src_argb,
    922                         int dst_width,
    923                         int x,
    924                         int dx) {
    925   const uint32* src = (const uint32*)(src_argb);
    926   uint32* dst = (uint32*)(dst_argb);
    927   int j;
    928   (void)x;
    929   (void)dx;
    930   for (j = 0; j < dst_width - 1; j += 2) {
    931     dst[1] = dst[0] = src[0];
    932     src += 1;
    933     dst += 2;
    934   }
    935   if (dst_width & 1) {
    936     dst[0] = src[0];
    937   }
    938 }
    939 
    940 // TODO(fbarchard): Replace 0x7f ^ f with 128-f.  bug=607.
    941 // Mimics SSSE3 blender
    942 #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
    943 #define BLENDERC(a, b, f, s) \
    944   (uint32)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
    945 #define BLENDER(a, b, f)                                                 \
    946   BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
    947       BLENDERC(a, b, f, 0)
    948 
    949 void ScaleARGBFilterCols_C(uint8* dst_argb,
    950                            const uint8* src_argb,
    951                            int dst_width,
    952                            int x,
    953                            int dx) {
    954   const uint32* src = (const uint32*)(src_argb);
    955   uint32* dst = (uint32*)(dst_argb);
    956   int j;
    957   for (j = 0; j < dst_width - 1; j += 2) {
    958     int xi = x >> 16;
    959     int xf = (x >> 9) & 0x7f;
    960     uint32 a = src[xi];
    961     uint32 b = src[xi + 1];
    962     dst[0] = BLENDER(a, b, xf);
    963     x += dx;
    964     xi = x >> 16;
    965     xf = (x >> 9) & 0x7f;
    966     a = src[xi];
    967     b = src[xi + 1];
    968     dst[1] = BLENDER(a, b, xf);
    969     x += dx;
    970     dst += 2;
    971   }
    972   if (dst_width & 1) {
    973     int xi = x >> 16;
    974     int xf = (x >> 9) & 0x7f;
    975     uint32 a = src[xi];
    976     uint32 b = src[xi + 1];
    977     dst[0] = BLENDER(a, b, xf);
    978   }
    979 }
    980 
    981 void ScaleARGBFilterCols64_C(uint8* dst_argb,
    982                              const uint8* src_argb,
    983                              int dst_width,
    984                              int x32,
    985                              int dx) {
    986   int64 x = (int64)(x32);
    987   const uint32* src = (const uint32*)(src_argb);
    988   uint32* dst = (uint32*)(dst_argb);
    989   int j;
    990   for (j = 0; j < dst_width - 1; j += 2) {
    991     int64 xi = x >> 16;
    992     int xf = (x >> 9) & 0x7f;
    993     uint32 a = src[xi];
    994     uint32 b = src[xi + 1];
    995     dst[0] = BLENDER(a, b, xf);
    996     x += dx;
    997     xi = x >> 16;
    998     xf = (x >> 9) & 0x7f;
    999     a = src[xi];
   1000     b = src[xi + 1];
   1001     dst[1] = BLENDER(a, b, xf);
   1002     x += dx;
   1003     dst += 2;
   1004   }
   1005   if (dst_width & 1) {
   1006     int64 xi = x >> 16;
   1007     int xf = (x >> 9) & 0x7f;
   1008     uint32 a = src[xi];
   1009     uint32 b = src[xi + 1];
   1010     dst[0] = BLENDER(a, b, xf);
   1011   }
   1012 }
   1013 #undef BLENDER1
   1014 #undef BLENDERC
   1015 #undef BLENDER
   1016 
   1017 // Scale plane vertically with bilinear interpolation.
   1018 void ScalePlaneVertical(int src_height,
   1019                         int dst_width,
   1020                         int dst_height,
   1021                         int src_stride,
   1022                         int dst_stride,
   1023                         const uint8* src_argb,
   1024                         uint8* dst_argb,
   1025                         int x,
   1026                         int y,
   1027                         int dy,
   1028                         int bpp,
   1029                         enum FilterMode filtering) {
   1030   // TODO(fbarchard): Allow higher bpp.
   1031   int dst_width_bytes = dst_width * bpp;
   1032   void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
   1033                          ptrdiff_t src_stride, int dst_width,
   1034                          int source_y_fraction) = InterpolateRow_C;
   1035   const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
   1036   int j;
   1037   assert(bpp >= 1 && bpp <= 4);
   1038   assert(src_height != 0);
   1039   assert(dst_width > 0);
   1040   assert(dst_height > 0);
   1041   src_argb += (x >> 16) * bpp;
   1042 #if defined(HAS_INTERPOLATEROW_SSSE3)
   1043   if (TestCpuFlag(kCpuHasSSSE3)) {
   1044     InterpolateRow = InterpolateRow_Any_SSSE3;
   1045     if (IS_ALIGNED(dst_width_bytes, 16)) {
   1046       InterpolateRow = InterpolateRow_SSSE3;
   1047     }
   1048   }
   1049 #endif
   1050 #if defined(HAS_INTERPOLATEROW_AVX2)
   1051   if (TestCpuFlag(kCpuHasAVX2)) {
   1052     InterpolateRow = InterpolateRow_Any_AVX2;
   1053     if (IS_ALIGNED(dst_width_bytes, 32)) {
   1054       InterpolateRow = InterpolateRow_AVX2;
   1055     }
   1056   }
   1057 #endif
   1058 #if defined(HAS_INTERPOLATEROW_NEON)
   1059   if (TestCpuFlag(kCpuHasNEON)) {
   1060     InterpolateRow = InterpolateRow_Any_NEON;
   1061     if (IS_ALIGNED(dst_width_bytes, 16)) {
   1062       InterpolateRow = InterpolateRow_NEON;
   1063     }
   1064   }
   1065 #endif
   1066 #if defined(HAS_INTERPOLATEROW_DSPR2)
   1067   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
   1068       IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
   1069       IS_ALIGNED(dst_stride, 4)) {
   1070     InterpolateRow = InterpolateRow_Any_DSPR2;
   1071     if (IS_ALIGNED(dst_width_bytes, 4)) {
   1072       InterpolateRow = InterpolateRow_DSPR2;
   1073     }
   1074   }
   1075 #endif
   1076 #if defined(HAS_INTERPOLATEROW_MSA)
   1077   if (TestCpuFlag(kCpuHasMSA)) {
   1078     InterpolateRow = InterpolateRow_Any_MSA;
   1079     if (IS_ALIGNED(dst_width_bytes, 32)) {
   1080       InterpolateRow = InterpolateRow_MSA;
   1081     }
   1082   }
   1083 #endif
   1084   for (j = 0; j < dst_height; ++j) {
   1085     int yi;
   1086     int yf;
   1087     if (y > max_y) {
   1088       y = max_y;
   1089     }
   1090     yi = y >> 16;
   1091     yf = filtering ? ((y >> 8) & 255) : 0;
   1092     InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
   1093                    dst_width_bytes, yf);
   1094     dst_argb += dst_stride;
   1095     y += dy;
   1096   }
   1097 }
   1098 void ScalePlaneVertical_16(int src_height,
   1099                            int dst_width,
   1100                            int dst_height,
   1101                            int src_stride,
   1102                            int dst_stride,
   1103                            const uint16* src_argb,
   1104                            uint16* dst_argb,
   1105                            int x,
   1106                            int y,
   1107                            int dy,
   1108                            int wpp,
   1109                            enum FilterMode filtering) {
   1110   // TODO(fbarchard): Allow higher wpp.
   1111   int dst_width_words = dst_width * wpp;
   1112   void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb,
   1113                          ptrdiff_t src_stride, int dst_width,
   1114                          int source_y_fraction) = InterpolateRow_16_C;
   1115   const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
   1116   int j;
   1117   assert(wpp >= 1 && wpp <= 2);
   1118   assert(src_height != 0);
   1119   assert(dst_width > 0);
   1120   assert(dst_height > 0);
   1121   src_argb += (x >> 16) * wpp;
   1122 #if defined(HAS_INTERPOLATEROW_16_SSE2)
   1123   if (TestCpuFlag(kCpuHasSSE2)) {
   1124     InterpolateRow = InterpolateRow_Any_16_SSE2;
   1125     if (IS_ALIGNED(dst_width_bytes, 16)) {
   1126       InterpolateRow = InterpolateRow_16_SSE2;
   1127     }
   1128   }
   1129 #endif
   1130 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
   1131   if (TestCpuFlag(kCpuHasSSSE3)) {
   1132     InterpolateRow = InterpolateRow_Any_16_SSSE3;
   1133     if (IS_ALIGNED(dst_width_bytes, 16)) {
   1134       InterpolateRow = InterpolateRow_16_SSSE3;
   1135     }
   1136   }
   1137 #endif
   1138 #if defined(HAS_INTERPOLATEROW_16_AVX2)
   1139   if (TestCpuFlag(kCpuHasAVX2)) {
   1140     InterpolateRow = InterpolateRow_Any_16_AVX2;
   1141     if (IS_ALIGNED(dst_width_bytes, 32)) {
   1142       InterpolateRow = InterpolateRow_16_AVX2;
   1143     }
   1144   }
   1145 #endif
   1146 #if defined(HAS_INTERPOLATEROW_16_NEON)
   1147   if (TestCpuFlag(kCpuHasNEON)) {
   1148     InterpolateRow = InterpolateRow_Any_16_NEON;
   1149     if (IS_ALIGNED(dst_width_bytes, 16)) {
   1150       InterpolateRow = InterpolateRow_16_NEON;
   1151     }
   1152   }
   1153 #endif
   1154 #if defined(HAS_INTERPOLATEROW_16_DSPR2)
   1155   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
   1156       IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
   1157       IS_ALIGNED(dst_stride, 4)) {
   1158     InterpolateRow = InterpolateRow_Any_16_DSPR2;
   1159     if (IS_ALIGNED(dst_width_bytes, 4)) {
   1160       InterpolateRow = InterpolateRow_16_DSPR2;
   1161     }
   1162   }
   1163 #endif
   1164   for (j = 0; j < dst_height; ++j) {
   1165     int yi;
   1166     int yf;
   1167     if (y > max_y) {
   1168       y = max_y;
   1169     }
   1170     yi = y >> 16;
   1171     yf = filtering ? ((y >> 8) & 255) : 0;
   1172     InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
   1173                    dst_width_words, yf);
   1174     dst_argb += dst_stride;
   1175     y += dy;
   1176   }
   1177 }
   1178 
   1179 // Simplify the filtering based on scale factors.
   1180 enum FilterMode ScaleFilterReduce(int src_width,
   1181                                   int src_height,
   1182                                   int dst_width,
   1183                                   int dst_height,
   1184                                   enum FilterMode filtering) {
   1185   if (src_width < 0) {
   1186     src_width = -src_width;
   1187   }
   1188   if (src_height < 0) {
   1189     src_height = -src_height;
   1190   }
   1191   if (filtering == kFilterBox) {
   1192     // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
   1193     if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
   1194       filtering = kFilterBilinear;
   1195     }
   1196   }
   1197   if (filtering == kFilterBilinear) {
   1198     if (src_height == 1) {
   1199       filtering = kFilterLinear;
   1200     }
   1201     // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
   1202     if (dst_height == src_height || dst_height * 3 == src_height) {
   1203       filtering = kFilterLinear;
   1204     }
   1205     // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
   1206     // avoid reading 2 pixels horizontally that causes memory exception.
   1207     if (src_width == 1) {
   1208       filtering = kFilterNone;
   1209     }
   1210   }
   1211   if (filtering == kFilterLinear) {
   1212     if (src_width == 1) {
   1213       filtering = kFilterNone;
   1214     }
   1215     // TODO(fbarchard): Detect any odd scale factor and reduce to None.
   1216     if (dst_width == src_width || dst_width * 3 == src_width) {
   1217       filtering = kFilterNone;
   1218     }
   1219   }
   1220   return filtering;
   1221 }
   1222 
   1223 // Divide num by div and return as 16.16 fixed point result.
   1224 int FixedDiv_C(int num, int div) {
   1225   return (int)(((int64)(num) << 16) / div);
   1226 }
   1227 
   1228 // Divide num by div and return as 16.16 fixed point result.
   1229 int FixedDiv1_C(int num, int div) {
   1230   return (int)((((int64)(num) << 16) - 0x00010001) / (div - 1));
   1231 }
   1232 
   1233 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
   1234 
   1235 // Compute slope values for stepping.
   1236 void ScaleSlope(int src_width,
   1237                 int src_height,
   1238                 int dst_width,
   1239                 int dst_height,
   1240                 enum FilterMode filtering,
   1241                 int* x,
   1242                 int* y,
   1243                 int* dx,
   1244                 int* dy) {
   1245   assert(x != NULL);
   1246   assert(y != NULL);
   1247   assert(dx != NULL);
   1248   assert(dy != NULL);
   1249   assert(src_width != 0);
   1250   assert(src_height != 0);
   1251   assert(dst_width > 0);
   1252   assert(dst_height > 0);
   1253   // Check for 1 pixel and avoid FixedDiv overflow.
   1254   if (dst_width == 1 && src_width >= 32768) {
   1255     dst_width = src_width;
   1256   }
   1257   if (dst_height == 1 && src_height >= 32768) {
   1258     dst_height = src_height;
   1259   }
   1260   if (filtering == kFilterBox) {
   1261     // Scale step for point sampling duplicates all pixels equally.
   1262     *dx = FixedDiv(Abs(src_width), dst_width);
   1263     *dy = FixedDiv(src_height, dst_height);
   1264     *x = 0;
   1265     *y = 0;
   1266   } else if (filtering == kFilterBilinear) {
   1267     // Scale step for bilinear sampling renders last pixel once for upsample.
   1268     if (dst_width <= Abs(src_width)) {
   1269       *dx = FixedDiv(Abs(src_width), dst_width);
   1270       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
   1271     } else if (dst_width > 1) {
   1272       *dx = FixedDiv1(Abs(src_width), dst_width);
   1273       *x = 0;
   1274     }
   1275     if (dst_height <= src_height) {
   1276       *dy = FixedDiv(src_height, dst_height);
   1277       *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
   1278     } else if (dst_height > 1) {
   1279       *dy = FixedDiv1(src_height, dst_height);
   1280       *y = 0;
   1281     }
   1282   } else if (filtering == kFilterLinear) {
   1283     // Scale step for bilinear sampling renders last pixel once for upsample.
   1284     if (dst_width <= Abs(src_width)) {
   1285       *dx = FixedDiv(Abs(src_width), dst_width);
   1286       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
   1287     } else if (dst_width > 1) {
   1288       *dx = FixedDiv1(Abs(src_width), dst_width);
   1289       *x = 0;
   1290     }
   1291     *dy = FixedDiv(src_height, dst_height);
   1292     *y = *dy >> 1;
   1293   } else {
   1294     // Scale step for point sampling duplicates all pixels equally.
   1295     *dx = FixedDiv(Abs(src_width), dst_width);
   1296     *dy = FixedDiv(src_height, dst_height);
   1297     *x = CENTERSTART(*dx, 0);
   1298     *y = CENTERSTART(*dy, 0);
   1299   }
   1300   // Negative src_width means horizontally mirror.
   1301   if (src_width < 0) {
   1302     *x += (dst_width - 1) * *dx;
   1303     *dx = -*dx;
   1304     // src_width = -src_width;   // Caller must do this.
   1305   }
   1306 }
   1307 #undef CENTERSTART
   1308 
   1309 #ifdef __cplusplus
   1310 }  // extern "C"
   1311 }  // namespace libyuv
   1312 #endif
   1313