Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/scale.h"
     12 
     13 #include <assert.h>
     14 #include <string.h>
     15 
     16 #include "libyuv/cpu_id.h"
     17 #include "libyuv/planar_functions.h"  // For CopyPlane
     18 #include "libyuv/row.h"
     19 #include "libyuv/scale_row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
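// When defined, the SSE2 row-add path in ScalePlaneBox is only selected if
// src_width is a multiple of 16. Remove this macro if reading past the end of
// a row (overread) is safe.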
     27 #define AVOID_OVERREAD 1
     28 
     29 static __inline int Abs(int v) {
     30   return v >= 0 ? v : -v;
     31 }
     32 
     33 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
     34 
     35 // Scale plane, 1/2
     36 // This is an optimized version for scaling down a plane to 1/2 of
     37 // its original size.
     38 
     39 static void ScalePlaneDown2(int src_width, int src_height,
     40                             int dst_width, int dst_height,
     41                             int src_stride, int dst_stride,
     42                             const uint8* src_ptr, uint8* dst_ptr,
     43                             enum FilterMode filtering) {
     44   int y;
     45   void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
     46                         uint8* dst_ptr, int dst_width) =
     47     filtering == kFilterNone ? ScaleRowDown2_C :
     48         (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
     49         ScaleRowDown2Box_C);
     50   int row_stride = src_stride << 1;
     51   if (!filtering) {
     52     src_ptr += src_stride;  // Point to odd rows.
     53     src_stride = 0;
     54   }
     55 
     56 #if defined(HAS_SCALEROWDOWN2_NEON)
     57   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
     58     ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
     59   }
     60 #elif defined(HAS_SCALEROWDOWN2_SSE2)
     61   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
     62     ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
     63         (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
     64         ScaleRowDown2Box_Unaligned_SSE2);
     65     if (IS_ALIGNED(src_ptr, 16) &&
     66         IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
     67         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
     68       ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
     69           (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
     70           ScaleRowDown2Box_SSE2);
     71     }
     72   }
     73 #elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
     74   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
     75       IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
     76       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
     77     ScaleRowDown2 = filtering ?
     78         ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
     79   }
     80 #endif
     81 
     82   if (filtering == kFilterLinear) {
     83     src_stride = 0;
     84   }
     85   // TODO(fbarchard): Loop through source height to allow odd height.
     86   for (y = 0; y < dst_height; ++y) {
     87     ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
     88     src_ptr += row_stride;
     89     dst_ptr += dst_stride;
     90   }
     91 }
     92 
     93 static void ScalePlaneDown2_16(int src_width, int src_height,
     94                                int dst_width, int dst_height,
     95                                int src_stride, int dst_stride,
     96                                const uint16* src_ptr, uint16* dst_ptr,
     97                                enum FilterMode filtering) {
     98   int y;
     99   void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
    100                         uint16* dst_ptr, int dst_width) =
    101     filtering == kFilterNone ? ScaleRowDown2_16_C :
    102         (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
    103         ScaleRowDown2Box_16_C);
    104   int row_stride = src_stride << 1;
    105   if (!filtering) {
    106     src_ptr += src_stride;  // Point to odd rows.
    107     src_stride = 0;
    108   }
    109 
    110 #if defined(HAS_SCALEROWDOWN2_16_NEON)
    111   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    112     ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
    113         ScaleRowDown2_16_NEON;
    114   }
    115 #elif defined(HAS_SCALEROWDOWN2_16_SSE2)
    116   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    117     ScaleRowDown2 = filtering == kFilterNone ?
    118         ScaleRowDown2_Unaligned_16_SSE2 :
    119         (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
    120         ScaleRowDown2Box_Unaligned_16_SSE2);
    121     if (IS_ALIGNED(src_ptr, 16) &&
    122         IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
    123         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
    124       ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
    125           (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
    126           ScaleRowDown2Box_16_SSE2);
    127     }
    128   }
    129 #elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
    130   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
    131       IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
    132       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    133     ScaleRowDown2 = filtering ?
    134         ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
    135   }
    136 #endif
    137 
    138   if (filtering == kFilterLinear) {
    139     src_stride = 0;
    140   }
    141   // TODO(fbarchard): Loop through source height to allow odd height.
    142   for (y = 0; y < dst_height; ++y) {
    143     ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    144     src_ptr += row_stride;
    145     dst_ptr += dst_stride;
    146   }
    147 }
    148 
    149 // Scale plane, 1/4
    150 // This is an optimized version for scaling down a plane to 1/4 of
    151 // its original size.
    152 
    153 static void ScalePlaneDown4(int src_width, int src_height,
    154                             int dst_width, int dst_height,
    155                             int src_stride, int dst_stride,
    156                             const uint8* src_ptr, uint8* dst_ptr,
    157                             enum FilterMode filtering) {
    158   int y;
    159   void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
    160                         uint8* dst_ptr, int dst_width) =
    161       filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
    162   int row_stride = src_stride << 2;
    163   if (!filtering) {
    164     src_ptr += src_stride * 2;  // Point to row 2.
    165     src_stride = 0;
    166   }
    167 #if defined(HAS_SCALEROWDOWN4_NEON)
    168   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    169     ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
    170   }
    171 #elif defined(HAS_SCALEROWDOWN4_SSE2)
    172   if (TestCpuFlag(kCpuHasSSE2) &&
    173       IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
    174       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    175     ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
    176   }
    177 #elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
    178   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
    179       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
    180       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    181     ScaleRowDown4 = filtering ?
    182         ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
    183   }
    184 #endif
    185 
    186   if (filtering == kFilterLinear) {
    187     src_stride = 0;
    188   }
    189   for (y = 0; y < dst_height; ++y) {
    190     ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    191     src_ptr += row_stride;
    192     dst_ptr += dst_stride;
    193   }
    194 }
    195 
    196 static void ScalePlaneDown4_16(int src_width, int src_height,
    197                                int dst_width, int dst_height,
    198                                int src_stride, int dst_stride,
    199                                const uint16* src_ptr, uint16* dst_ptr,
    200                                enum FilterMode filtering) {
    201   int y;
    202   void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
    203                         uint16* dst_ptr, int dst_width) =
    204       filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
    205   int row_stride = src_stride << 2;
    206   if (!filtering) {
    207     src_ptr += src_stride * 2;  // Point to row 2.
    208     src_stride = 0;
    209   }
    210 #if defined(HAS_SCALEROWDOWN4_16_NEON)
    211   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    212     ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
    213         ScaleRowDown4_16_NEON;
    214   }
    215 #elif defined(HAS_SCALEROWDOWN4_16_SSE2)
    216   if (TestCpuFlag(kCpuHasSSE2) &&
    217       IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
    218       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    219     ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
    220         ScaleRowDown4_16_SSE2;
    221   }
    222 #elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
    223   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
    224       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
    225       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    226     ScaleRowDown4 = filtering ?
    227         ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
    228   }
    229 #endif
    230 
    231   if (filtering == kFilterLinear) {
    232     src_stride = 0;
    233   }
    234   for (y = 0; y < dst_height; ++y) {
    235     ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    236     src_ptr += row_stride;
    237     dst_ptr += dst_stride;
    238   }
    239 }
    240 
    241 // Scale plane down, 3/4
    242 
    243 static void ScalePlaneDown34(int src_width, int src_height,
    244                              int dst_width, int dst_height,
    245                              int src_stride, int dst_stride,
    246                              const uint8* src_ptr, uint8* dst_ptr,
    247                              enum FilterMode filtering) {
    248   int y;
    249   void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
    250                            uint8* dst_ptr, int dst_width);
    251   void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
    252                            uint8* dst_ptr, int dst_width);
    253   const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
    254   assert(dst_width % 3 == 0);
    255   if (!filtering) {
    256     ScaleRowDown34_0 = ScaleRowDown34_C;
    257     ScaleRowDown34_1 = ScaleRowDown34_C;
    258   } else {
    259     ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    260     ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
    261   }
    262 #if defined(HAS_SCALEROWDOWN34_NEON)
    263   if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    264     if (!filtering) {
    265       ScaleRowDown34_0 = ScaleRowDown34_NEON;
    266       ScaleRowDown34_1 = ScaleRowDown34_NEON;
    267     } else {
    268       ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
    269       ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
    270     }
    271   }
    272 #endif
    273 #if defined(HAS_SCALEROWDOWN34_SSSE3)
    274   if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
    275       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    276     if (!filtering) {
    277       ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
    278       ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    279     } else {
    280       ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
    281       ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
    282     }
    283   }
    284 #endif
    285 #if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
    286   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
    287       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
    288       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    289     if (!filtering) {
    290       ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
    291       ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    292     } else {
    293       ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
    294       ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    295     }
    296   }
    297 #endif
    298 
    299   for (y = 0; y < dst_height - 2; y += 3) {
    300     ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    301     src_ptr += src_stride;
    302     dst_ptr += dst_stride;
    303     ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    304     src_ptr += src_stride;
    305     dst_ptr += dst_stride;
    306     ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
    307                      dst_ptr, dst_width);
    308     src_ptr += src_stride * 2;
    309     dst_ptr += dst_stride;
    310   }
    311 
    312   // Remainder 1 or 2 rows with last row vertically unfiltered
    313   if ((dst_height % 3) == 2) {
    314     ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    315     src_ptr += src_stride;
    316     dst_ptr += dst_stride;
    317     ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
    318   } else if ((dst_height % 3) == 1) {
    319     ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
    320   }
    321 }
    322 
    323 static void ScalePlaneDown34_16(int src_width, int src_height,
    324                                 int dst_width, int dst_height,
    325                                 int src_stride, int dst_stride,
    326                                 const uint16* src_ptr, uint16* dst_ptr,
    327                                 enum FilterMode filtering) {
    328   int y;
    329   void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
    330                            uint16* dst_ptr, int dst_width);
    331   void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
    332                            uint16* dst_ptr, int dst_width);
    333   const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
    334   assert(dst_width % 3 == 0);
    335   if (!filtering) {
    336     ScaleRowDown34_0 = ScaleRowDown34_16_C;
    337     ScaleRowDown34_1 = ScaleRowDown34_16_C;
    338   } else {
    339     ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    340     ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
    341   }
    342 #if defined(HAS_SCALEROWDOWN34_16_NEON)
    343   if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    344     if (!filtering) {
    345       ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
    346       ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    347     } else {
    348       ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
    349       ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    350     }
    351   }
    352 #endif
    353 #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
    354   if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
    355       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    356     if (!filtering) {
    357       ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
    358       ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    359     } else {
    360       ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
    361       ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    362     }
    363   }
    364 #endif
    365 #if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
    366   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
    367       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
    368       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    369     if (!filtering) {
    370       ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
    371       ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
    372     } else {
    373       ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
    374       ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
    375     }
    376   }
    377 #endif
    378 
    379   for (y = 0; y < dst_height - 2; y += 3) {
    380     ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    381     src_ptr += src_stride;
    382     dst_ptr += dst_stride;
    383     ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    384     src_ptr += src_stride;
    385     dst_ptr += dst_stride;
    386     ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
    387                      dst_ptr, dst_width);
    388     src_ptr += src_stride * 2;
    389     dst_ptr += dst_stride;
    390   }
    391 
    392   // Remainder 1 or 2 rows with last row vertically unfiltered
    393   if ((dst_height % 3) == 2) {
    394     ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    395     src_ptr += src_stride;
    396     dst_ptr += dst_stride;
    397     ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
    398   } else if ((dst_height % 3) == 1) {
    399     ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
    400   }
    401 }
    402 
    403 
    404 // Scale plane, 3/8
    405 // This is an optimized version for scaling down a plane to 3/8
    406 // of its original size.
    407 //
    408 // Uses box filter arranges like this
    409 // aaabbbcc -> abc
    410 // aaabbbcc    def
    411 // aaabbbcc    ghi
    412 // dddeeeff
    413 // dddeeeff
    414 // dddeeeff
    415 // ggghhhii
    416 // ggghhhii
    417 // Boxes are 3x3, 2x3, 3x2 and 2x2
    418 
    419 static void ScalePlaneDown38(int src_width, int src_height,
    420                              int dst_width, int dst_height,
    421                              int src_stride, int dst_stride,
    422                              const uint8* src_ptr, uint8* dst_ptr,
    423                              enum FilterMode filtering) {
    424   int y;
    425   void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
    426                            uint8* dst_ptr, int dst_width);
    427   void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
    428                            uint8* dst_ptr, int dst_width);
    429   const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
    430   assert(dst_width % 3 == 0);
    431   if (!filtering) {
    432     ScaleRowDown38_3 = ScaleRowDown38_C;
    433     ScaleRowDown38_2 = ScaleRowDown38_C;
    434   } else {
    435     ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    436     ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
    437   }
    438 #if defined(HAS_SCALEROWDOWN38_NEON)
    439   if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    440     if (!filtering) {
    441       ScaleRowDown38_3 = ScaleRowDown38_NEON;
    442       ScaleRowDown38_2 = ScaleRowDown38_NEON;
    443     } else {
    444       ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
    445       ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
    446     }
    447   }
    448 #elif defined(HAS_SCALEROWDOWN38_SSSE3)
    449   if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
    450       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    451     if (!filtering) {
    452       ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
    453       ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    454     } else {
    455       ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
    456       ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    457     }
    458   }
    459 #elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
    460   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
    461       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
    462       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    463     if (!filtering) {
    464       ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
    465       ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    466     } else {
    467       ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
    468       ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    469     }
    470   }
    471 #endif
    472 
    473   for (y = 0; y < dst_height - 2; y += 3) {
    474     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    475     src_ptr += src_stride * 3;
    476     dst_ptr += dst_stride;
    477     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    478     src_ptr += src_stride * 3;
    479     dst_ptr += dst_stride;
    480     ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    481     src_ptr += src_stride * 2;
    482     dst_ptr += dst_stride;
    483   }
    484 
    485   // Remainder 1 or 2 rows with last row vertically unfiltered
    486   if ((dst_height % 3) == 2) {
    487     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    488     src_ptr += src_stride * 3;
    489     dst_ptr += dst_stride;
    490     ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
    491   } else if ((dst_height % 3) == 1) {
    492     ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
    493   }
    494 }
    495 
    496 static void ScalePlaneDown38_16(int src_width, int src_height,
    497                                 int dst_width, int dst_height,
    498                                 int src_stride, int dst_stride,
    499                                 const uint16* src_ptr, uint16* dst_ptr,
    500                                 enum FilterMode filtering) {
    501   int y;
    502   void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
    503                            uint16* dst_ptr, int dst_width);
    504   void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
    505                            uint16* dst_ptr, int dst_width);
    506   const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
    507   assert(dst_width % 3 == 0);
    508   if (!filtering) {
    509     ScaleRowDown38_3 = ScaleRowDown38_16_C;
    510     ScaleRowDown38_2 = ScaleRowDown38_16_C;
    511   } else {
    512     ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    513     ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
    514   }
    515 #if defined(HAS_SCALEROWDOWN38_16_NEON)
    516   if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    517     if (!filtering) {
    518       ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
    519       ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    520     } else {
    521       ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
    522       ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    523     }
    524   }
    525 #elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
    526   if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
    527       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    528     if (!filtering) {
    529       ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
    530       ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    531     } else {
    532       ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
    533       ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    534     }
    535   }
    536 #elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
    537   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
    538       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
    539       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    540     if (!filtering) {
    541       ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
    542       ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
    543     } else {
    544       ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
    545       ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
    546     }
    547   }
    548 #endif
    549 
    550   for (y = 0; y < dst_height - 2; y += 3) {
    551     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    552     src_ptr += src_stride * 3;
    553     dst_ptr += dst_stride;
    554     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    555     src_ptr += src_stride * 3;
    556     dst_ptr += dst_stride;
    557     ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    558     src_ptr += src_stride * 2;
    559     dst_ptr += dst_stride;
    560   }
    561 
    562   // Remainder 1 or 2 rows with last row vertically unfiltered
    563   if ((dst_height % 3) == 2) {
    564     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    565     src_ptr += src_stride * 3;
    566     dst_ptr += dst_stride;
    567     ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
    568   } else if ((dst_height % 3) == 1) {
    569     ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
    570   }
    571 }
    572 
    573 static __inline uint32 SumBox(int iboxwidth, int iboxheight,
    574                               ptrdiff_t src_stride, const uint8* src_ptr) {
    575   uint32 sum = 0u;
    576   int y;
    577   assert(iboxwidth > 0);
    578   assert(iboxheight > 0);
    579   for (y = 0; y < iboxheight; ++y) {
    580     int x;
    581     for (x = 0; x < iboxwidth; ++x) {
    582       sum += src_ptr[x];
    583     }
    584     src_ptr += src_stride;
    585   }
    586   return sum;
    587 }
    588 
    589 static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
    590                                  ptrdiff_t src_stride, const uint16* src_ptr) {
    591   uint32 sum = 0u;
    592   int y;
    593   assert(iboxwidth > 0);
    594   assert(iboxheight > 0);
    595   for (y = 0; y < iboxheight; ++y) {
    596     int x;
    597     for (x = 0; x < iboxwidth; ++x) {
    598       sum += src_ptr[x];
    599     }
    600     src_ptr += src_stride;
    601   }
    602   return sum;
    603 }
    604 
    605 static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
    606                                int x, int dx, ptrdiff_t src_stride,
    607                                const uint8* src_ptr, uint8* dst_ptr) {
    608   int i;
    609   int boxwidth;
    610   for (i = 0; i < dst_width; ++i) {
    611     int ix = x >> 16;
    612     x += dx;
    613     boxwidth = (x >> 16) - ix;
    614     *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
    615         (boxwidth * boxheight);
    616   }
    617 }
    618 
    619 static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
    620                                   int x, int dx, ptrdiff_t src_stride,
    621                                   const uint16* src_ptr, uint16* dst_ptr) {
    622   int i;
    623   int boxwidth;
    624   for (i = 0; i < dst_width; ++i) {
    625     int ix = x >> 16;
    626     x += dx;
    627     boxwidth = (x >> 16) - ix;
    628     *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
    629         (boxwidth * boxheight);
    630   }
    631 }
    632 
    633 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
    634   uint32 sum = 0u;
    635   int x;
    636   assert(iboxwidth > 0);
    637   for (x = 0; x < iboxwidth; ++x) {
    638     sum += src_ptr[x];
    639   }
    640   return sum;
    641 }
    642 
    643 static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
    644   uint32 sum = 0u;
    645   int x;
    646   assert(iboxwidth > 0);
    647   for (x = 0; x < iboxwidth; ++x) {
    648     sum += src_ptr[x];
    649   }
    650   return sum;
    651 }
    652 
    653 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
    654                             const uint16* src_ptr, uint8* dst_ptr) {
    655   int i;
    656   int scaletbl[2];
    657   int minboxwidth = (dx >> 16);
    658   int* scaleptr = scaletbl - minboxwidth;
    659   int boxwidth;
    660   scaletbl[0] = 65536 / (minboxwidth * boxheight);
    661   scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
    662   for (i = 0; i < dst_width; ++i) {
    663     int ix = x >> 16;
    664     x += dx;
    665     boxwidth = (x >> 16) - ix;
    666     *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
    667   }
    668 }
    669 
    670 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
    671                                const uint32* src_ptr, uint16* dst_ptr) {
    672   int i;
    673   int scaletbl[2];
    674   int minboxwidth = (dx >> 16);
    675   int* scaleptr = scaletbl - minboxwidth;
    676   int boxwidth;
    677   scaletbl[0] = 65536 / (minboxwidth * boxheight);
    678   scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
    679   for (i = 0; i < dst_width; ++i) {
    680     int ix = x >> 16;
    681     x += dx;
    682     boxwidth = (x >> 16) - ix;
    683     *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
    684         scaleptr[boxwidth] >> 16;
    685   }
    686 }
    687 
    688 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
    689                             const uint16* src_ptr, uint8* dst_ptr) {
    690   int boxwidth = (dx >> 16);
    691   int scaleval = 65536 / (boxwidth * boxheight);
    692   int i;
    693   for (i = 0; i < dst_width; ++i) {
    694     *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
    695     x += boxwidth;
    696   }
    697 }
    698 
    699 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
    700                                const uint32* src_ptr, uint16* dst_ptr) {
    701   int boxwidth = (dx >> 16);
    702   int scaleval = 65536 / (boxwidth * boxheight);
    703   int i;
    704   for (i = 0; i < dst_width; ++i) {
    705     *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
    706     x += boxwidth;
    707   }
    708 }
    709 
    710 // Scale plane down to any dimensions, with interpolation.
    711 // (boxfilter).
    712 //
    713 // Same method as SimpleScale, which is fixed point, outputting
    714 // one pixel of destination using fixed point (16.16) to step
    715 // through source, sampling a box of pixel with simple
    716 // averaging.
    717 static void ScalePlaneBox(int src_width, int src_height,
    718                           int dst_width, int dst_height,
    719                           int src_stride, int dst_stride,
    720                           const uint8* src_ptr, uint8* dst_ptr) {
    721   int j;
    722   // Initial source x/y coordinate and step values as 16.16 fixed point.
    723   int x = 0;
    724   int y = 0;
    725   int dx = 0;
    726   int dy = 0;
    727   const int max_y = (src_height << 16);
    728   ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
    729              &x, &y, &dx, &dy);
    730   src_width = Abs(src_width);
    731   // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
    732   if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    733     uint8* dst = dst_ptr;
    734     int j;
    735     for (j = 0; j < dst_height; ++j) {
    736       int boxheight;
    737       int iy = y >> 16;
    738       const uint8* src = src_ptr + iy * src_stride;
    739       y += dy;
    740       if (y > max_y) {
    741         y = max_y;
    742       }
    743       boxheight = (y >> 16) - iy;
    744       ScalePlaneBoxRow_C(dst_width, boxheight,
    745                          x, dx, src_stride,
    746                          src, dst);
    747       dst += dst_stride;
    748     }
    749     return;
    750   }
    751   {
    752     // Allocate a row buffer of uint16.
    753     align_buffer_64(row16, src_width * 2);
    754     void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
    755         const uint16* src_ptr, uint8* dst_ptr) =
    756         (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
    757     void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
    758         uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
    759 
    760 #if defined(HAS_SCALEADDROWS_SSE2)
    761     if (TestCpuFlag(kCpuHasSSE2) &&
    762 #ifdef AVOID_OVERREAD
    763         IS_ALIGNED(src_width, 16) &&
    764 #endif
    765         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    766       ScaleAddRows = ScaleAddRows_SSE2;
    767     }
    768 #endif
    769 
    770     for (j = 0; j < dst_height; ++j) {
    771       int boxheight;
    772       int iy = y >> 16;
    773       const uint8* src = src_ptr + iy * src_stride;
    774       y += dy;
    775       if (y > (src_height << 16)) {
    776         y = (src_height << 16);
    777       }
    778       boxheight = (y >> 16) - iy;
    779       ScaleAddRows(src, src_stride, (uint16*)(row16),
    780                  src_width, boxheight);
    781       ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
    782                  dst_ptr);
    783       dst_ptr += dst_stride;
    784     }
    785     free_aligned_buffer_64(row16);
    786   }
    787 }
    788 
    789 static void ScalePlaneBox_16(int src_width, int src_height,
    790                              int dst_width, int dst_height,
    791                              int src_stride, int dst_stride,
    792                              const uint16* src_ptr, uint16* dst_ptr) {
    793   int j;
    794   // Initial source x/y coordinate and step values as 16.16 fixed point.
    795   int x = 0;
    796   int y = 0;
    797   int dx = 0;
    798   int dy = 0;
    799   const int max_y = (src_height << 16);
    800   ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
    801              &x, &y, &dx, &dy);
    802   src_width = Abs(src_width);
    803   // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
    804   if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    805     uint16* dst = dst_ptr;
    806     int j;
    807     for (j = 0; j < dst_height; ++j) {
    808       int boxheight;
    809       int iy = y >> 16;
    810       const uint16* src = src_ptr + iy * src_stride;
    811       y += dy;
    812       if (y > max_y) {
    813         y = max_y;
    814       }
    815       boxheight = (y >> 16) - iy;
    816       ScalePlaneBoxRow_16_C(dst_width, boxheight,
    817                             x, dx, src_stride,
    818                             src, dst);
    819       dst += dst_stride;
    820     }
    821     return;
    822   }
    823   {
    824     // Allocate a row buffer of uint32.
    825     align_buffer_64(row32, src_width * 4);
    826     void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
    827         const uint32* src_ptr, uint16* dst_ptr) =
    828         (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
    829     void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
    830         uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
    831 
    832 #if defined(HAS_SCALEADDROWS_16_SSE2)
    833     if (TestCpuFlag(kCpuHasSSE2) &&
    834 #ifdef AVOID_OVERREAD
    835         IS_ALIGNED(src_width, 16) &&
    836 #endif
    837         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    838       ScaleAddRows = ScaleAddRows_16_SSE2;
    839     }
    840 #endif
    841 
    842     for (j = 0; j < dst_height; ++j) {
    843       int boxheight;
    844       int iy = y >> 16;
    845       const uint16* src = src_ptr + iy * src_stride;
    846       y += dy;
    847       if (y > (src_height << 16)) {
    848         y = (src_height << 16);
    849       }
    850       boxheight = (y >> 16) - iy;
    851       ScaleAddRows(src, src_stride, (uint32*)(row32),
    852                  src_width, boxheight);
    853       ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
    854                  dst_ptr);
    855       dst_ptr += dst_stride;
    856     }
    857     free_aligned_buffer_64(row32);
    858   }
    859 }
    860 
    861 // Scale plane down with bilinear interpolation.
    862 void ScalePlaneBilinearDown(int src_width, int src_height,
    863                             int dst_width, int dst_height,
    864                             int src_stride, int dst_stride,
    865                             const uint8* src_ptr, uint8* dst_ptr,
    866                             enum FilterMode filtering) {
    867   // Initial source x/y coordinate and step values as 16.16 fixed point.
    868   int x = 0;
    869   int y = 0;
    870   int dx = 0;
    871   int dy = 0;
    872   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
    873   // Allocate a row buffer.
    874   align_buffer_64(row, src_width);
    875 
    876   const int max_y = (src_height - 1) << 16;
    877   int j;
    878   void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
    879       int dst_width, int x, int dx) =
    880       (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
    881   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
    882       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
    883       InterpolateRow_C;
    884   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
    885              &x, &y, &dx, &dy);
    886   src_width = Abs(src_width);
    887 
    888 #if defined(HAS_INTERPOLATEROW_SSE2)
    889   if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    890     InterpolateRow = InterpolateRow_Any_SSE2;
    891     if (IS_ALIGNED(src_width, 16)) {
    892       InterpolateRow = InterpolateRow_Unaligned_SSE2;
    893       if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    894         InterpolateRow = InterpolateRow_SSE2;
    895       }
    896     }
    897   }
    898 #endif
    899 #if defined(HAS_INTERPOLATEROW_SSSE3)
    900   if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    901     InterpolateRow = InterpolateRow_Any_SSSE3;
    902     if (IS_ALIGNED(src_width, 16)) {
    903       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
    904       if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    905         InterpolateRow = InterpolateRow_SSSE3;
    906       }
    907     }
    908   }
    909 #endif
    910 #if defined(HAS_INTERPOLATEROW_AVX2)
    911   if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    912     InterpolateRow = InterpolateRow_Any_AVX2;
    913     if (IS_ALIGNED(src_width, 32)) {
    914       InterpolateRow = InterpolateRow_AVX2;
    915     }
    916   }
    917 #endif
    918 #if defined(HAS_INTERPOLATEROW_NEON)
    919   if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    920     InterpolateRow = InterpolateRow_Any_NEON;
    921     if (IS_ALIGNED(src_width, 16)) {
    922       InterpolateRow = InterpolateRow_NEON;
    923     }
    924   }
    925 #endif
    926 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
    927   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    928     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    929     if (IS_ALIGNED(src_width, 4)) {
    930       InterpolateRow = InterpolateRow_MIPS_DSPR2;
    931     }
    932   }
    933 #endif
    934 
    935 
    936 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
    937   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    938     ScaleFilterCols = ScaleFilterCols_SSSE3;
    939   }
    940 #endif
    941   if (y > max_y) {
    942     y = max_y;
    943   }
    944 
    945   for (j = 0; j < dst_height; ++j) {
    946     int yi = y >> 16;
    947     const uint8* src = src_ptr + yi * src_stride;
    948     if (filtering == kFilterLinear) {
    949       ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    950     } else {
    951       int yf = (y >> 8) & 255;
    952       InterpolateRow(row, src, src_stride, src_width, yf);
    953       ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    954     }
    955     dst_ptr += dst_stride;
    956     y += dy;
    957     if (y > max_y) {
    958       y = max_y;
    959     }
    960   }
    961   free_aligned_buffer_64(row);
    962 }
    963 
    964 void ScalePlaneBilinearDown_16(int src_width, int src_height,
    965                                int dst_width, int dst_height,
    966                                int src_stride, int dst_stride,
    967                                const uint16* src_ptr, uint16* dst_ptr,
    968                                enum FilterMode filtering) {
    969   // Initial source x/y coordinate and step values as 16.16 fixed point.
    970   int x = 0;
    971   int y = 0;
    972   int dx = 0;
    973   int dy = 0;
    974   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
    975   // Allocate a row buffer.
    976   align_buffer_64(row, src_width * 2);
    977 
    978   const int max_y = (src_height - 1) << 16;
    979   int j;
    980   void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
    981       int dst_width, int x, int dx) =
    982       (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
    983   void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
    984       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
    985       InterpolateRow_16_C;
    986   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
    987              &x, &y, &dx, &dy);
    988   src_width = Abs(src_width);
    989 
    990 #if defined(HAS_INTERPOLATEROW_16_SSE2)
    991   if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    992     InterpolateRow = InterpolateRow_Any_16_SSE2;
    993     if (IS_ALIGNED(src_width, 16)) {
    994       InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
    995       if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    996         InterpolateRow = InterpolateRow_16_SSE2;
    997       }
    998     }
    999   }
   1000 #endif
   1001 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
   1002   if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
   1003     InterpolateRow = InterpolateRow_Any_16_SSSE3;
   1004     if (IS_ALIGNED(src_width, 16)) {
   1005       InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
   1006       if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
   1007         InterpolateRow = InterpolateRow_16_SSSE3;
   1008       }
   1009     }
   1010   }
   1011 #endif
   1012 #if defined(HAS_INTERPOLATEROW_16_AVX2)
   1013   if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
   1014     InterpolateRow = InterpolateRow_Any_16_AVX2;
   1015     if (IS_ALIGNED(src_width, 32)) {
   1016       InterpolateRow = InterpolateRow_16_AVX2;
   1017     }
   1018   }
   1019 #endif
   1020 #if defined(HAS_INTERPOLATEROW_16_NEON)
   1021   if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
   1022     InterpolateRow = InterpolateRow_Any_16_NEON;
   1023     if (IS_ALIGNED(src_width, 16)) {
   1024       InterpolateRow = InterpolateRow_16_NEON;
   1025     }
   1026   }
   1027 #endif
   1028 #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
   1029   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
   1030     InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
   1031     if (IS_ALIGNED(src_width, 4)) {
   1032       InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
   1033     }
   1034   }
   1035 #endif
   1036 
   1037 
   1038 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
   1039   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
   1040     ScaleFilterCols = ScaleFilterCols_16_SSSE3;
   1041   }
   1042 #endif
   1043   if (y > max_y) {
   1044     y = max_y;
   1045   }
   1046 
   1047   for (j = 0; j < dst_height; ++j) {
   1048     int yi = y >> 16;
   1049     const uint16* src = src_ptr + yi * src_stride;
   1050     if (filtering == kFilterLinear) {
   1051       ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
   1052     } else {
   1053       int yf = (y >> 8) & 255;
   1054       InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
   1055       ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
   1056     }
   1057     dst_ptr += dst_stride;
   1058     y += dy;
   1059     if (y > max_y) {
   1060       y = max_y;
   1061     }
   1062   }
   1063   free_aligned_buffer_64(row);
   1064 }
   1065 
   1066 // Scale up down with bilinear interpolation.
   1067 void ScalePlaneBilinearUp(int src_width, int src_height,
   1068                           int dst_width, int dst_height,
   1069                           int src_stride, int dst_stride,
   1070                           const uint8* src_ptr, uint8* dst_ptr,
   1071                           enum FilterMode filtering) {
   1072   int j;
   1073   // Initial source x/y coordinate and step values as 16.16 fixed point.
   1074   int x = 0;
   1075   int y = 0;
   1076   int dx = 0;
   1077   int dy = 0;
   1078   const int max_y = (src_height - 1) << 16;
   1079   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   1080       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
   1081       InterpolateRow_C;
   1082   void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
   1083        int dst_width, int x, int dx) =
   1084        filtering ? ScaleFilterCols_C : ScaleCols_C;
   1085   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
   1086              &x, &y, &dx, &dy);
   1087   src_width = Abs(src_width);
   1088 
   1089 #if defined(HAS_INTERPOLATEROW_SSE2)
   1090   if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
   1091     InterpolateRow = InterpolateRow_Any_SSE2;
   1092     if (IS_ALIGNED(dst_width, 16)) {
   1093       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   1094       if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1095         InterpolateRow = InterpolateRow_SSE2;
   1096       }
   1097     }
   1098   }
   1099 #endif
   1100 #if defined(HAS_INTERPOLATEROW_SSSE3)
   1101   if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
   1102     InterpolateRow = InterpolateRow_Any_SSSE3;
   1103     if (IS_ALIGNED(dst_width, 16)) {
   1104       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   1105       if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1106         InterpolateRow = InterpolateRow_SSSE3;
   1107       }
   1108     }
   1109   }
   1110 #endif
   1111 #if defined(HAS_INTERPOLATEROW_AVX2)
   1112   if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
   1113     InterpolateRow = InterpolateRow_Any_AVX2;
   1114     if (IS_ALIGNED(dst_width, 32)) {
   1115       InterpolateRow = InterpolateRow_AVX2;
   1116     }
   1117   }
   1118 #endif
   1119 #if defined(HAS_INTERPOLATEROW_NEON)
   1120   if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
   1121     InterpolateRow = InterpolateRow_Any_NEON;
   1122     if (IS_ALIGNED(dst_width, 16)) {
   1123       InterpolateRow = InterpolateRow_NEON;
   1124     }
   1125   }
   1126 #endif
   1127 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
   1128   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
   1129     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
   1130     if (IS_ALIGNED(dst_width, 4)) {
   1131       InterpolateRow = InterpolateRow_MIPS_DSPR2;
   1132     }
   1133   }
   1134 #endif
   1135 
   1136   if (filtering && src_width >= 32768) {
   1137     ScaleFilterCols = ScaleFilterCols64_C;
   1138   }
   1139 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
   1140   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
   1141     ScaleFilterCols = ScaleFilterCols_SSSE3;
   1142   }
   1143 #endif
   1144   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
   1145     ScaleFilterCols = ScaleColsUp2_C;
   1146 #if defined(HAS_SCALECOLS_SSE2)
   1147     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   1148         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
   1149         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1150       ScaleFilterCols = ScaleColsUp2_SSE2;
   1151     }
   1152 #endif
   1153   }
   1154 
   1155   if (y > max_y) {
   1156     y = max_y;
   1157   }
   1158   {
   1159     int yi = y >> 16;
   1160     const uint8* src = src_ptr + yi * src_stride;
   1161 
   1162     // Allocate 2 row buffers.
   1163     const int kRowSize = (dst_width + 15) & ~15;
   1164     align_buffer_64(row, kRowSize * 2);
   1165 
   1166     uint8* rowptr = row;
   1167     int rowstride = kRowSize;
   1168     int lasty = yi;
   1169 
   1170     ScaleFilterCols(rowptr, src, dst_width, x, dx);
   1171     if (src_height > 1) {
   1172       src += src_stride;
   1173     }
   1174     ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
   1175     src += src_stride;
   1176 
   1177     for (j = 0; j < dst_height; ++j) {
   1178       yi = y >> 16;
   1179       if (yi != lasty) {
   1180         if (y > max_y) {
   1181           y = max_y;
   1182           yi = y >> 16;
   1183           src = src_ptr + yi * src_stride;
   1184         }
   1185         if (yi != lasty) {
   1186           ScaleFilterCols(rowptr, src, dst_width, x, dx);
   1187           rowptr += rowstride;
   1188           rowstride = -rowstride;
   1189           lasty = yi;
   1190           src += src_stride;
   1191         }
   1192       }
   1193       if (filtering == kFilterLinear) {
   1194         InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
   1195       } else {
   1196         int yf = (y >> 8) & 255;
   1197         InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
   1198       }
   1199       dst_ptr += dst_stride;
   1200       y += dy;
   1201     }
   1202     free_aligned_buffer_64(row);
   1203   }
   1204 }
   1205 
   1206 void ScalePlaneBilinearUp_16(int src_width, int src_height,
   1207                              int dst_width, int dst_height,
   1208                              int src_stride, int dst_stride,
   1209                              const uint16* src_ptr, uint16* dst_ptr,
   1210                              enum FilterMode filtering) {
   1211   int j;
   1212   // Initial source x/y coordinate and step values as 16.16 fixed point.
   1213   int x = 0;
   1214   int y = 0;
   1215   int dx = 0;
   1216   int dy = 0;
   1217   const int max_y = (src_height - 1) << 16;
   1218   void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
   1219       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
   1220       InterpolateRow_16_C;
   1221   void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
   1222        int dst_width, int x, int dx) =
   1223        filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
   1224   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
   1225              &x, &y, &dx, &dy);
   1226   src_width = Abs(src_width);
   1227 
   1228 #if defined(HAS_INTERPOLATEROW_16_SSE2)
   1229   if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
   1230     InterpolateRow = InterpolateRow_Any_16_SSE2;
   1231     if (IS_ALIGNED(dst_width, 16)) {
   1232       InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
   1233       if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1234         InterpolateRow = InterpolateRow_16_SSE2;
   1235       }
   1236     }
   1237   }
   1238 #endif
   1239 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
   1240   if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
   1241     InterpolateRow = InterpolateRow_Any_16_SSSE3;
   1242     if (IS_ALIGNED(dst_width, 16)) {
   1243       InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
   1244       if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1245         InterpolateRow = InterpolateRow_16_SSSE3;
   1246       }
   1247     }
   1248   }
   1249 #endif
   1250 #if defined(HAS_INTERPOLATEROW_16_AVX2)
   1251   if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
   1252     InterpolateRow = InterpolateRow_Any_16_AVX2;
   1253     if (IS_ALIGNED(dst_width, 32)) {
   1254       InterpolateRow = InterpolateRow_16_AVX2;
   1255     }
   1256   }
   1257 #endif
   1258 #if defined(HAS_INTERPOLATEROW_16_NEON)
   1259   if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
   1260     InterpolateRow = InterpolateRow_Any_16_NEON;
   1261     if (IS_ALIGNED(dst_width, 16)) {
   1262       InterpolateRow = InterpolateRow_16_NEON;
   1263     }
   1264   }
   1265 #endif
   1266 #if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
   1267   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
   1268     InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
   1269     if (IS_ALIGNED(dst_width, 4)) {
   1270       InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
   1271     }
   1272   }
   1273 #endif
   1274 
   1275   if (filtering && src_width >= 32768) {
   1276     ScaleFilterCols = ScaleFilterCols64_16_C;
   1277   }
   1278 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
   1279   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
   1280     ScaleFilterCols = ScaleFilterCols_16_SSSE3;
   1281   }
   1282 #endif
   1283   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
   1284     ScaleFilterCols = ScaleColsUp2_16_C;
   1285 #if defined(HAS_SCALECOLS_16_SSE2)
   1286     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   1287         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
   1288         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1289       ScaleFilterCols = ScaleColsUp2_16_SSE2;
   1290     }
   1291 #endif
   1292   }
   1293 
   1294   if (y > max_y) {
   1295     y = max_y;
   1296   }
   1297   {
   1298     int yi = y >> 16;
   1299     const uint16* src = src_ptr + yi * src_stride;
   1300 
   1301     // Allocate 2 row buffers.
   1302     const int kRowSize = (dst_width + 15) & ~15;
   1303     align_buffer_64(row, kRowSize * 4);
   1304 
   1305     uint16* rowptr = (uint16*)row;
   1306     int rowstride = kRowSize;
   1307     int lasty = yi;
   1308 
   1309     ScaleFilterCols(rowptr, src, dst_width, x, dx);
   1310     if (src_height > 1) {
   1311       src += src_stride;
   1312     }
   1313     ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
   1314     src += src_stride;
   1315 
   1316     for (j = 0; j < dst_height; ++j) {
   1317       yi = y >> 16;
   1318       if (yi != lasty) {
   1319         if (y > max_y) {
   1320           y = max_y;
   1321           yi = y >> 16;
   1322           src = src_ptr + yi * src_stride;
   1323         }
   1324         if (yi != lasty) {
   1325           ScaleFilterCols(rowptr, src, dst_width, x, dx);
   1326           rowptr += rowstride;
   1327           rowstride = -rowstride;
   1328           lasty = yi;
   1329           src += src_stride;
   1330         }
   1331       }
   1332       if (filtering == kFilterLinear) {
   1333         InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
   1334       } else {
   1335         int yf = (y >> 8) & 255;
   1336         InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
   1337       }
   1338       dst_ptr += dst_stride;
   1339       y += dy;
   1340     }
   1341     free_aligned_buffer_64(row);
   1342   }
   1343 }
   1344 
   1345 // Scale Plane to/from any dimensions, without interpolation.
   1346 // Fixed point math is used for performance: The upper 16 bits
   1347 // of x and dx is the integer part of the source position and
   1348 // the lower 16 bits are the fixed decimal part.
   1349 
   1350 static void ScalePlaneSimple(int src_width, int src_height,
   1351                              int dst_width, int dst_height,
   1352                              int src_stride, int dst_stride,
   1353                              const uint8* src_ptr, uint8* dst_ptr) {
   1354   int i;
   1355   void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
   1356       int dst_width, int x, int dx) = ScaleCols_C;
   1357   // Initial source x/y coordinate and step values as 16.16 fixed point.
   1358   int x = 0;
   1359   int y = 0;
   1360   int dx = 0;
   1361   int dy = 0;
   1362   ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
   1363              &x, &y, &dx, &dy);
   1364   src_width = Abs(src_width);
   1365 
   1366   if (src_width * 2 == dst_width && x < 0x8000) {
   1367     ScaleCols = ScaleColsUp2_C;
   1368 #if defined(HAS_SCALECOLS_SSE2)
   1369     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   1370         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
   1371         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1372       ScaleCols = ScaleColsUp2_SSE2;
   1373     }
   1374 #endif
   1375   }
   1376 
   1377   for (i = 0; i < dst_height; ++i) {
   1378     ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
   1379               dst_width, x, dx);
   1380     dst_ptr += dst_stride;
   1381     y += dy;
   1382   }
   1383 }
   1384 
   1385 static void ScalePlaneSimple_16(int src_width, int src_height,
   1386                                 int dst_width, int dst_height,
   1387                                 int src_stride, int dst_stride,
   1388                                 const uint16* src_ptr, uint16* dst_ptr) {
   1389   int i;
   1390   void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
   1391       int dst_width, int x, int dx) = ScaleCols_16_C;
   1392   // Initial source x/y coordinate and step values as 16.16 fixed point.
   1393   int x = 0;
   1394   int y = 0;
   1395   int dx = 0;
   1396   int dy = 0;
   1397   ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
   1398              &x, &y, &dx, &dy);
   1399   src_width = Abs(src_width);
   1400 
   1401   if (src_width * 2 == dst_width && x < 0x8000) {
   1402     ScaleCols = ScaleColsUp2_16_C;
   1403 #if defined(HAS_SCALECOLS_16_SSE2)
   1404     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   1405         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
   1406         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   1407       ScaleCols = ScaleColsUp2_16_SSE2;
   1408     }
   1409 #endif
   1410   }
   1411 
   1412   for (i = 0; i < dst_height; ++i) {
   1413     ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
   1414               dst_width, x, dx);
   1415     dst_ptr += dst_stride;
   1416     y += dy;
   1417   }
   1418 }
   1419 
   1420 // Scale a plane.
   1421 // This function dispatches to a specialized scaler based on scale factor.
   1422 
   1423 LIBYUV_API
   1424 void ScalePlane(const uint8* src, int src_stride,
   1425                 int src_width, int src_height,
   1426                 uint8* dst, int dst_stride,
   1427                 int dst_width, int dst_height,
   1428                 enum FilterMode filtering) {
   1429   // Simplify filtering when possible.
   1430   filtering = ScaleFilterReduce(src_width, src_height,
   1431                                 dst_width, dst_height,
   1432                                 filtering);
   1433 
   1434   // Negative height means invert the image.
   1435   if (src_height < 0) {
   1436     src_height = -src_height;
   1437     src = src + (src_height - 1) * src_stride;
   1438     src_stride = -src_stride;
   1439   }
   1440 
   1441   // Use specialized scales to improve performance for common resolutions.
   1442   // For example, all the 1/2 scalings will use ScalePlaneDown2()
   1443   if (dst_width == src_width && dst_height == src_height) {
   1444     // Straight copy.
   1445     CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
   1446     return;
   1447   }
   1448   if (dst_width == src_width) {
   1449     int dy = FixedDiv(src_height, dst_height);
   1450     // Arbitrary scale vertically, but unscaled vertically.
   1451     ScalePlaneVertical(src_height,
   1452                        dst_width, dst_height,
   1453                        src_stride, dst_stride, src, dst,
   1454                        0, 0, dy, 1, filtering);
   1455     return;
   1456   }
   1457   if (dst_width <= Abs(src_width) && dst_height <= src_height) {
   1458     // Scale down.
   1459     if (4 * dst_width == 3 * src_width &&
   1460         4 * dst_height == 3 * src_height) {
   1461       // optimized, 3/4
   1462       ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
   1463                        src_stride, dst_stride, src, dst, filtering);
   1464       return;
   1465     }
   1466     if (2 * dst_width == src_width && 2 * dst_height == src_height) {
   1467       // optimized, 1/2
   1468       ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
   1469                       src_stride, dst_stride, src, dst, filtering);
   1470       return;
   1471     }
   1472     // 3/8 rounded up for odd sized chroma height.
   1473     if (8 * dst_width == 3 * src_width &&
   1474         dst_height == ((src_height * 3 + 7) / 8)) {
   1475       // optimized, 3/8
   1476       ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
   1477                        src_stride, dst_stride, src, dst, filtering);
   1478       return;
   1479     }
   1480     if (4 * dst_width == src_width && 4 * dst_height == src_height &&
   1481                filtering != kFilterBilinear) {
   1482       // optimized, 1/4
   1483       ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
   1484                       src_stride, dst_stride, src, dst, filtering);
   1485       return;
   1486     }
   1487   }
   1488   if (filtering == kFilterBox && dst_height * 2 < src_height) {
   1489     ScalePlaneBox(src_width, src_height, dst_width, dst_height,
   1490                   src_stride, dst_stride, src, dst);
   1491     return;
   1492   }
   1493   if (filtering && dst_height > src_height) {
   1494     ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
   1495                          src_stride, dst_stride, src, dst, filtering);
   1496     return;
   1497   }
   1498   if (filtering) {
   1499     ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
   1500                            src_stride, dst_stride, src, dst, filtering);
   1501     return;
   1502   }
   1503   ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
   1504                    src_stride, dst_stride, src, dst);
   1505 }
   1506 
   1507 LIBYUV_API
   1508 void ScalePlane_16(const uint16* src, int src_stride,
   1509                   int src_width, int src_height,
   1510                   uint16* dst, int dst_stride,
   1511                   int dst_width, int dst_height,
   1512                   enum FilterMode filtering) {
   1513   // Simplify filtering when possible.
   1514   filtering = ScaleFilterReduce(src_width, src_height,
   1515                                 dst_width, dst_height,
   1516                                 filtering);
   1517 
   1518   // Negative height means invert the image.
   1519   if (src_height < 0) {
   1520     src_height = -src_height;
   1521     src = src + (src_height - 1) * src_stride;
   1522     src_stride = -src_stride;
   1523   }
   1524 
   1525   // Use specialized scales to improve performance for common resolutions.
   1526   // For example, all the 1/2 scalings will use ScalePlaneDown2()
   1527   if (dst_width == src_width && dst_height == src_height) {
   1528     // Straight copy.
   1529     CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
   1530     return;
   1531   }
   1532   if (dst_width == src_width) {
   1533     int dy = FixedDiv(src_height, dst_height);
   1534     // Arbitrary scale vertically, but unscaled vertically.
   1535     ScalePlaneVertical_16(src_height,
   1536                           dst_width, dst_height,
   1537                           src_stride, dst_stride, src, dst,
   1538                           0, 0, dy, 1, filtering);
   1539     return;
   1540   }
   1541   if (dst_width <= Abs(src_width) && dst_height <= src_height) {
   1542     // Scale down.
   1543     if (4 * dst_width == 3 * src_width &&
   1544         4 * dst_height == 3 * src_height) {
   1545       // optimized, 3/4
   1546       ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
   1547                           src_stride, dst_stride, src, dst, filtering);
   1548       return;
   1549     }
   1550     if (2 * dst_width == src_width && 2 * dst_height == src_height) {
   1551       // optimized, 1/2
   1552       ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
   1553                          src_stride, dst_stride, src, dst, filtering);
   1554       return;
   1555     }
   1556     // 3/8 rounded up for odd sized chroma height.
   1557     if (8 * dst_width == 3 * src_width &&
   1558         dst_height == ((src_height * 3 + 7) / 8)) {
   1559       // optimized, 3/8
   1560       ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
   1561                           src_stride, dst_stride, src, dst, filtering);
   1562       return;
   1563     }
   1564     if (4 * dst_width == src_width && 4 * dst_height == src_height &&
   1565                filtering != kFilterBilinear) {
   1566       // optimized, 1/4
   1567       ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
   1568                          src_stride, dst_stride, src, dst, filtering);
   1569       return;
   1570     }
   1571   }
   1572   if (filtering == kFilterBox && dst_height * 2 < src_height) {
   1573     ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
   1574                      src_stride, dst_stride, src, dst);
   1575     return;
   1576   }
   1577   if (filtering && dst_height > src_height) {
   1578     ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
   1579                             src_stride, dst_stride, src, dst, filtering);
   1580     return;
   1581   }
   1582   if (filtering) {
   1583     ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
   1584                               src_stride, dst_stride, src, dst, filtering);
   1585     return;
   1586   }
   1587   ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
   1588                       src_stride, dst_stride, src, dst);
   1589 }
   1590 
   1591 // Scale an I420 image.
   1592 // This function in turn calls a scaling function for each plane.
   1593 
   1594 LIBYUV_API
   1595 int I420Scale(const uint8* src_y, int src_stride_y,
   1596               const uint8* src_u, int src_stride_u,
   1597               const uint8* src_v, int src_stride_v,
   1598               int src_width, int src_height,
   1599               uint8* dst_y, int dst_stride_y,
   1600               uint8* dst_u, int dst_stride_u,
   1601               uint8* dst_v, int dst_stride_v,
   1602               int dst_width, int dst_height,
   1603               enum FilterMode filtering) {
   1604   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   1605   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   1606   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   1607   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
   1608   if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
   1609       !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
   1610     return -1;
   1611   }
   1612 
   1613   ScalePlane(src_y, src_stride_y, src_width, src_height,
   1614              dst_y, dst_stride_y, dst_width, dst_height,
   1615              filtering);
   1616   ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
   1617              dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
   1618              filtering);
   1619   ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
   1620              dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
   1621              filtering);
   1622   return 0;
   1623 }
   1624 
   1625 LIBYUV_API
   1626 int I420Scale_16(const uint16* src_y, int src_stride_y,
   1627                  const uint16* src_u, int src_stride_u,
   1628                  const uint16* src_v, int src_stride_v,
   1629                  int src_width, int src_height,
   1630                  uint16* dst_y, int dst_stride_y,
   1631                  uint16* dst_u, int dst_stride_u,
   1632                  uint16* dst_v, int dst_stride_v,
   1633                  int dst_width, int dst_height,
   1634                  enum FilterMode filtering) {
   1635   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   1636   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   1637   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   1638   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
   1639   if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
   1640       !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
   1641     return -1;
   1642   }
   1643 
   1644   ScalePlane_16(src_y, src_stride_y, src_width, src_height,
   1645                 dst_y, dst_stride_y, dst_width, dst_height,
   1646                 filtering);
   1647   ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
   1648                 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
   1649                 filtering);
   1650   ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
   1651                 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
   1652                 filtering);
   1653   return 0;
   1654 }
   1655 
   1656 // Deprecated api
   1657 LIBYUV_API
   1658 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
   1659           int src_stride_y, int src_stride_u, int src_stride_v,
   1660           int src_width, int src_height,
   1661           uint8* dst_y, uint8* dst_u, uint8* dst_v,
   1662           int dst_stride_y, int dst_stride_u, int dst_stride_v,
   1663           int dst_width, int dst_height,
   1664           LIBYUV_BOOL interpolate) {
   1665   return I420Scale(src_y, src_stride_y,
   1666                    src_u, src_stride_u,
   1667                    src_v, src_stride_v,
   1668                    src_width, src_height,
   1669                    dst_y, dst_stride_y,
   1670                    dst_u, dst_stride_u,
   1671                    dst_v, dst_stride_v,
   1672                    dst_width, dst_height,
   1673                    interpolate ? kFilterBox : kFilterNone);
   1674 }
   1675 
   1676 // Deprecated api
   1677 LIBYUV_API
   1678 int ScaleOffset(const uint8* src, int src_width, int src_height,
   1679                 uint8* dst, int dst_width, int dst_height, int dst_yoffset,
   1680                 LIBYUV_BOOL interpolate) {
   1681   // Chroma requires offset to multiple of 2.
   1682   int dst_yoffset_even = dst_yoffset & ~1;
   1683   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   1684   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   1685   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   1686   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
   1687   int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
   1688   const uint8* src_y = src;
   1689   const uint8* src_u = src + src_width * src_height;
   1690   const uint8* src_v = src + src_width * src_height +
   1691                              src_halfwidth * src_halfheight;
   1692   uint8* dst_y = dst + dst_yoffset_even * dst_width;
   1693   uint8* dst_u = dst + dst_width * dst_height +
   1694                  (dst_yoffset_even >> 1) * dst_halfwidth;
   1695   uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
   1696                  (dst_yoffset_even >> 1) * dst_halfwidth;
   1697   if (!src || src_width <= 0 || src_height <= 0 ||
   1698       !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
   1699       dst_yoffset_even >= dst_height) {
   1700     return -1;
   1701   }
   1702   return I420Scale(src_y, src_width,
   1703                    src_u, src_halfwidth,
   1704                    src_v, src_halfwidth,
   1705                    src_width, src_height,
   1706                    dst_y, dst_width,
   1707                    dst_u, dst_halfwidth,
   1708                    dst_v, dst_halfwidth,
   1709                    dst_width, aheight,
   1710                    interpolate ? kFilterBox : kFilterNone);
   1711 }
   1712 
   1713 #ifdef __cplusplus
   1714 }  // extern "C"
   1715 }  // namespace libyuv
   1716 #endif
   1717