Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/scale.h"
     12 
     13 #include <assert.h>
     14 #include <string.h>
     15 
     16 #include "libyuv/cpu_id.h"
     17 #include "libyuv/planar_functions.h"  // For CopyARGB
     18 #include "libyuv/row.h"
     19 #include "libyuv/scale_row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 static __inline int Abs(int v) {
     27   return v >= 0 ? v : -v;
     28 }
     29 
     30 // ScaleARGB ARGB, 1/2
     31 // This is an optimized version for scaling down a ARGB to 1/2 of
     32 // its original size.
     33 static void ScaleARGBDown2(int src_width, int src_height,
     34                            int dst_width, int dst_height,
     35                            int src_stride, int dst_stride,
     36                            const uint8* src_argb, uint8* dst_argb,
     37                            int x, int dx, int y, int dy,
     38                            enum FilterMode filtering) {
     39   int j;
     40   int row_stride = src_stride * (dy >> 16);
     41   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
     42                             uint8* dst_argb, int dst_width) =
     43     filtering == kFilterNone ? ScaleARGBRowDown2_C :
     44         (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
     45         ScaleARGBRowDown2Box_C);
     46   assert(dx == 65536 * 2);  // Test scale factor of 2.
     47   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
     48   // Advance to odd row, even column.
     49   if (filtering == kFilterBilinear) {
     50     src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
     51   } else {
     52     src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
     53   }
     54 
     55 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
     56   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
     57       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
     58       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
     59     ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
     60         (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
     61         ScaleARGBRowDown2Box_SSE2);
     62   }
     63 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
     64   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
     65       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
     66     ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
     67         ScaleARGBRowDown2_NEON;
     68   }
     69 #endif
     70 
     71   if (filtering == kFilterLinear) {
     72     src_stride = 0;
     73   }
     74   for (j = 0; j < dst_height; ++j) {
     75     ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
     76     src_argb += row_stride;
     77     dst_argb += dst_stride;
     78   }
     79 }
     80 
     81 // ScaleARGB ARGB, 1/4
     82 // This is an optimized version for scaling down a ARGB to 1/4 of
     83 // its original size.
     84 static void ScaleARGBDown4Box(int src_width, int src_height,
     85                               int dst_width, int dst_height,
     86                               int src_stride, int dst_stride,
     87                               const uint8* src_argb, uint8* dst_argb,
     88                               int x, int dx, int y, int dy) {
     89   int j;
     90   // Allocate 2 rows of ARGB.
     91   const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
     92   align_buffer_64(row, kRowSize * 2);
     93   int row_stride = src_stride * (dy >> 16);
     94   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
     95     uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
     96   // Advance to odd row, even column.
     97   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
     98   assert(dx == 65536 * 4);  // Test scale factor of 4.
     99   assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
    100 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
    101   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
    102       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
    103       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    104     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
    105   }
    106 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
    107   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
    108       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
    109     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
    110   }
    111 #endif
    112   for (j = 0; j < dst_height; ++j) {
    113     ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
    114     ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
    115                       row + kRowSize, dst_width * 2);
    116     ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
    117     src_argb += row_stride;
    118     dst_argb += dst_stride;
    119   }
    120   free_aligned_buffer_64(row);
    121 }
    122 
    123 // ScaleARGB ARGB Even
    124 // This is an optimized version for scaling down a ARGB to even
    125 // multiple of its original size.
    126 static void ScaleARGBDownEven(int src_width, int src_height,
    127                               int dst_width, int dst_height,
    128                               int src_stride, int dst_stride,
    129                               const uint8* src_argb, uint8* dst_argb,
    130                               int x, int dx, int y, int dy,
    131                               enum FilterMode filtering) {
    132   int j;
    133   int col_step = dx >> 16;
    134   int row_stride = (dy >> 16) * src_stride;
    135   void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
    136                                int src_step, uint8* dst_argb, int dst_width) =
    137       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
    138   assert(IS_ALIGNED(src_width, 2));
    139   assert(IS_ALIGNED(src_height, 2));
    140   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
    141 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
    142   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
    143       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    144     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
    145         ScaleARGBRowDownEven_SSE2;
    146   }
    147 #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
    148   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
    149       IS_ALIGNED(src_argb, 4)) {
    150     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
    151         ScaleARGBRowDownEven_NEON;
    152   }
    153 #endif
    154 
    155   if (filtering == kFilterLinear) {
    156     src_stride = 0;
    157   }
    158   for (j = 0; j < dst_height; ++j) {
    159     ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
    160     src_argb += row_stride;
    161     dst_argb += dst_stride;
    162   }
    163 }
    164 
    165 // Scale ARGB down with bilinear interpolation.
    166 static void ScaleARGBBilinearDown(int src_width, int src_height,
    167                                   int dst_width, int dst_height,
    168                                   int src_stride, int dst_stride,
    169                                   const uint8* src_argb, uint8* dst_argb,
    170                                   int x, int dx, int y, int dy,
    171                                   enum FilterMode filtering) {
    172   int j;
    173   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
    174       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
    175       InterpolateRow_C;
    176   void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
    177       int dst_width, int x, int dx) =
    178       (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
    179   int64 xlast = x + (int64)(dst_width - 1) * dx;
    180   int64 xl = (dx >= 0) ? x : xlast;
    181   int64 xr = (dx >= 0) ? xlast : x;
    182   int clip_src_width;
    183   xl = (xl >> 16) & ~3;  // Left edge aligned.
    184   xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
    185   xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
    186   if (xr > src_width) {
    187     xr = src_width;
    188   }
    189   clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
    190   src_argb += xl * 4;
    191   x -= (int)(xl << 16);
    192 #if defined(HAS_INTERPOLATEROW_SSE2)
    193   if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
    194     InterpolateRow = InterpolateRow_Any_SSE2;
    195     if (IS_ALIGNED(clip_src_width, 16)) {
    196       InterpolateRow = InterpolateRow_Unaligned_SSE2;
    197       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
    198         InterpolateRow = InterpolateRow_SSE2;
    199       }
    200     }
    201   }
    202 #endif
    203 #if defined(HAS_INTERPOLATEROW_SSSE3)
    204   if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
    205     InterpolateRow = InterpolateRow_Any_SSSE3;
    206     if (IS_ALIGNED(clip_src_width, 16)) {
    207       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
    208       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
    209         InterpolateRow = InterpolateRow_SSSE3;
    210       }
    211     }
    212   }
    213 #endif
    214 #if defined(HAS_INTERPOLATEROW_AVX2)
    215   if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
    216     InterpolateRow = InterpolateRow_Any_AVX2;
    217     if (IS_ALIGNED(clip_src_width, 32)) {
    218       InterpolateRow = InterpolateRow_AVX2;
    219     }
    220   }
    221 #endif
    222 #if defined(HAS_INTERPOLATEROW_NEON)
    223   if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
    224     InterpolateRow = InterpolateRow_Any_NEON;
    225     if (IS_ALIGNED(clip_src_width, 16)) {
    226       InterpolateRow = InterpolateRow_NEON;
    227     }
    228   }
    229 #endif
    230 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
    231   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
    232       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
    233     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    234     if (IS_ALIGNED(clip_src_width, 4)) {
    235       InterpolateRow = InterpolateRow_MIPS_DSPR2;
    236     }
    237   }
    238 #endif
    239 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
    240   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    241     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
    242   }
    243 #endif
    244   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
    245   // Allocate a row of ARGB.
    246   {
    247     align_buffer_64(row, clip_src_width * 4);
    248 
    249     const int max_y = (src_height - 1) << 16;
    250     if (y > max_y) {
    251       y = max_y;
    252     }
    253     for (j = 0; j < dst_height; ++j) {
    254       int yi = y >> 16;
    255       const uint8* src = src_argb + yi * src_stride;
    256       if (filtering == kFilterLinear) {
    257         ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
    258       } else {
    259         int yf = (y >> 8) & 255;
    260         InterpolateRow(row, src, src_stride, clip_src_width, yf);
    261         ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
    262       }
    263       dst_argb += dst_stride;
    264       y += dy;
    265       if (y > max_y) {
    266         y = max_y;
    267       }
    268     }
    269     free_aligned_buffer_64(row);
    270   }
    271 }
    272 
    273 // Scale ARGB up with bilinear interpolation.
    274 static void ScaleARGBBilinearUp(int src_width, int src_height,
    275                                 int dst_width, int dst_height,
    276                                 int src_stride, int dst_stride,
    277                                 const uint8* src_argb, uint8* dst_argb,
    278                                 int x, int dx, int y, int dy,
    279                                 enum FilterMode filtering) {
    280   int j;
    281   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
    282       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
    283       InterpolateRow_C;
    284   void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
    285       int dst_width, int x, int dx) =
    286       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
    287   const int max_y = (src_height - 1) << 16;
    288 #if defined(HAS_INTERPOLATEROW_SSE2)
    289   if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    290     InterpolateRow = InterpolateRow_Any_SSE2;
    291     if (IS_ALIGNED(dst_width, 4)) {
    292       InterpolateRow = InterpolateRow_Unaligned_SSE2;
    293       if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    294         InterpolateRow = InterpolateRow_SSE2;
    295       }
    296     }
    297   }
    298 #endif
    299 #if defined(HAS_INTERPOLATEROW_SSSE3)
    300   if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    301     InterpolateRow = InterpolateRow_Any_SSSE3;
    302     if (IS_ALIGNED(dst_width, 4)) {
    303       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
    304       if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    305         InterpolateRow = InterpolateRow_SSSE3;
    306       }
    307     }
    308   }
    309 #endif
    310 #if defined(HAS_INTERPOLATEROW_AVX2)
    311   if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    312     InterpolateRow = InterpolateRow_Any_AVX2;
    313     if (IS_ALIGNED(dst_width, 8)) {
    314       InterpolateRow = InterpolateRow_AVX2;
    315     }
    316   }
    317 #endif
    318 #if defined(HAS_INTERPOLATEROW_NEON)
    319   if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    320     InterpolateRow = InterpolateRow_Any_NEON;
    321     if (IS_ALIGNED(dst_width, 4)) {
    322       InterpolateRow = InterpolateRow_NEON;
    323     }
    324   }
    325 #endif
    326 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
    327   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
    328       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
    329     InterpolateRow = InterpolateRow_MIPS_DSPR2;
    330   }
    331 #endif
    332   if (src_width >= 32768) {
    333     ScaleARGBFilterCols = filtering ?
    334         ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
    335   }
    336 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
    337   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    338     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
    339   }
    340 #endif
    341 #if defined(HAS_SCALEARGBCOLS_SSE2)
    342   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    343     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
    344   }
    345 #endif
    346   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    347     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
    348 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    349     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
    350         IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
    351         IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    352       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    353     }
    354 #endif
    355   }
    356 
    357   if (y > max_y) {
    358     y = max_y;
    359   }
    360 
    361   {
    362     int yi = y >> 16;
    363     const uint8* src = src_argb + yi * src_stride;
    364 
    365     // Allocate 2 rows of ARGB.
    366     const int kRowSize = (dst_width * 4 + 15) & ~15;
    367     align_buffer_64(row, kRowSize * 2);
    368 
    369     uint8* rowptr = row;
    370     int rowstride = kRowSize;
    371     int lasty = yi;
    372 
    373     ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
    374     if (src_height > 1) {
    375       src += src_stride;
    376     }
    377     ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    378     src += src_stride;
    379 
    380     for (j = 0; j < dst_height; ++j) {
    381       yi = y >> 16;
    382       if (yi != lasty) {
    383         if (y > max_y) {
    384           y = max_y;
    385           yi = y >> 16;
    386           src = src_argb + yi * src_stride;
    387         }
    388         if (yi != lasty) {
    389           ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
    390           rowptr += rowstride;
    391           rowstride = -rowstride;
    392           lasty = yi;
    393           src += src_stride;
    394         }
    395       }
    396       if (filtering == kFilterLinear) {
    397         InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    398       } else {
    399         int yf = (y >> 8) & 255;
    400         InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    401       }
    402       dst_argb += dst_stride;
    403       y += dy;
    404     }
    405     free_aligned_buffer_64(row);
    406   }
    407 }
    408 
    409 #ifdef YUVSCALEUP
    410 // Scale YUV to ARGB up with bilinear interpolation.
    411 static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
    412                                      int dst_width, int dst_height,
    413                                      int src_stride_y,
    414                                      int src_stride_u,
    415                                      int src_stride_v,
    416                                      int dst_stride_argb,
    417                                      const uint8* src_y,
    418                                      const uint8* src_u,
    419                                      const uint8* src_v,
    420                                      uint8* dst_argb,
    421                                      int x, int dx, int y, int dy,
    422                                      enum FilterMode filtering) {
    423   int j;
    424   void (*I422ToARGBRow)(const uint8* y_buf,
    425                         const uint8* u_buf,
    426                         const uint8* v_buf,
    427                         uint8* rgb_buf,
    428                         int width) = I422ToARGBRow_C;
    429 #if defined(HAS_I422TOARGBROW_SSSE3)
    430   if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    431     I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    432     if (IS_ALIGNED(src_width, 8)) {
    433       I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
    434       if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    435         I422ToARGBRow = I422ToARGBRow_SSSE3;
    436       }
    437     }
    438   }
    439 #endif
    440 #if defined(HAS_I422TOARGBROW_AVX2)
    441   if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    442     I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    443     if (IS_ALIGNED(src_width, 16)) {
    444       I422ToARGBRow = I422ToARGBRow_AVX2;
    445     }
    446   }
    447 #endif
    448 #if defined(HAS_I422TOARGBROW_NEON)
    449   if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    450     I422ToARGBRow = I422ToARGBRow_Any_NEON;
    451     if (IS_ALIGNED(src_width, 8)) {
    452       I422ToARGBRow = I422ToARGBRow_NEON;
    453     }
    454   }
    455 #endif
    456 #if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
    457   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
    458       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
    459       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
    460       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
    461       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    462     I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
    463   }
    464 #endif
    465 
    466   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
    467       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
    468       InterpolateRow_C;
    469 #if defined(HAS_INTERPOLATEROW_SSE2)
    470   if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    471     InterpolateRow = InterpolateRow_Any_SSE2;
    472     if (IS_ALIGNED(dst_width, 4)) {
    473       InterpolateRow = InterpolateRow_Unaligned_SSE2;
    474       if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    475         InterpolateRow = InterpolateRow_SSE2;
    476       }
    477     }
    478   }
    479 #endif
    480 #if defined(HAS_INTERPOLATEROW_SSSE3)
    481   if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    482     InterpolateRow = InterpolateRow_Any_SSSE3;
    483     if (IS_ALIGNED(dst_width, 4)) {
    484       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
    485       if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    486         InterpolateRow = InterpolateRow_SSSE3;
    487       }
    488     }
    489   }
    490 #endif
    491 #if defined(HAS_INTERPOLATEROW_AVX2)
    492   if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    493     InterpolateRow = InterpolateRow_Any_AVX2;
    494     if (IS_ALIGNED(dst_width, 8)) {
    495       InterpolateRow = InterpolateRow_AVX2;
    496     }
    497   }
    498 #endif
    499 #if defined(HAS_INTERPOLATEROW_NEON)
    500   if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    501     InterpolateRow = InterpolateRow_Any_NEON;
    502     if (IS_ALIGNED(dst_width, 4)) {
    503       InterpolateRow = InterpolateRow_NEON;
    504     }
    505   }
    506 #endif
    507 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
    508   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
    509       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    510     InterpolateRow = InterpolateRow_MIPS_DSPR2;
    511   }
    512 #endif
    513 
    514   void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
    515       int dst_width, int x, int dx) =
    516       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
    517   if (src_width >= 32768) {
    518     ScaleARGBFilterCols = filtering ?
    519         ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
    520   }
    521 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
    522   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    523     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
    524   }
    525 #endif
    526 #if defined(HAS_SCALEARGBCOLS_SSE2)
    527   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    528     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
    529   }
    530 #endif
    531   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    532     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
    533 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    534     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
    535         IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
    536         IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    537       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    538     }
    539 #endif
    540   }
    541 
    542   const int max_y = (src_height - 1) << 16;
    543   if (y > max_y) {
    544     y = max_y;
    545   }
    546   const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
    547   int yi = y >> 16;
    548   int uv_yi = yi >> kYShift;
    549   const uint8* src_row_y = src_y + yi * src_stride_y;
    550   const uint8* src_row_u = src_u + uv_yi * src_stride_u;
    551   const uint8* src_row_v = src_v + uv_yi * src_stride_v;
    552 
    553   // Allocate 2 rows of ARGB.
    554   const int kRowSize = (dst_width * 4 + 15) & ~15;
    555   align_buffer_64(row, kRowSize * 2);
    556 
    557   // Allocate 1 row of ARGB for source conversion.
    558   align_buffer_64(argb_row, src_width * 4);
    559 
    560   uint8* rowptr = row;
    561   int rowstride = kRowSize;
    562   int lasty = yi;
    563 
    564   // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
    565   ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
    566   if (src_height > 1) {
    567     src_row_y += src_stride_y;
    568     if (yi & 1) {
    569       src_row_u += src_stride_u;
    570       src_row_v += src_stride_v;
    571     }
    572   }
    573   ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
    574   if (src_height > 2) {
    575     src_row_y += src_stride_y;
    576     if (!(yi & 1)) {
    577       src_row_u += src_stride_u;
    578       src_row_v += src_stride_v;
    579     }
    580   }
    581 
    582   for (j = 0; j < dst_height; ++j) {
    583     yi = y >> 16;
    584     if (yi != lasty) {
    585       if (y > max_y) {
    586         y = max_y;
    587         yi = y >> 16;
    588         uv_yi = yi >> kYShift;
    589         src_row_y = src_y + yi * src_stride_y;
    590         src_row_u = src_u + uv_yi * src_stride_u;
    591         src_row_v = src_v + uv_yi * src_stride_v;
    592       }
    593       if (yi != lasty) {
    594         // TODO(fbarchard): Convert the clipped region of row.
    595         I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
    596         ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
    597         rowptr += rowstride;
    598         rowstride = -rowstride;
    599         lasty = yi;
    600         src_row_y += src_stride_y;
    601         if (yi & 1) {
    602           src_row_u += src_stride_u;
    603           src_row_v += src_stride_v;
    604         }
    605       }
    606     }
    607     if (filtering == kFilterLinear) {
    608       InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    609     } else {
    610       int yf = (y >> 8) & 255;
    611       InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    612     }
    613     dst_argb += dst_stride_argb;
    614     y += dy;
    615   }
    616   free_aligned_buffer_64(row);
    617   free_aligned_buffer_64(row_argb);
    618 }
    619 #endif
    620 
    621 // Scale ARGB to/from any dimensions, without interpolation.
    622 // Fixed point math is used for performance: The upper 16 bits
    623 // of x and dx is the integer part of the source position and
    624 // the lower 16 bits are the fixed decimal part.
    625 
    626 static void ScaleARGBSimple(int src_width, int src_height,
    627                             int dst_width, int dst_height,
    628                             int src_stride, int dst_stride,
    629                             const uint8* src_argb, uint8* dst_argb,
    630                             int x, int dx, int y, int dy) {
    631   int j;
    632   void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
    633       int dst_width, int x, int dx) =
    634       (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
    635 #if defined(HAS_SCALEARGBCOLS_SSE2)
    636   if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    637     ScaleARGBCols = ScaleARGBCols_SSE2;
    638   }
    639 #endif
    640   if (src_width * 2 == dst_width && x < 0x8000) {
    641     ScaleARGBCols = ScaleARGBColsUp2_C;
    642 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    643     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
    644         IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
    645         IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    646       ScaleARGBCols = ScaleARGBColsUp2_SSE2;
    647     }
    648 #endif
    649   }
    650 
    651   for (j = 0; j < dst_height; ++j) {
    652     ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
    653                   dst_width, x, dx);
    654     dst_argb += dst_stride;
    655     y += dy;
    656   }
    657 }
    658 
    659 // ScaleARGB a ARGB.
    660 // This function in turn calls a scaling function
    661 // suitable for handling the desired resolutions.
    662 static void ScaleARGB(const uint8* src, int src_stride,
    663                       int src_width, int src_height,
    664                       uint8* dst, int dst_stride,
    665                       int dst_width, int dst_height,
    666                       int clip_x, int clip_y, int clip_width, int clip_height,
    667                       enum FilterMode filtering) {
    668   // Initial source x/y coordinate and step values as 16.16 fixed point.
    669   int x = 0;
    670   int y = 0;
    671   int dx = 0;
    672   int dy = 0;
    673   // ARGB does not support box filter yet, but allow the user to pass it.
    674   // Simplify filtering when possible.
    675   filtering = ScaleFilterReduce(src_width, src_height,
    676                                 dst_width, dst_height,
    677                                 filtering);
    678 
    679   // Negative src_height means invert the image.
    680   if (src_height < 0) {
    681     src_height = -src_height;
    682     src = src + (src_height - 1) * src_stride;
    683     src_stride = -src_stride;
    684   }
    685   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
    686              &x, &y, &dx, &dy);
    687   src_width = Abs(src_width);
    688   if (clip_x) {
    689     int64 clipf = (int64)(clip_x) * dx;
    690     x += (clipf & 0xffff);
    691     src += (clipf >> 16) * 4;
    692     dst += clip_x * 4;
    693   }
    694   if (clip_y) {
    695     int64 clipf = (int64)(clip_y) * dy;
    696     y += (clipf & 0xffff);
    697     src += (clipf >> 16) * src_stride;
    698     dst += clip_y * dst_stride;
    699   }
    700 
    701   // Special case for integer step values.
    702   if (((dx | dy) & 0xffff) == 0) {
    703     if (!dx || !dy) {  // 1 pixel wide and/or tall.
    704       filtering = kFilterNone;
    705     } else {
    706       // Optimized even scale down. ie 2, 4, 6, 8, 10x.
    707       if (!(dx & 0x10000) && !(dy & 0x10000)) {
    708         if (dx == 0x20000) {
    709           // Optimized 1/2 downsample.
    710           ScaleARGBDown2(src_width, src_height,
    711                          clip_width, clip_height,
    712                          src_stride, dst_stride, src, dst,
    713                          x, dx, y, dy, filtering);
    714           return;
    715         }
    716         if (dx == 0x40000 && filtering == kFilterBox) {
    717           // Optimized 1/4 box downsample.
    718           ScaleARGBDown4Box(src_width, src_height,
    719                             clip_width, clip_height,
    720                             src_stride, dst_stride, src, dst,
    721                             x, dx, y, dy);
    722           return;
    723         }
    724         ScaleARGBDownEven(src_width, src_height,
    725                           clip_width, clip_height,
    726                           src_stride, dst_stride, src, dst,
    727                           x, dx, y, dy, filtering);
    728         return;
    729       }
    730       // Optimized odd scale down. ie 3, 5, 7, 9x.
    731       if ((dx & 0x10000) && (dy & 0x10000)) {
    732         filtering = kFilterNone;
    733         if (dx == 0x10000 && dy == 0x10000) {
    734           // Straight copy.
    735           ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
    736                    dst, dst_stride, clip_width, clip_height);
    737           return;
    738         }
    739       }
    740     }
    741   }
    742   if (dx == 0x10000 && (x & 0xffff) == 0) {
    743     // Arbitrary scale vertically, but unscaled vertically.
    744     ScalePlaneVertical(src_height,
    745                        clip_width, clip_height,
    746                        src_stride, dst_stride, src, dst,
    747                        x, y, dy, 4, filtering);
    748     return;
    749   }
    750   if (filtering && dy < 65536) {
    751     ScaleARGBBilinearUp(src_width, src_height,
    752                         clip_width, clip_height,
    753                         src_stride, dst_stride, src, dst,
    754                         x, dx, y, dy, filtering);
    755     return;
    756   }
    757   if (filtering) {
    758     ScaleARGBBilinearDown(src_width, src_height,
    759                           clip_width, clip_height,
    760                           src_stride, dst_stride, src, dst,
    761                           x, dx, y, dy, filtering);
    762     return;
    763   }
    764   ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
    765                   src_stride, dst_stride, src, dst,
    766                   x, dx, y, dy);
    767 }
    768 
    769 LIBYUV_API
    770 int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
    771                   int src_width, int src_height,
    772                   uint8* dst_argb, int dst_stride_argb,
    773                   int dst_width, int dst_height,
    774                   int clip_x, int clip_y, int clip_width, int clip_height,
    775                   enum FilterMode filtering) {
    776   if (!src_argb || src_width == 0 || src_height == 0 ||
    777       !dst_argb || dst_width <= 0 || dst_height <= 0 ||
    778       clip_x < 0 || clip_y < 0 ||
    779       (clip_x + clip_width) > dst_width ||
    780       (clip_y + clip_height) > dst_height) {
    781     return -1;
    782   }
    783   ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
    784             dst_argb, dst_stride_argb, dst_width, dst_height,
    785             clip_x, clip_y, clip_width, clip_height, filtering);
    786   return 0;
    787 }
    788 
    789 // Scale an ARGB image.
    790 LIBYUV_API
    791 int ARGBScale(const uint8* src_argb, int src_stride_argb,
    792               int src_width, int src_height,
    793               uint8* dst_argb, int dst_stride_argb,
    794               int dst_width, int dst_height,
    795               enum FilterMode filtering) {
    796   if (!src_argb || src_width == 0 || src_height == 0 ||
    797       !dst_argb || dst_width <= 0 || dst_height <= 0) {
    798     return -1;
    799   }
    800   ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
    801             dst_argb, dst_stride_argb, dst_width, dst_height,
    802             0, 0, dst_width, dst_height, filtering);
    803   return 0;
    804 }
    805 
    806 #ifdef __cplusplus
    807 }  // extern "C"
    808 }  // namespace libyuv
    809 #endif
    810