Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/scale.h"
     12 
     13 #include <assert.h>
     14 #include <string.h>
     15 
     16 #include "libyuv/cpu_id.h"
     17 #include "libyuv/planar_functions.h"  // For CopyARGB
     18 #include "libyuv/row.h"
     19 #include "libyuv/scale_row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 static __inline int Abs(int v) {
     27   return v >= 0 ? v : -v;
     28 }
     29 
     30 // ScaleARGB ARGB, 1/2
     31 // This is an optimized version for scaling down a ARGB to 1/2 of
     32 // its original size.
     33 static void ScaleARGBDown2(int src_width,
     34                            int src_height,
     35                            int dst_width,
     36                            int dst_height,
     37                            int src_stride,
     38                            int dst_stride,
     39                            const uint8* src_argb,
     40                            uint8* dst_argb,
     41                            int x,
     42                            int dx,
     43                            int y,
     44                            int dy,
     45                            enum FilterMode filtering) {
     46   int j;
     47   int row_stride = src_stride * (dy >> 16);
     48   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
     49                             uint8* dst_argb, int dst_width) =
     50       filtering == kFilterNone
     51           ? ScaleARGBRowDown2_C
     52           : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
     53                                         : ScaleARGBRowDown2Box_C);
     54   (void)src_width;
     55   (void)src_height;
     56   (void)dx;
     57   assert(dx == 65536 * 2);      // Test scale factor of 2.
     58   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
     59   // Advance to odd row, even column.
     60   if (filtering == kFilterBilinear) {
     61     src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
     62   } else {
     63     src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
     64   }
     65 
     66 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
     67   if (TestCpuFlag(kCpuHasSSE2)) {
     68     ScaleARGBRowDown2 =
     69         filtering == kFilterNone
     70             ? ScaleARGBRowDown2_Any_SSE2
     71             : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
     72                                           : ScaleARGBRowDown2Box_Any_SSE2);
     73     if (IS_ALIGNED(dst_width, 4)) {
     74       ScaleARGBRowDown2 =
     75           filtering == kFilterNone
     76               ? ScaleARGBRowDown2_SSE2
     77               : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
     78                                             : ScaleARGBRowDown2Box_SSE2);
     79     }
     80   }
     81 #endif
     82 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
     83   if (TestCpuFlag(kCpuHasNEON)) {
     84     ScaleARGBRowDown2 =
     85         filtering == kFilterNone
     86             ? ScaleARGBRowDown2_Any_NEON
     87             : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
     88                                           : ScaleARGBRowDown2Box_Any_NEON);
     89     if (IS_ALIGNED(dst_width, 8)) {
     90       ScaleARGBRowDown2 =
     91           filtering == kFilterNone
     92               ? ScaleARGBRowDown2_NEON
     93               : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
     94                                             : ScaleARGBRowDown2Box_NEON);
     95     }
     96   }
     97 #endif
     98 #if defined(HAS_SCALEARGBROWDOWN2_MSA)
     99   if (TestCpuFlag(kCpuHasMSA)) {
    100     ScaleARGBRowDown2 =
    101         filtering == kFilterNone
    102             ? ScaleARGBRowDown2_Any_MSA
    103             : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
    104                                           : ScaleARGBRowDown2Box_Any_MSA);
    105     if (IS_ALIGNED(dst_width, 4)) {
    106       ScaleARGBRowDown2 =
    107           filtering == kFilterNone
    108               ? ScaleARGBRowDown2_MSA
    109               : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
    110                                             : ScaleARGBRowDown2Box_MSA);
    111     }
    112   }
    113 #endif
    114 
    115   if (filtering == kFilterLinear) {
    116     src_stride = 0;
    117   }
    118   for (j = 0; j < dst_height; ++j) {
    119     ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
    120     src_argb += row_stride;
    121     dst_argb += dst_stride;
    122   }
    123 }
    124 
    125 // ScaleARGB ARGB, 1/4
    126 // This is an optimized version for scaling down a ARGB to 1/4 of
    127 // its original size.
    128 static void ScaleARGBDown4Box(int src_width,
    129                               int src_height,
    130                               int dst_width,
    131                               int dst_height,
    132                               int src_stride,
    133                               int dst_stride,
    134                               const uint8* src_argb,
    135                               uint8* dst_argb,
    136                               int x,
    137                               int dx,
    138                               int y,
    139                               int dy) {
    140   int j;
    141   // Allocate 2 rows of ARGB.
    142   const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
    143   align_buffer_64(row, kRowSize * 2);
    144   int row_stride = src_stride * (dy >> 16);
    145   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
    146                             uint8* dst_argb, int dst_width) =
    147       ScaleARGBRowDown2Box_C;
    148   // Advance to odd row, even column.
    149   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
    150   (void)src_width;
    151   (void)src_height;
    152   (void)dx;
    153   assert(dx == 65536 * 4);      // Test scale factor of 4.
    154   assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
    155 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
    156   if (TestCpuFlag(kCpuHasSSE2)) {
    157     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
    158     if (IS_ALIGNED(dst_width, 4)) {
    159       ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
    160     }
    161   }
    162 #endif
    163 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
    164   if (TestCpuFlag(kCpuHasNEON)) {
    165     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
    166     if (IS_ALIGNED(dst_width, 8)) {
    167       ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
    168     }
    169   }
    170 #endif
    171 
    172   for (j = 0; j < dst_height; ++j) {
    173     ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
    174     ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
    175                       dst_width * 2);
    176     ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
    177     src_argb += row_stride;
    178     dst_argb += dst_stride;
    179   }
    180   free_aligned_buffer_64(row);
    181 }
    182 
    183 // ScaleARGB ARGB Even
    184 // This is an optimized version for scaling down a ARGB to even
    185 // multiple of its original size.
    186 static void ScaleARGBDownEven(int src_width,
    187                               int src_height,
    188                               int dst_width,
    189                               int dst_height,
    190                               int src_stride,
    191                               int dst_stride,
    192                               const uint8* src_argb,
    193                               uint8* dst_argb,
    194                               int x,
    195                               int dx,
    196                               int y,
    197                               int dy,
    198                               enum FilterMode filtering) {
    199   int j;
    200   int col_step = dx >> 16;
    201   int row_stride = (dy >> 16) * src_stride;
    202   void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
    203                                int src_step, uint8* dst_argb, int dst_width) =
    204       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
    205   (void)src_width;
    206   (void)src_height;
    207   assert(IS_ALIGNED(src_width, 2));
    208   assert(IS_ALIGNED(src_height, 2));
    209   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
    210 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
    211   if (TestCpuFlag(kCpuHasSSE2)) {
    212     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
    213                                      : ScaleARGBRowDownEven_Any_SSE2;
    214     if (IS_ALIGNED(dst_width, 4)) {
    215       ScaleARGBRowDownEven =
    216           filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
    217     }
    218   }
    219 #endif
    220 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
    221   if (TestCpuFlag(kCpuHasNEON)) {
    222     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
    223                                      : ScaleARGBRowDownEven_Any_NEON;
    224     if (IS_ALIGNED(dst_width, 4)) {
    225       ScaleARGBRowDownEven =
    226           filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
    227     }
    228   }
    229 #endif
    230 #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
    231   if (TestCpuFlag(kCpuHasMSA)) {
    232     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
    233                                      : ScaleARGBRowDownEven_Any_MSA;
    234     if (IS_ALIGNED(dst_width, 4)) {
    235       ScaleARGBRowDownEven =
    236           filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
    237     }
    238   }
    239 #endif
    240 
    241   if (filtering == kFilterLinear) {
    242     src_stride = 0;
    243   }
    244   for (j = 0; j < dst_height; ++j) {
    245     ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
    246     src_argb += row_stride;
    247     dst_argb += dst_stride;
    248   }
    249 }
    250 
    251 // Scale ARGB down with bilinear interpolation.
    252 static void ScaleARGBBilinearDown(int src_width,
    253                                   int src_height,
    254                                   int dst_width,
    255                                   int dst_height,
    256                                   int src_stride,
    257                                   int dst_stride,
    258                                   const uint8* src_argb,
    259                                   uint8* dst_argb,
    260                                   int x,
    261                                   int dx,
    262                                   int y,
    263                                   int dy,
    264                                   enum FilterMode filtering) {
    265   int j;
    266   void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
    267                          ptrdiff_t src_stride, int dst_width,
    268                          int source_y_fraction) = InterpolateRow_C;
    269   void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
    270                               int dst_width, int x, int dx) =
    271       (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
    272   int64 xlast = x + (int64)(dst_width - 1) * dx;
    273   int64 xl = (dx >= 0) ? x : xlast;
    274   int64 xr = (dx >= 0) ? xlast : x;
    275   int clip_src_width;
    276   xl = (xl >> 16) & ~3;    // Left edge aligned.
    277   xr = (xr >> 16) + 1;     // Right most pixel used.  Bilinear uses 2 pixels.
    278   xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
    279   if (xr > src_width) {
    280     xr = src_width;
    281   }
    282   clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
    283   src_argb += xl * 4;
    284   x -= (int)(xl << 16);
    285 #if defined(HAS_INTERPOLATEROW_SSSE3)
    286   if (TestCpuFlag(kCpuHasSSSE3)) {
    287     InterpolateRow = InterpolateRow_Any_SSSE3;
    288     if (IS_ALIGNED(clip_src_width, 16)) {
    289       InterpolateRow = InterpolateRow_SSSE3;
    290     }
    291   }
    292 #endif
    293 #if defined(HAS_INTERPOLATEROW_AVX2)
    294   if (TestCpuFlag(kCpuHasAVX2)) {
    295     InterpolateRow = InterpolateRow_Any_AVX2;
    296     if (IS_ALIGNED(clip_src_width, 32)) {
    297       InterpolateRow = InterpolateRow_AVX2;
    298     }
    299   }
    300 #endif
    301 #if defined(HAS_INTERPOLATEROW_NEON)
    302   if (TestCpuFlag(kCpuHasNEON)) {
    303     InterpolateRow = InterpolateRow_Any_NEON;
    304     if (IS_ALIGNED(clip_src_width, 16)) {
    305       InterpolateRow = InterpolateRow_NEON;
    306     }
    307   }
    308 #endif
    309 #if defined(HAS_INTERPOLATEROW_DSPR2)
    310   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
    311       IS_ALIGNED(src_stride, 4)) {
    312     InterpolateRow = InterpolateRow_Any_DSPR2;
    313     if (IS_ALIGNED(clip_src_width, 4)) {
    314       InterpolateRow = InterpolateRow_DSPR2;
    315     }
    316   }
    317 #endif
    318 #if defined(HAS_INTERPOLATEROW_MSA)
    319   if (TestCpuFlag(kCpuHasMSA)) {
    320     InterpolateRow = InterpolateRow_Any_MSA;
    321     if (IS_ALIGNED(clip_src_width, 32)) {
    322       InterpolateRow = InterpolateRow_MSA;
    323     }
    324   }
    325 #endif
    326 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
    327   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    328     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
    329   }
    330 #endif
    331 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
    332   if (TestCpuFlag(kCpuHasNEON)) {
    333     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    334     if (IS_ALIGNED(dst_width, 4)) {
    335       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    336     }
    337   }
    338 #endif
    339   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
    340   // Allocate a row of ARGB.
    341   {
    342     align_buffer_64(row, clip_src_width * 4);
    343 
    344     const int max_y = (src_height - 1) << 16;
    345     if (y > max_y) {
    346       y = max_y;
    347     }
    348     for (j = 0; j < dst_height; ++j) {
    349       int yi = y >> 16;
    350       const uint8* src = src_argb + yi * src_stride;
    351       if (filtering == kFilterLinear) {
    352         ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
    353       } else {
    354         int yf = (y >> 8) & 255;
    355         InterpolateRow(row, src, src_stride, clip_src_width, yf);
    356         ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
    357       }
    358       dst_argb += dst_stride;
    359       y += dy;
    360       if (y > max_y) {
    361         y = max_y;
    362       }
    363     }
    364     free_aligned_buffer_64(row);
    365   }
    366 }
    367 
    368 // Scale ARGB up with bilinear interpolation.
    369 static void ScaleARGBBilinearUp(int src_width,
    370                                 int src_height,
    371                                 int dst_width,
    372                                 int dst_height,
    373                                 int src_stride,
    374                                 int dst_stride,
    375                                 const uint8* src_argb,
    376                                 uint8* dst_argb,
    377                                 int x,
    378                                 int dx,
    379                                 int y,
    380                                 int dy,
    381                                 enum FilterMode filtering) {
    382   int j;
    383   void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
    384                          ptrdiff_t src_stride, int dst_width,
    385                          int source_y_fraction) = InterpolateRow_C;
    386   void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
    387                               int dst_width, int x, int dx) =
    388       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
    389   const int max_y = (src_height - 1) << 16;
    390 #if defined(HAS_INTERPOLATEROW_SSSE3)
    391   if (TestCpuFlag(kCpuHasSSSE3)) {
    392     InterpolateRow = InterpolateRow_Any_SSSE3;
    393     if (IS_ALIGNED(dst_width, 4)) {
    394       InterpolateRow = InterpolateRow_SSSE3;
    395     }
    396   }
    397 #endif
    398 #if defined(HAS_INTERPOLATEROW_AVX2)
    399   if (TestCpuFlag(kCpuHasAVX2)) {
    400     InterpolateRow = InterpolateRow_Any_AVX2;
    401     if (IS_ALIGNED(dst_width, 8)) {
    402       InterpolateRow = InterpolateRow_AVX2;
    403     }
    404   }
    405 #endif
    406 #if defined(HAS_INTERPOLATEROW_NEON)
    407   if (TestCpuFlag(kCpuHasNEON)) {
    408     InterpolateRow = InterpolateRow_Any_NEON;
    409     if (IS_ALIGNED(dst_width, 4)) {
    410       InterpolateRow = InterpolateRow_NEON;
    411     }
    412   }
    413 #endif
    414 #if defined(HAS_INTERPOLATEROW_DSPR2)
    415   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
    416       IS_ALIGNED(dst_stride, 4)) {
    417     InterpolateRow = InterpolateRow_DSPR2;
    418   }
    419 #endif
    420 #if defined(HAS_INTERPOLATEROW_MSA)
    421   if (TestCpuFlag(kCpuHasMSA)) {
    422     InterpolateRow = InterpolateRow_Any_MSA;
    423     if (IS_ALIGNED(dst_width, 8)) {
    424       InterpolateRow = InterpolateRow_MSA;
    425     }
    426   }
    427 #endif
    428   if (src_width >= 32768) {
    429     ScaleARGBFilterCols =
    430         filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
    431   }
    432 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
    433   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    434     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
    435   }
    436 #endif
    437 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
    438   if (filtering && TestCpuFlag(kCpuHasNEON)) {
    439     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    440     if (IS_ALIGNED(dst_width, 4)) {
    441       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    442     }
    443   }
    444 #endif
    445 #if defined(HAS_SCALEARGBCOLS_SSE2)
    446   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    447     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
    448   }
    449 #endif
    450 #if defined(HAS_SCALEARGBCOLS_NEON)
    451   if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    452     ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    453     if (IS_ALIGNED(dst_width, 8)) {
    454       ScaleARGBFilterCols = ScaleARGBCols_NEON;
    455     }
    456   }
    457 #endif
    458   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    459     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
    460 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    461     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    462       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    463     }
    464 #endif
    465   }
    466 
    467   if (y > max_y) {
    468     y = max_y;
    469   }
    470 
    471   {
    472     int yi = y >> 16;
    473     const uint8* src = src_argb + yi * src_stride;
    474 
    475     // Allocate 2 rows of ARGB.
    476     const int kRowSize = (dst_width * 4 + 31) & ~31;
    477     align_buffer_64(row, kRowSize * 2);
    478 
    479     uint8* rowptr = row;
    480     int rowstride = kRowSize;
    481     int lasty = yi;
    482 
    483     ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
    484     if (src_height > 1) {
    485       src += src_stride;
    486     }
    487     ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    488     src += src_stride;
    489 
    490     for (j = 0; j < dst_height; ++j) {
    491       yi = y >> 16;
    492       if (yi != lasty) {
    493         if (y > max_y) {
    494           y = max_y;
    495           yi = y >> 16;
    496           src = src_argb + yi * src_stride;
    497         }
    498         if (yi != lasty) {
    499           ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
    500           rowptr += rowstride;
    501           rowstride = -rowstride;
    502           lasty = yi;
    503           src += src_stride;
    504         }
    505       }
    506       if (filtering == kFilterLinear) {
    507         InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    508       } else {
    509         int yf = (y >> 8) & 255;
    510         InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    511       }
    512       dst_argb += dst_stride;
    513       y += dy;
    514     }
    515     free_aligned_buffer_64(row);
    516   }
    517 }
    518 
    519 #ifdef YUVSCALEUP
    520 // Scale YUV to ARGB up with bilinear interpolation.
    521 static void ScaleYUVToARGBBilinearUp(int src_width,
    522                                      int src_height,
    523                                      int dst_width,
    524                                      int dst_height,
    525                                      int src_stride_y,
    526                                      int src_stride_u,
    527                                      int src_stride_v,
    528                                      int dst_stride_argb,
    529                                      const uint8* src_y,
    530                                      const uint8* src_u,
    531                                      const uint8* src_v,
    532                                      uint8* dst_argb,
    533                                      int x,
    534                                      int dx,
    535                                      int y,
    536                                      int dy,
    537                                      enum FilterMode filtering) {
    538   int j;
    539   void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
    540                         const uint8* v_buf, uint8* rgb_buf, int width) =
    541       I422ToARGBRow_C;
    542 #if defined(HAS_I422TOARGBROW_SSSE3)
    543   if (TestCpuFlag(kCpuHasSSSE3)) {
    544     I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    545     if (IS_ALIGNED(src_width, 8)) {
    546       I422ToARGBRow = I422ToARGBRow_SSSE3;
    547     }
    548   }
    549 #endif
    550 #if defined(HAS_I422TOARGBROW_AVX2)
    551   if (TestCpuFlag(kCpuHasAVX2)) {
    552     I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    553     if (IS_ALIGNED(src_width, 16)) {
    554       I422ToARGBRow = I422ToARGBRow_AVX2;
    555     }
    556   }
    557 #endif
    558 #if defined(HAS_I422TOARGBROW_NEON)
    559   if (TestCpuFlag(kCpuHasNEON)) {
    560     I422ToARGBRow = I422ToARGBRow_Any_NEON;
    561     if (IS_ALIGNED(src_width, 8)) {
    562       I422ToARGBRow = I422ToARGBRow_NEON;
    563     }
    564   }
    565 #endif
    566 #if defined(HAS_I422TOARGBROW_DSPR2)
    567   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
    568       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
    569       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
    570       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
    571       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    572     I422ToARGBRow = I422ToARGBRow_DSPR2;
    573   }
    574 #endif
    575 #if defined(HAS_I422TOARGBROW_MSA)
    576   if (TestCpuFlag(kCpuHasMSA)) {
    577     I422ToARGBRow = I422ToARGBRow_Any_MSA;
    578     if (IS_ALIGNED(src_width, 8)) {
    579       I422ToARGBRow = I422ToARGBRow_MSA;
    580     }
    581   }
    582 #endif
    583 
    584   void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
    585                          ptrdiff_t src_stride, int dst_width,
    586                          int source_y_fraction) = InterpolateRow_C;
    587 #if defined(HAS_INTERPOLATEROW_SSSE3)
    588   if (TestCpuFlag(kCpuHasSSSE3)) {
    589     InterpolateRow = InterpolateRow_Any_SSSE3;
    590     if (IS_ALIGNED(dst_width, 4)) {
    591       InterpolateRow = InterpolateRow_SSSE3;
    592     }
    593   }
    594 #endif
    595 #if defined(HAS_INTERPOLATEROW_AVX2)
    596   if (TestCpuFlag(kCpuHasAVX2)) {
    597     InterpolateRow = InterpolateRow_Any_AVX2;
    598     if (IS_ALIGNED(dst_width, 8)) {
    599       InterpolateRow = InterpolateRow_AVX2;
    600     }
    601   }
    602 #endif
    603 #if defined(HAS_INTERPOLATEROW_NEON)
    604   if (TestCpuFlag(kCpuHasNEON)) {
    605     InterpolateRow = InterpolateRow_Any_NEON;
    606     if (IS_ALIGNED(dst_width, 4)) {
    607       InterpolateRow = InterpolateRow_NEON;
    608     }
    609   }
    610 #endif
    611 #if defined(HAS_INTERPOLATEROW_DSPR2)
    612   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
    613       IS_ALIGNED(dst_stride_argb, 4)) {
    614     InterpolateRow = InterpolateRow_DSPR2;
    615   }
    616 #endif
    617 #if defined(HAS_INTERPOLATEROW_MSA)
    618   if (TestCpuFlag(kCpuHasMSA)) {
    619     InterpolateRow = InterpolateRow_Any_MSA;
    620     if (IS_ALIGNED(dst_width, 8)) {
    621       InterpolateRow = InterpolateRow_MSA;
    622     }
    623   }
    624 #endif
    625 
    626   void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
    627                               int dst_width, int x, int dx) =
    628       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
    629   if (src_width >= 32768) {
    630     ScaleARGBFilterCols =
    631         filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
    632   }
    633 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
    634   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    635     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
    636   }
    637 #endif
    638 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
    639   if (filtering && TestCpuFlag(kCpuHasNEON)) {
    640     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    641     if (IS_ALIGNED(dst_width, 4)) {
    642       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    643     }
    644   }
    645 #endif
    646 #if defined(HAS_SCALEARGBCOLS_SSE2)
    647   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    648     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
    649   }
    650 #endif
    651 #if defined(HAS_SCALEARGBCOLS_NEON)
    652   if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    653     ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    654     if (IS_ALIGNED(dst_width, 8)) {
    655       ScaleARGBFilterCols = ScaleARGBCols_NEON;
    656     }
    657   }
    658 #endif
    659   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    660     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
    661 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    662     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    663       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    664     }
    665 #endif
    666   }
    667 
    668   const int max_y = (src_height - 1) << 16;
    669   if (y > max_y) {
    670     y = max_y;
    671   }
    672   const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
    673   int yi = y >> 16;
    674   int uv_yi = yi >> kYShift;
    675   const uint8* src_row_y = src_y + yi * src_stride_y;
    676   const uint8* src_row_u = src_u + uv_yi * src_stride_u;
    677   const uint8* src_row_v = src_v + uv_yi * src_stride_v;
    678 
    679   // Allocate 2 rows of ARGB.
    680   const int kRowSize = (dst_width * 4 + 31) & ~31;
    681   align_buffer_64(row, kRowSize * 2);
    682 
    683   // Allocate 1 row of ARGB for source conversion.
    684   align_buffer_64(argb_row, src_width * 4);
    685 
    686   uint8* rowptr = row;
    687   int rowstride = kRowSize;
    688   int lasty = yi;
    689 
    690   // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
    691   ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
    692   if (src_height > 1) {
    693     src_row_y += src_stride_y;
    694     if (yi & 1) {
    695       src_row_u += src_stride_u;
    696       src_row_v += src_stride_v;
    697     }
    698   }
    699   ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
    700   if (src_height > 2) {
    701     src_row_y += src_stride_y;
    702     if (!(yi & 1)) {
    703       src_row_u += src_stride_u;
    704       src_row_v += src_stride_v;
    705     }
    706   }
    707 
    708   for (j = 0; j < dst_height; ++j) {
    709     yi = y >> 16;
    710     if (yi != lasty) {
    711       if (y > max_y) {
    712         y = max_y;
    713         yi = y >> 16;
    714         uv_yi = yi >> kYShift;
    715         src_row_y = src_y + yi * src_stride_y;
    716         src_row_u = src_u + uv_yi * src_stride_u;
    717         src_row_v = src_v + uv_yi * src_stride_v;
    718       }
    719       if (yi != lasty) {
    720         // TODO(fbarchard): Convert the clipped region of row.
    721         I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
    722         ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
    723         rowptr += rowstride;
    724         rowstride = -rowstride;
    725         lasty = yi;
    726         src_row_y += src_stride_y;
    727         if (yi & 1) {
    728           src_row_u += src_stride_u;
    729           src_row_v += src_stride_v;
    730         }
    731       }
    732     }
    733     if (filtering == kFilterLinear) {
    734       InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    735     } else {
    736       int yf = (y >> 8) & 255;
    737       InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    738     }
    739     dst_argb += dst_stride_argb;
    740     y += dy;
    741   }
    742   free_aligned_buffer_64(row);
    743   free_aligned_buffer_64(row_argb);
    744 }
    745 #endif
    746 
    747 // Scale ARGB to/from any dimensions, without interpolation.
    748 // Fixed point math is used for performance: The upper 16 bits
    749 // of x and dx is the integer part of the source position and
    750 // the lower 16 bits are the fixed decimal part.
    751 
    752 static void ScaleARGBSimple(int src_width,
    753                             int src_height,
    754                             int dst_width,
    755                             int dst_height,
    756                             int src_stride,
    757                             int dst_stride,
    758                             const uint8* src_argb,
    759                             uint8* dst_argb,
    760                             int x,
    761                             int dx,
    762                             int y,
    763                             int dy) {
    764   int j;
    765   void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width,
    766                         int x, int dx) =
    767       (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
    768   (void)src_height;
    769 #if defined(HAS_SCALEARGBCOLS_SSE2)
    770   if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    771     ScaleARGBCols = ScaleARGBCols_SSE2;
    772   }
    773 #endif
    774 #if defined(HAS_SCALEARGBCOLS_NEON)
    775   if (TestCpuFlag(kCpuHasNEON)) {
    776     ScaleARGBCols = ScaleARGBCols_Any_NEON;
    777     if (IS_ALIGNED(dst_width, 8)) {
    778       ScaleARGBCols = ScaleARGBCols_NEON;
    779     }
    780   }
    781 #endif
    782   if (src_width * 2 == dst_width && x < 0x8000) {
    783     ScaleARGBCols = ScaleARGBColsUp2_C;
    784 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    785     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
    786       ScaleARGBCols = ScaleARGBColsUp2_SSE2;
    787     }
    788 #endif
    789   }
    790 
    791   for (j = 0; j < dst_height; ++j) {
    792     ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
    793                   dx);
    794     dst_argb += dst_stride;
    795     y += dy;
    796   }
    797 }
    798 
    799 // ScaleARGB a ARGB.
    800 // This function in turn calls a scaling function
    801 // suitable for handling the desired resolutions.
    802 static void ScaleARGB(const uint8* src,
    803                       int src_stride,
    804                       int src_width,
    805                       int src_height,
    806                       uint8* dst,
    807                       int dst_stride,
    808                       int dst_width,
    809                       int dst_height,
    810                       int clip_x,
    811                       int clip_y,
    812                       int clip_width,
    813                       int clip_height,
    814                       enum FilterMode filtering) {
    815   // Initial source x/y coordinate and step values as 16.16 fixed point.
    816   int x = 0;
    817   int y = 0;
    818   int dx = 0;
    819   int dy = 0;
    820   // ARGB does not support box filter yet, but allow the user to pass it.
    821   // Simplify filtering when possible.
    822   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
    823                                 filtering);
    824 
    825   // Negative src_height means invert the image.
    826   if (src_height < 0) {
    827     src_height = -src_height;
    828     src = src + (src_height - 1) * src_stride;
    829     src_stride = -src_stride;
    830   }
    831   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
    832              &dx, &dy);
    833   src_width = Abs(src_width);
    834   if (clip_x) {
    835     int64 clipf = (int64)(clip_x)*dx;
    836     x += (clipf & 0xffff);
    837     src += (clipf >> 16) * 4;
    838     dst += clip_x * 4;
    839   }
    840   if (clip_y) {
    841     int64 clipf = (int64)(clip_y)*dy;
    842     y += (clipf & 0xffff);
    843     src += (clipf >> 16) * src_stride;
    844     dst += clip_y * dst_stride;
    845   }
    846 
    847   // Special case for integer step values.
    848   if (((dx | dy) & 0xffff) == 0) {
    849     if (!dx || !dy) {  // 1 pixel wide and/or tall.
    850       filtering = kFilterNone;
    851     } else {
    852       // Optimized even scale down. ie 2, 4, 6, 8, 10x.
    853       if (!(dx & 0x10000) && !(dy & 0x10000)) {
    854         if (dx == 0x20000) {
    855           // Optimized 1/2 downsample.
    856           ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
    857                          src_stride, dst_stride, src, dst, x, dx, y, dy,
    858                          filtering);
    859           return;
    860         }
    861         if (dx == 0x40000 && filtering == kFilterBox) {
    862           // Optimized 1/4 box downsample.
    863           ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
    864                             src_stride, dst_stride, src, dst, x, dx, y, dy);
    865           return;
    866         }
    867         ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
    868                           src_stride, dst_stride, src, dst, x, dx, y, dy,
    869                           filtering);
    870         return;
    871       }
    872       // Optimized odd scale down. ie 3, 5, 7, 9x.
    873       if ((dx & 0x10000) && (dy & 0x10000)) {
    874         filtering = kFilterNone;
    875         if (dx == 0x10000 && dy == 0x10000) {
    876           // Straight copy.
    877           ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
    878                    dst, dst_stride, clip_width, clip_height);
    879           return;
    880         }
    881       }
    882     }
    883   }
    884   if (dx == 0x10000 && (x & 0xffff) == 0) {
    885     // Arbitrary scale vertically, but unscaled vertically.
    886     ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
    887                        dst_stride, src, dst, x, y, dy, 4, filtering);
    888     return;
    889   }
    890   if (filtering && dy < 65536) {
    891     ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
    892                         src_stride, dst_stride, src, dst, x, dx, y, dy,
    893                         filtering);
    894     return;
    895   }
    896   if (filtering) {
    897     ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
    898                           src_stride, dst_stride, src, dst, x, dx, y, dy,
    899                           filtering);
    900     return;
    901   }
    902   ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
    903                   dst_stride, src, dst, x, dx, y, dy);
    904 }
    905 
    906 LIBYUV_API
    907 int ARGBScaleClip(const uint8* src_argb,
    908                   int src_stride_argb,
    909                   int src_width,
    910                   int src_height,
    911                   uint8* dst_argb,
    912                   int dst_stride_argb,
    913                   int dst_width,
    914                   int dst_height,
    915                   int clip_x,
    916                   int clip_y,
    917                   int clip_width,
    918                   int clip_height,
    919                   enum FilterMode filtering) {
    920   if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
    921       dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
    922       clip_width > 32768 || clip_height > 32768 ||
    923       (clip_x + clip_width) > dst_width ||
    924       (clip_y + clip_height) > dst_height) {
    925     return -1;
    926   }
    927   ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
    928             dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
    929             clip_height, filtering);
    930   return 0;
    931 }
    932 
    933 // Scale an ARGB image.
    934 LIBYUV_API
    935 int ARGBScale(const uint8* src_argb,
    936               int src_stride_argb,
    937               int src_width,
    938               int src_height,
    939               uint8* dst_argb,
    940               int dst_stride_argb,
    941               int dst_width,
    942               int dst_height,
    943               enum FilterMode filtering) {
    944   if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
    945       src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
    946     return -1;
    947   }
    948   ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
    949             dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
    950             filtering);
    951   return 0;
    952 }
    953 
    954 // Scale with YUV conversion to ARGB and clipping.
    955 LIBYUV_API
    956 int YUVToARGBScaleClip(const uint8* src_y,
    957                        int src_stride_y,
    958                        const uint8* src_u,
    959                        int src_stride_u,
    960                        const uint8* src_v,
    961                        int src_stride_v,
    962                        uint32 src_fourcc,
    963                        int src_width,
    964                        int src_height,
    965                        uint8* dst_argb,
    966                        int dst_stride_argb,
    967                        uint32 dst_fourcc,
    968                        int dst_width,
    969                        int dst_height,
    970                        int clip_x,
    971                        int clip_y,
    972                        int clip_width,
    973                        int clip_height,
    974                        enum FilterMode filtering) {
    975   uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
    976   int r;
    977   (void)src_fourcc;  // TODO(fbarchard): implement and/or assert.
    978   (void)dst_fourcc;
    979   I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
    980              argb_buffer, src_width * 4, src_width, src_height);
    981 
    982   r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
    983                     dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
    984                     clip_width, clip_height, filtering);
    985   free(argb_buffer);
    986   return r;
    987 }
    988 
    989 #ifdef __cplusplus
    990 }  // extern "C"
    991 }  // namespace libyuv
    992 #endif
    993