      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/planar_functions.h"
     12 
     13 #include <string.h>  // for memset()
     14 
     15 #include "libyuv/cpu_id.h"
     16 #ifdef HAVE_JPEG
     17 #include "libyuv/mjpeg_decoder.h"
     18 #endif
     19 #include "libyuv/row.h"
     20 #include "libyuv/scale_row.h"  // for ScaleRowDown2
     21 
     22 #ifdef __cplusplus
     23 namespace libyuv {
     24 extern "C" {
     25 #endif
     26 
     27 // Copy a plane of data
     28 LIBYUV_API
     29 void CopyPlane(const uint8* src_y,
     30                int src_stride_y,
     31                uint8* dst_y,
     32                int dst_stride_y,
     33                int width,
     34                int height) {
     35   int y;
     36   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
     37   // Negative height means invert the image.
     38   if (height < 0) {
     39     height = -height;
     40     dst_y = dst_y + (height - 1) * dst_stride_y;
     41     dst_stride_y = -dst_stride_y;
     42   }
     43   // Coalesce rows.
     44   if (src_stride_y == width && dst_stride_y == width) {
     45     width *= height;
     46     height = 1;
     47     src_stride_y = dst_stride_y = 0;
     48   }
     49   // Nothing to do.
     50   if (src_y == dst_y && src_stride_y == dst_stride_y) {
     51     return;
     52   }
     53 #if defined(HAS_COPYROW_SSE2)
     54   if (TestCpuFlag(kCpuHasSSE2)) {
     55     CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
     56   }
     57 #endif
     58 #if defined(HAS_COPYROW_AVX)
     59   if (TestCpuFlag(kCpuHasAVX)) {
     60     CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
     61   }
     62 #endif
     63 #if defined(HAS_COPYROW_ERMS)
     64   if (TestCpuFlag(kCpuHasERMS)) {
     65     CopyRow = CopyRow_ERMS;
     66   }
     67 #endif
     68 #if defined(HAS_COPYROW_NEON)
     69   if (TestCpuFlag(kCpuHasNEON)) {
     70     CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
     71   }
     72 #endif
     73 #if defined(HAS_COPYROW_MIPS)
     74   if (TestCpuFlag(kCpuHasMIPS)) {
     75     CopyRow = CopyRow_MIPS;
     76   }
     77 #endif
     78 
     79   // Copy plane
     80   for (y = 0; y < height; ++y) {
     81     CopyRow(src_y, dst_y, width);
     82     src_y += src_stride_y;
     83     dst_y += dst_stride_y;
     84   }
     85 }
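
// Usage sketch (not part of the library; buffer names and the 64x48 size are
// illustrative assumptions): copying a packed Y plane, and the same call with
// a negative height to write the destination bottom-up (vertical flip).
// Requires #include "libyuv/planar_functions.h".
//
//   uint8 src[64 * 48];
//   uint8 dst[64 * 48];
//   CopyPlane(src, 64, dst, 64, 64, 48);    // straight copy
//   CopyPlane(src, 64, dst, 64, 64, -48);   // copy with vertical flip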
     86 
     87 // TODO(fbarchard): Consider support for negative height.
     88 // TODO(fbarchard): Consider stride measured in bytes.
     89 LIBYUV_API
     90 void CopyPlane_16(const uint16* src_y,
     91                   int src_stride_y,
     92                   uint16* dst_y,
     93                   int dst_stride_y,
     94                   int width,
     95                   int height) {
     96   int y;
     97   void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
     98   // Coalesce rows.
     99   if (src_stride_y == width && dst_stride_y == width) {
    100     width *= height;
    101     height = 1;
    102     src_stride_y = dst_stride_y = 0;
    103   }
    104 #if defined(HAS_COPYROW_16_SSE2)
    105   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
    106     CopyRow = CopyRow_16_SSE2;
    107   }
    108 #endif
    109 #if defined(HAS_COPYROW_16_ERMS)
    110   if (TestCpuFlag(kCpuHasERMS)) {
    111     CopyRow = CopyRow_16_ERMS;
    112   }
    113 #endif
    114 #if defined(HAS_COPYROW_16_NEON)
    115   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    116     CopyRow = CopyRow_16_NEON;
    117   }
    118 #endif
    119 #if defined(HAS_COPYROW_16_MIPS)
    120   if (TestCpuFlag(kCpuHasMIPS)) {
    121     CopyRow = CopyRow_16_MIPS;
    122   }
    123 #endif
    124 
    125   // Copy plane
    126   for (y = 0; y < height; ++y) {
    127     CopyRow(src_y, dst_y, width);
    128     src_y += src_stride_y;
    129     dst_y += dst_stride_y;
    130   }
    131 }
    132 
    133 // Copy I422.
    134 LIBYUV_API
    135 int I422Copy(const uint8* src_y,
    136              int src_stride_y,
    137              const uint8* src_u,
    138              int src_stride_u,
    139              const uint8* src_v,
    140              int src_stride_v,
    141              uint8* dst_y,
    142              int dst_stride_y,
    143              uint8* dst_u,
    144              int dst_stride_u,
    145              uint8* dst_v,
    146              int dst_stride_v,
    147              int width,
    148              int height) {
    149   int halfwidth = (width + 1) >> 1;
    150   if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
    151     return -1;
    152   }
    153   // Negative height means invert the image.
    154   if (height < 0) {
    155     height = -height;
    156     src_y = src_y + (height - 1) * src_stride_y;
    157     src_u = src_u + (height - 1) * src_stride_u;
    158     src_v = src_v + (height - 1) * src_stride_v;
    159     src_stride_y = -src_stride_y;
    160     src_stride_u = -src_stride_u;
    161     src_stride_v = -src_stride_v;
    162   }
    163 
    164   if (dst_y) {
    165     CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    166   }
    167   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
    168   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
    169   return 0;
    170 }
    171 
    172 // Copy I444.
    173 LIBYUV_API
    174 int I444Copy(const uint8* src_y,
    175              int src_stride_y,
    176              const uint8* src_u,
    177              int src_stride_u,
    178              const uint8* src_v,
    179              int src_stride_v,
    180              uint8* dst_y,
    181              int dst_stride_y,
    182              uint8* dst_u,
    183              int dst_stride_u,
    184              uint8* dst_v,
    185              int dst_stride_v,
    186              int width,
    187              int height) {
    188   if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
    189     return -1;
    190   }
    191   // Negative height means invert the image.
    192   if (height < 0) {
    193     height = -height;
    194     src_y = src_y + (height - 1) * src_stride_y;
    195     src_u = src_u + (height - 1) * src_stride_u;
    196     src_v = src_v + (height - 1) * src_stride_v;
    197     src_stride_y = -src_stride_y;
    198     src_stride_u = -src_stride_u;
    199     src_stride_v = -src_stride_v;
    200   }
    201 
    202   if (dst_y) {
    203     CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    204   }
    205   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    206   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    207   return 0;
    208 }
    209 
    210 // Copy I400.
    211 LIBYUV_API
    212 int I400ToI400(const uint8* src_y,
    213                int src_stride_y,
    214                uint8* dst_y,
    215                int dst_stride_y,
    216                int width,
    217                int height) {
    218   if (!src_y || !dst_y || width <= 0 || height == 0) {
    219     return -1;
    220   }
    221   // Negative height means invert the image.
    222   if (height < 0) {
    223     height = -height;
    224     src_y = src_y + (height - 1) * src_stride_y;
    225     src_stride_y = -src_stride_y;
    226   }
    227   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    228   return 0;
    229 }
    230 
    231 // Convert I420 to I400.
    232 LIBYUV_API
    233 int I420ToI400(const uint8* src_y,
    234                int src_stride_y,
    235                const uint8* src_u,
    236                int src_stride_u,
    237                const uint8* src_v,
    238                int src_stride_v,
    239                uint8* dst_y,
    240                int dst_stride_y,
    241                int width,
    242                int height) {
    243   (void)src_u;
    244   (void)src_stride_u;
    245   (void)src_v;
    246   (void)src_stride_v;
    247   if (!src_y || !dst_y || width <= 0 || height == 0) {
    248     return -1;
    249   }
    250   // Negative height means invert the image.
    251   if (height < 0) {
    252     height = -height;
    253     src_y = src_y + (height - 1) * src_stride_y;
    254     src_stride_y = -src_stride_y;
    255   }
    256 
    257   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    258   return 0;
    259 }
    260 
     261 // Support function for NV12 etc. UV channels.
     262 // Width and height are the UV plane sizes (typically half the image width and height).
    263 LIBYUV_API
    264 void SplitUVPlane(const uint8* src_uv,
    265                   int src_stride_uv,
    266                   uint8* dst_u,
    267                   int dst_stride_u,
    268                   uint8* dst_v,
    269                   int dst_stride_v,
    270                   int width,
    271                   int height) {
    272   int y;
    273   void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    274                      int width) = SplitUVRow_C;
    275   // Negative height means invert the image.
    276   if (height < 0) {
    277     height = -height;
    278     dst_u = dst_u + (height - 1) * dst_stride_u;
    279     dst_v = dst_v + (height - 1) * dst_stride_v;
    280     dst_stride_u = -dst_stride_u;
    281     dst_stride_v = -dst_stride_v;
    282   }
    283   // Coalesce rows.
    284   if (src_stride_uv == width * 2 && dst_stride_u == width &&
    285       dst_stride_v == width) {
    286     width *= height;
    287     height = 1;
    288     src_stride_uv = dst_stride_u = dst_stride_v = 0;
    289   }
    290 #if defined(HAS_SPLITUVROW_SSE2)
    291   if (TestCpuFlag(kCpuHasSSE2)) {
    292     SplitUVRow = SplitUVRow_Any_SSE2;
    293     if (IS_ALIGNED(width, 16)) {
    294       SplitUVRow = SplitUVRow_SSE2;
    295     }
    296   }
    297 #endif
    298 #if defined(HAS_SPLITUVROW_AVX2)
    299   if (TestCpuFlag(kCpuHasAVX2)) {
    300     SplitUVRow = SplitUVRow_Any_AVX2;
    301     if (IS_ALIGNED(width, 32)) {
    302       SplitUVRow = SplitUVRow_AVX2;
    303     }
    304   }
    305 #endif
    306 #if defined(HAS_SPLITUVROW_NEON)
    307   if (TestCpuFlag(kCpuHasNEON)) {
    308     SplitUVRow = SplitUVRow_Any_NEON;
    309     if (IS_ALIGNED(width, 16)) {
    310       SplitUVRow = SplitUVRow_NEON;
    311     }
    312   }
    313 #endif
    314 #if defined(HAS_SPLITUVROW_DSPR2)
    315   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_u, 4) &&
    316       IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_v, 4) &&
    317       IS_ALIGNED(dst_stride_v, 4)) {
    318     SplitUVRow = SplitUVRow_Any_DSPR2;
    319     if (IS_ALIGNED(width, 16)) {
    320       SplitUVRow = SplitUVRow_DSPR2;
    321     }
    322   }
    323 #endif
    324 
    325   for (y = 0; y < height; ++y) {
    326     // Copy a row of UV.
    327     SplitUVRow(src_uv, dst_u, dst_v, width);
    328     dst_u += dst_stride_u;
    329     dst_v += dst_stride_v;
    330     src_uv += src_stride_uv;
    331   }
    332 }
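
// Usage sketch (sizes and names are illustrative assumptions): de-interleaving
// the UV plane of a 64x48 NV12 image into separate U and V planes. The UV
// plane is half width and half height, with 2 bytes per chroma sample pair.
//
//   enum { kHalfW = 32, kHalfH = 24 };
//   uint8 src_uv[kHalfW * 2 * kHalfH];   // UVUV... interleaved
//   uint8 dst_u[kHalfW * kHalfH];
//   uint8 dst_v[kHalfW * kHalfH];
//   SplitUVPlane(src_uv, kHalfW * 2, dst_u, kHalfW, dst_v, kHalfW,
//                kHalfW, kHalfH);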
    333 
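// Support function for NV12 etc. UV channels: interleave separate U and V
// planes into a single UV plane (the inverse of SplitUVPlane above).
// Width and height are plane sizes (typically half the image width and height).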
    334 LIBYUV_API
    335 void MergeUVPlane(const uint8* src_u,
    336                   int src_stride_u,
    337                   const uint8* src_v,
    338                   int src_stride_v,
    339                   uint8* dst_uv,
    340                   int dst_stride_uv,
    341                   int width,
    342                   int height) {
    343   int y;
    344   void (*MergeUVRow)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    345                      int width) = MergeUVRow_C;
    347   // Negative height means invert the image.
    348   if (height < 0) {
    349     height = -height;
    350     dst_uv = dst_uv + (height - 1) * dst_stride_uv;
    351     dst_stride_uv = -dst_stride_uv;
    352   }
    353   // Coalesce rows.
    354   if (src_stride_u == width && src_stride_v == width &&
    355       dst_stride_uv == width * 2) {
    356     width *= height;
    357     height = 1;
    358     src_stride_u = src_stride_v = dst_stride_uv = 0;
    359   }
    360 #if defined(HAS_MERGEUVROW_SSE2)
    361   if (TestCpuFlag(kCpuHasSSE2)) {
    362     MergeUVRow = MergeUVRow_Any_SSE2;
    363     if (IS_ALIGNED(width, 16)) {
    364       MergeUVRow = MergeUVRow_SSE2;
    365     }
    366   }
    367 #endif
    368 #if defined(HAS_MERGEUVROW_AVX2)
    369   if (TestCpuFlag(kCpuHasAVX2)) {
    370     MergeUVRow = MergeUVRow_Any_AVX2;
    371     if (IS_ALIGNED(width, 32)) {
    372       MergeUVRow = MergeUVRow_AVX2;
    373     }
    374   }
    375 #endif
    376 #if defined(HAS_MERGEUVROW_NEON)
    377   if (TestCpuFlag(kCpuHasNEON)) {
    378     MergeUVRow = MergeUVRow_Any_NEON;
    379     if (IS_ALIGNED(width, 16)) {
    380       MergeUVRow = MergeUVRow_NEON;
    381     }
    382   }
    383 #endif
    384 #if defined(HAS_MERGEUVROW_MSA)
    385   if (TestCpuFlag(kCpuHasMSA)) {
    386     MergeUVRow = MergeUVRow_Any_MSA;
    387     if (IS_ALIGNED(width, 16)) {
    388       MergeUVRow = MergeUVRow_MSA;
    389     }
    390   }
    391 #endif
    392 
    393   for (y = 0; y < height; ++y) {
    394     // Merge a row of U and V into a row of UV.
    395     MergeUVRow(src_u, src_v, dst_uv, width);
    396     src_u += src_stride_u;
    397     src_v += src_stride_v;
    398     dst_uv += dst_stride_uv;
    399   }
    400 }
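
// Usage sketch (illustrative buffers only): packing I420 style U and V planes
// into an NV12 style interleaved UV plane.
//
//   enum { kHalfW = 32, kHalfH = 24 };
//   uint8 src_u[kHalfW * kHalfH];
//   uint8 src_v[kHalfW * kHalfH];
//   uint8 dst_uv[kHalfW * 2 * kHalfH];
//   MergeUVPlane(src_u, kHalfW, src_v, kHalfW, dst_uv, kHalfW * 2,
//                kHalfW, kHalfH);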
    401 
    402 // Mirror a plane of data.
    403 void MirrorPlane(const uint8* src_y,
    404                  int src_stride_y,
    405                  uint8* dst_y,
    406                  int dst_stride_y,
    407                  int width,
    408                  int height) {
    409   int y;
    410   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
    411   // Negative height means invert the image.
    412   if (height < 0) {
    413     height = -height;
    414     src_y = src_y + (height - 1) * src_stride_y;
    415     src_stride_y = -src_stride_y;
    416   }
    417 #if defined(HAS_MIRRORROW_NEON)
    418   if (TestCpuFlag(kCpuHasNEON)) {
    419     MirrorRow = MirrorRow_Any_NEON;
    420     if (IS_ALIGNED(width, 16)) {
    421       MirrorRow = MirrorRow_NEON;
    422     }
    423   }
    424 #endif
    425 #if defined(HAS_MIRRORROW_SSSE3)
    426   if (TestCpuFlag(kCpuHasSSSE3)) {
    427     MirrorRow = MirrorRow_Any_SSSE3;
    428     if (IS_ALIGNED(width, 16)) {
    429       MirrorRow = MirrorRow_SSSE3;
    430     }
    431   }
    432 #endif
    433 #if defined(HAS_MIRRORROW_AVX2)
    434   if (TestCpuFlag(kCpuHasAVX2)) {
    435     MirrorRow = MirrorRow_Any_AVX2;
    436     if (IS_ALIGNED(width, 32)) {
    437       MirrorRow = MirrorRow_AVX2;
    438     }
    439   }
    440 #endif
     441 // TODO(fbarchard): Make MirrorRow for MIPS DSPR2 handle unaligned memory.
    442 #if defined(HAS_MIRRORROW_DSPR2)
    443   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_y, 4) &&
    444       IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(dst_y, 4) &&
    445       IS_ALIGNED(dst_stride_y, 4)) {
    446     MirrorRow = MirrorRow_DSPR2;
    447   }
    448 #endif
    449 #if defined(HAS_MIRRORROW_MSA)
    450   if (TestCpuFlag(kCpuHasMSA)) {
    451     MirrorRow = MirrorRow_Any_MSA;
    452     if (IS_ALIGNED(width, 64)) {
    453       MirrorRow = MirrorRow_MSA;
    454     }
    455   }
    456 #endif
    457 
    458   // Mirror plane
    459   for (y = 0; y < height; ++y) {
    460     MirrorRow(src_y, dst_y, width);
    461     src_y += src_stride_y;
    462     dst_y += dst_stride_y;
    463   }
    464 }
    465 
    466 // Convert YUY2 to I422.
    467 LIBYUV_API
    468 int YUY2ToI422(const uint8* src_yuy2,
    469                int src_stride_yuy2,
    470                uint8* dst_y,
    471                int dst_stride_y,
    472                uint8* dst_u,
    473                int dst_stride_u,
    474                uint8* dst_v,
    475                int dst_stride_v,
    476                int width,
    477                int height) {
    478   int y;
    479   void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
    480                          int width) = YUY2ToUV422Row_C;
    481   void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
    482       YUY2ToYRow_C;
    483   if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    484     return -1;
    485   }
    486   // Negative height means invert the image.
    487   if (height < 0) {
    488     height = -height;
    489     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    490     src_stride_yuy2 = -src_stride_yuy2;
    491   }
    492   // Coalesce rows.
    493   if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
    494       dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
    495       width * height <= 32768) {
    496     width *= height;
    497     height = 1;
    498     src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    499   }
    500 #if defined(HAS_YUY2TOYROW_SSE2)
    501   if (TestCpuFlag(kCpuHasSSE2)) {
    502     YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    503     YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    504     if (IS_ALIGNED(width, 16)) {
    505       YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
    506       YUY2ToYRow = YUY2ToYRow_SSE2;
    507     }
    508   }
    509 #endif
    510 #if defined(HAS_YUY2TOYROW_AVX2)
    511   if (TestCpuFlag(kCpuHasAVX2)) {
    512     YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    513     YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    514     if (IS_ALIGNED(width, 32)) {
    515       YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
    516       YUY2ToYRow = YUY2ToYRow_AVX2;
    517     }
    518   }
    519 #endif
    520 #if defined(HAS_YUY2TOYROW_NEON)
    521   if (TestCpuFlag(kCpuHasNEON)) {
    522     YUY2ToYRow = YUY2ToYRow_Any_NEON;
    523     YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    524     if (IS_ALIGNED(width, 16)) {
    525       YUY2ToYRow = YUY2ToYRow_NEON;
    526       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    527     }
    528   }
    529 #endif
    530 #if defined(HAS_YUY2TOYROW_MSA)
    531   if (TestCpuFlag(kCpuHasMSA)) {
    532     YUY2ToYRow = YUY2ToYRow_Any_MSA;
    533     YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
    534     if (IS_ALIGNED(width, 32)) {
    535       YUY2ToYRow = YUY2ToYRow_MSA;
    536       YUY2ToUV422Row = YUY2ToUV422Row_MSA;
    537     }
    538   }
    539 #endif
    540 
    541   for (y = 0; y < height; ++y) {
    542     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    543     YUY2ToYRow(src_yuy2, dst_y, width);
    544     src_yuy2 += src_stride_yuy2;
    545     dst_y += dst_stride_y;
    546     dst_u += dst_stride_u;
    547     dst_v += dst_stride_v;
    548   }
    549   return 0;
    550 }
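
// Usage sketch (sizes are illustrative assumptions): unpacking a 64x48 YUY2
// image into I422 planes. YUY2 packs 2 pixels into 4 bytes, so the source
// stride is width * 2; the U and V planes are half width but full height.
//
//   uint8 src_yuy2[64 * 2 * 48];
//   uint8 dst_y[64 * 48], dst_u[32 * 48], dst_v[32 * 48];
//   YUY2ToI422(src_yuy2, 64 * 2, dst_y, 64, dst_u, 32, dst_v, 32, 64, 48);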
    551 
    552 // Convert UYVY to I422.
    553 LIBYUV_API
    554 int UYVYToI422(const uint8* src_uyvy,
    555                int src_stride_uyvy,
    556                uint8* dst_y,
    557                int dst_stride_y,
    558                uint8* dst_u,
    559                int dst_stride_u,
    560                uint8* dst_v,
    561                int dst_stride_v,
    562                int width,
    563                int height) {
    564   int y;
    565   void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
    566                          int width) = UYVYToUV422Row_C;
    567   void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int width) =
    568       UYVYToYRow_C;
    569   if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    570     return -1;
    571   }
    572   // Negative height means invert the image.
    573   if (height < 0) {
    574     height = -height;
    575     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    576     src_stride_uyvy = -src_stride_uyvy;
    577   }
    578   // Coalesce rows.
    579   if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
    580       dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
    581       width * height <= 32768) {
    582     width *= height;
    583     height = 1;
    584     src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    585   }
    586 #if defined(HAS_UYVYTOYROW_SSE2)
    587   if (TestCpuFlag(kCpuHasSSE2)) {
    588     UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    589     UYVYToYRow = UYVYToYRow_Any_SSE2;
    590     if (IS_ALIGNED(width, 16)) {
    591       UYVYToUV422Row = UYVYToUV422Row_SSE2;
    592       UYVYToYRow = UYVYToYRow_SSE2;
    593     }
    594   }
    595 #endif
    596 #if defined(HAS_UYVYTOYROW_AVX2)
    597   if (TestCpuFlag(kCpuHasAVX2)) {
    598     UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    599     UYVYToYRow = UYVYToYRow_Any_AVX2;
    600     if (IS_ALIGNED(width, 32)) {
    601       UYVYToUV422Row = UYVYToUV422Row_AVX2;
    602       UYVYToYRow = UYVYToYRow_AVX2;
    603     }
    604   }
    605 #endif
    606 #if defined(HAS_UYVYTOYROW_NEON)
    607   if (TestCpuFlag(kCpuHasNEON)) {
    608     UYVYToYRow = UYVYToYRow_Any_NEON;
    609     UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    610     if (IS_ALIGNED(width, 16)) {
    611       UYVYToYRow = UYVYToYRow_NEON;
    612       UYVYToUV422Row = UYVYToUV422Row_NEON;
    613     }
    614   }
    615 #endif
    616 #if defined(HAS_UYVYTOYROW_MSA)
    617   if (TestCpuFlag(kCpuHasMSA)) {
    618     UYVYToYRow = UYVYToYRow_Any_MSA;
    619     UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
    620     if (IS_ALIGNED(width, 32)) {
    621       UYVYToYRow = UYVYToYRow_MSA;
    622       UYVYToUV422Row = UYVYToUV422Row_MSA;
    623     }
    624   }
    625 #endif
    626 
    627   for (y = 0; y < height; ++y) {
    628     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    629     UYVYToYRow(src_uyvy, dst_y, width);
    630     src_uyvy += src_stride_uyvy;
    631     dst_y += dst_stride_y;
    632     dst_u += dst_stride_u;
    633     dst_v += dst_stride_v;
    634   }
    635   return 0;
    636 }
    637 
    638 // Convert YUY2 to Y.
    639 LIBYUV_API
    640 int YUY2ToY(const uint8* src_yuy2,
    641             int src_stride_yuy2,
    642             uint8* dst_y,
    643             int dst_stride_y,
    644             int width,
    645             int height) {
    646   int y;
    647   void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
    648       YUY2ToYRow_C;
    649   if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
    650     return -1;
    651   }
    652   // Negative height means invert the image.
    653   if (height < 0) {
    654     height = -height;
    655     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    656     src_stride_yuy2 = -src_stride_yuy2;
    657   }
    658   // Coalesce rows.
    659   if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
    660     width *= height;
    661     height = 1;
    662     src_stride_yuy2 = dst_stride_y = 0;
    663   }
    664 #if defined(HAS_YUY2TOYROW_SSE2)
    665   if (TestCpuFlag(kCpuHasSSE2)) {
    666     YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    667     if (IS_ALIGNED(width, 16)) {
    668       YUY2ToYRow = YUY2ToYRow_SSE2;
    669     }
    670   }
    671 #endif
    672 #if defined(HAS_YUY2TOYROW_AVX2)
    673   if (TestCpuFlag(kCpuHasAVX2)) {
    674     YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    675     if (IS_ALIGNED(width, 32)) {
    676       YUY2ToYRow = YUY2ToYRow_AVX2;
    677     }
    678   }
    679 #endif
    680 #if defined(HAS_YUY2TOYROW_NEON)
    681   if (TestCpuFlag(kCpuHasNEON)) {
    682     YUY2ToYRow = YUY2ToYRow_Any_NEON;
    683     if (IS_ALIGNED(width, 16)) {
    684       YUY2ToYRow = YUY2ToYRow_NEON;
    685     }
    686   }
    687 #endif
    688 #if defined(HAS_YUY2TOYROW_MSA)
    689   if (TestCpuFlag(kCpuHasMSA)) {
    690     YUY2ToYRow = YUY2ToYRow_Any_MSA;
    691     if (IS_ALIGNED(width, 32)) {
    692       YUY2ToYRow = YUY2ToYRow_MSA;
    693     }
    694   }
    695 #endif
    696 
    697   for (y = 0; y < height; ++y) {
    698     YUY2ToYRow(src_yuy2, dst_y, width);
    699     src_yuy2 += src_stride_yuy2;
    700     dst_y += dst_stride_y;
    701   }
    702   return 0;
    703 }
    704 
    705 // Mirror I400 with optional flipping
    706 LIBYUV_API
    707 int I400Mirror(const uint8* src_y,
    708                int src_stride_y,
    709                uint8* dst_y,
    710                int dst_stride_y,
    711                int width,
    712                int height) {
    713   if (!src_y || !dst_y || width <= 0 || height == 0) {
    714     return -1;
    715   }
    716   // Negative height means invert the image.
    717   if (height < 0) {
    718     height = -height;
    719     src_y = src_y + (height - 1) * src_stride_y;
    720     src_stride_y = -src_stride_y;
    721   }
    722 
    723   MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    724   return 0;
    725 }
    726 
    727 // Mirror I420 with optional flipping
    728 LIBYUV_API
    729 int I420Mirror(const uint8* src_y,
    730                int src_stride_y,
    731                const uint8* src_u,
    732                int src_stride_u,
    733                const uint8* src_v,
    734                int src_stride_v,
    735                uint8* dst_y,
    736                int dst_stride_y,
    737                uint8* dst_u,
    738                int dst_stride_u,
    739                uint8* dst_v,
    740                int dst_stride_v,
    741                int width,
    742                int height) {
    743   int halfwidth = (width + 1) >> 1;
    744   int halfheight = (height + 1) >> 1;
    745   if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 ||
    746       height == 0) {
    747     return -1;
    748   }
    749   // Negative height means invert the image.
    750   if (height < 0) {
    751     height = -height;
    752     halfheight = (height + 1) >> 1;
    753     src_y = src_y + (height - 1) * src_stride_y;
    754     src_u = src_u + (halfheight - 1) * src_stride_u;
    755     src_v = src_v + (halfheight - 1) * src_stride_v;
    756     src_stride_y = -src_stride_y;
    757     src_stride_u = -src_stride_u;
    758     src_stride_v = -src_stride_v;
    759   }
    760 
    761   if (dst_y) {
    762     MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    763   }
    764   MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
    765   MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
    766   return 0;
    767 }
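
// Usage sketch (buffers and sizes are illustrative): horizontally mirroring a
// 64x48 I420 frame. Passing -48 as the height would also flip the image
// vertically, which combined with the mirror gives a 180 degree rotation.
//
//   uint8 src_y[64 * 48], src_u[32 * 24], src_v[32 * 24];
//   uint8 dst_y[64 * 48], dst_u[32 * 24], dst_v[32 * 24];
//   I420Mirror(src_y, 64, src_u, 32, src_v, 32,
//              dst_y, 64, dst_u, 32, dst_v, 32, 64, 48);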
    768 
    769 // ARGB mirror.
    770 LIBYUV_API
    771 int ARGBMirror(const uint8* src_argb,
    772                int src_stride_argb,
    773                uint8* dst_argb,
    774                int dst_stride_argb,
    775                int width,
    776                int height) {
    777   int y;
    778   void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
    779       ARGBMirrorRow_C;
    780   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    781     return -1;
    782   }
    783   // Negative height means invert the image.
    784   if (height < 0) {
    785     height = -height;
    786     src_argb = src_argb + (height - 1) * src_stride_argb;
    787     src_stride_argb = -src_stride_argb;
    788   }
    789 #if defined(HAS_ARGBMIRRORROW_NEON)
    790   if (TestCpuFlag(kCpuHasNEON)) {
    791     ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
    792     if (IS_ALIGNED(width, 4)) {
    793       ARGBMirrorRow = ARGBMirrorRow_NEON;
    794     }
    795   }
    796 #endif
    797 #if defined(HAS_ARGBMIRRORROW_SSE2)
    798   if (TestCpuFlag(kCpuHasSSE2)) {
    799     ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
    800     if (IS_ALIGNED(width, 4)) {
    801       ARGBMirrorRow = ARGBMirrorRow_SSE2;
    802     }
    803   }
    804 #endif
    805 #if defined(HAS_ARGBMIRRORROW_AVX2)
    806   if (TestCpuFlag(kCpuHasAVX2)) {
    807     ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
    808     if (IS_ALIGNED(width, 8)) {
    809       ARGBMirrorRow = ARGBMirrorRow_AVX2;
    810     }
    811   }
    812 #endif
    813 #if defined(HAS_ARGBMIRRORROW_MSA)
    814   if (TestCpuFlag(kCpuHasMSA)) {
    815     ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
    816     if (IS_ALIGNED(width, 16)) {
    817       ARGBMirrorRow = ARGBMirrorRow_MSA;
    818     }
    819   }
    820 #endif
    821 
    822   // Mirror plane
    823   for (y = 0; y < height; ++y) {
    824     ARGBMirrorRow(src_argb, dst_argb, width);
    825     src_argb += src_stride_argb;
    826     dst_argb += dst_stride_argb;
    827   }
    828   return 0;
    829 }
    830 
     831 // Get a blender that is optimized for the CPU and pixel count.
    832 // As there are 6 blenders to choose from, the caller should try to use
    833 // the same blend function for all pixels if possible.
    834 LIBYUV_API
    835 ARGBBlendRow GetARGBBlend() {
    836   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    837                        uint8* dst_argb, int width) = ARGBBlendRow_C;
    838 #if defined(HAS_ARGBBLENDROW_SSSE3)
    839   if (TestCpuFlag(kCpuHasSSSE3)) {
    840     ARGBBlendRow = ARGBBlendRow_SSSE3;
    841     return ARGBBlendRow;
    842   }
    843 #endif
    844 #if defined(HAS_ARGBBLENDROW_NEON)
    845   if (TestCpuFlag(kCpuHasNEON)) {
    846     ARGBBlendRow = ARGBBlendRow_NEON;
    847   }
    848 #endif
    849   return ARGBBlendRow;
    850 }
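
// Usage sketch (loop variables and buffers are assumptions for illustration):
// fetch the row blender once and reuse it for every row, as the comment above
// recommends, rather than calling GetARGBBlend per row.
//
//   ARGBBlendRow blend_row = GetARGBBlend();
//   for (int y = 0; y < height; ++y) {
//     blend_row(fg + y * fg_stride, bg + y * bg_stride,
//               dst + y * dst_stride, width);
//   }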
    851 
    852 // Alpha Blend 2 ARGB images and store to destination.
    853 LIBYUV_API
    854 int ARGBBlend(const uint8* src_argb0,
    855               int src_stride_argb0,
    856               const uint8* src_argb1,
    857               int src_stride_argb1,
    858               uint8* dst_argb,
    859               int dst_stride_argb,
    860               int width,
    861               int height) {
    862   int y;
    863   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    864                        uint8* dst_argb, int width) = GetARGBBlend();
    865   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    866     return -1;
    867   }
    868   // Negative height means invert the image.
    869   if (height < 0) {
    870     height = -height;
    871     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    872     dst_stride_argb = -dst_stride_argb;
    873   }
    874   // Coalesce rows.
    875   if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
    876       dst_stride_argb == width * 4) {
    877     width *= height;
    878     height = 1;
    879     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    880   }
    881 
    882   for (y = 0; y < height; ++y) {
    883     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    884     src_argb0 += src_stride_argb0;
    885     src_argb1 += src_stride_argb1;
    886     dst_argb += dst_stride_argb;
    887   }
    888   return 0;
    889 }
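
// Usage sketch (names and the 64x48 size are illustrative): blending a 64x48
// ARGB foreground over a background of the same size; the blend is driven by
// the alpha channel of the first source.
//
//   uint8 fg[64 * 48 * 4], bg[64 * 48 * 4], out[64 * 48 * 4];
//   ARGBBlend(fg, 64 * 4, bg, 64 * 4, out, 64 * 4, 64, 48);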
    890 
    891 // Alpha Blend plane and store to destination.
    892 LIBYUV_API
    893 int BlendPlane(const uint8* src_y0,
    894                int src_stride_y0,
    895                const uint8* src_y1,
    896                int src_stride_y1,
    897                const uint8* alpha,
    898                int alpha_stride,
    899                uint8* dst_y,
    900                int dst_stride_y,
    901                int width,
    902                int height) {
    903   int y;
    904   void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
    905                         const uint8* alpha, uint8* dst, int width) =
    906       BlendPlaneRow_C;
    907   if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
    908     return -1;
    909   }
    910   // Negative height means invert the image.
    911   if (height < 0) {
    912     height = -height;
    913     dst_y = dst_y + (height - 1) * dst_stride_y;
    914     dst_stride_y = -dst_stride_y;
    915   }
    916 
    917   // Coalesce rows for Y plane.
    918   if (src_stride_y0 == width && src_stride_y1 == width &&
    919       alpha_stride == width && dst_stride_y == width) {
    920     width *= height;
    921     height = 1;
    922     src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
    923   }
    924 
    925 #if defined(HAS_BLENDPLANEROW_SSSE3)
    926   if (TestCpuFlag(kCpuHasSSSE3)) {
    927     BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    928     if (IS_ALIGNED(width, 8)) {
    929       BlendPlaneRow = BlendPlaneRow_SSSE3;
    930     }
    931   }
    932 #endif
    933 #if defined(HAS_BLENDPLANEROW_AVX2)
    934   if (TestCpuFlag(kCpuHasAVX2)) {
    935     BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    936     if (IS_ALIGNED(width, 32)) {
    937       BlendPlaneRow = BlendPlaneRow_AVX2;
    938     }
    939   }
    940 #endif
    941 
    942   for (y = 0; y < height; ++y) {
    943     BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
    944     src_y0 += src_stride_y0;
    945     src_y1 += src_stride_y1;
    946     alpha += alpha_stride;
    947     dst_y += dst_stride_y;
    948   }
    949   return 0;
    950 }
    951 
    952 #define MAXTWIDTH 2048
    953 // Alpha Blend YUV images and store to destination.
    954 LIBYUV_API
    955 int I420Blend(const uint8* src_y0,
    956               int src_stride_y0,
    957               const uint8* src_u0,
    958               int src_stride_u0,
    959               const uint8* src_v0,
    960               int src_stride_v0,
    961               const uint8* src_y1,
    962               int src_stride_y1,
    963               const uint8* src_u1,
    964               int src_stride_u1,
    965               const uint8* src_v1,
    966               int src_stride_v1,
    967               const uint8* alpha,
    968               int alpha_stride,
    969               uint8* dst_y,
    970               int dst_stride_y,
    971               uint8* dst_u,
    972               int dst_stride_u,
    973               uint8* dst_v,
    974               int dst_stride_v,
    975               int width,
    976               int height) {
    977   int y;
     978   // Half width for the UV planes; half height comes from the 2-row loop below.
    979   int halfwidth = (width + 1) >> 1;
    980   void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
    981                         const uint8* alpha, uint8* dst, int width) =
    982       BlendPlaneRow_C;
    983   void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
    984                         uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
    985   if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
    986       !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    987     return -1;
    988   }
    989 
    990   // Negative height means invert the image.
    991   if (height < 0) {
    992     height = -height;
    993     dst_y = dst_y + (height - 1) * dst_stride_y;
    994     dst_stride_y = -dst_stride_y;
    995   }
    996 
    997   // Blend Y plane.
    998   BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
    999              dst_y, dst_stride_y, width, height);
   1000 
   1001 #if defined(HAS_BLENDPLANEROW_SSSE3)
   1002   if (TestCpuFlag(kCpuHasSSSE3)) {
   1003     BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
   1004     if (IS_ALIGNED(halfwidth, 8)) {
   1005       BlendPlaneRow = BlendPlaneRow_SSSE3;
   1006     }
   1007   }
   1008 #endif
   1009 #if defined(HAS_BLENDPLANEROW_AVX2)
   1010   if (TestCpuFlag(kCpuHasAVX2)) {
   1011     BlendPlaneRow = BlendPlaneRow_Any_AVX2;
   1012     if (IS_ALIGNED(halfwidth, 32)) {
   1013       BlendPlaneRow = BlendPlaneRow_AVX2;
   1014     }
   1015   }
   1016 #endif
   1017   if (!IS_ALIGNED(width, 2)) {
   1018     ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
   1019   }
   1020 #if defined(HAS_SCALEROWDOWN2_NEON)
   1021   if (TestCpuFlag(kCpuHasNEON)) {
   1022     ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
   1023     if (IS_ALIGNED(width, 2)) {
   1024       ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
   1025       if (IS_ALIGNED(halfwidth, 16)) {
   1026         ScaleRowDown2 = ScaleRowDown2Box_NEON;
   1027       }
   1028     }
   1029   }
   1030 #endif
   1031 #if defined(HAS_SCALEROWDOWN2_SSSE3)
   1032   if (TestCpuFlag(kCpuHasSSSE3)) {
   1033     ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
   1034     if (IS_ALIGNED(width, 2)) {
   1035       ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
   1036       if (IS_ALIGNED(halfwidth, 16)) {
   1037         ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
   1038       }
   1039     }
   1040   }
   1041 #endif
   1042 #if defined(HAS_SCALEROWDOWN2_AVX2)
   1043   if (TestCpuFlag(kCpuHasAVX2)) {
   1044     ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
   1045     if (IS_ALIGNED(width, 2)) {
   1046       ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
   1047       if (IS_ALIGNED(halfwidth, 32)) {
   1048         ScaleRowDown2 = ScaleRowDown2Box_AVX2;
   1049       }
   1050     }
   1051   }
   1052 #endif
   1053 
   1054   // Row buffer for intermediate alpha pixels.
   1055   align_buffer_64(halfalpha, halfwidth);
   1056   for (y = 0; y < height; y += 2) {
    1057     // The last row of an odd-height image uses 1 row of alpha instead of 2.
   1058     if (y == (height - 1)) {
   1059       alpha_stride = 0;
   1060     }
    1061     // Subsample 2 rows of alpha to one half-width row.
   1062     ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
   1063     alpha += alpha_stride * 2;
   1064     BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
   1065     BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
   1066     src_u0 += src_stride_u0;
   1067     src_u1 += src_stride_u1;
   1068     dst_u += dst_stride_u;
   1069     src_v0 += src_stride_v0;
   1070     src_v1 += src_stride_v1;
   1071     dst_v += dst_stride_v;
   1072   }
   1073   free_aligned_buffer_64(halfalpha);
   1074   return 0;
   1075 }
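
// Usage sketch (all buffers and sizes are illustrative assumptions): blending
// two 64x48 I420 images with a full resolution alpha plane. The alpha plane
// matches the Y plane size; it is box-filtered to half size internally for
// the U and V planes, as done by ScaleRowDown2 above. Alpha 255 selects the
// first source, 0 the second.
//
//   uint8 y0[64 * 48], u0[32 * 24], v0[32 * 24];   // foreground
//   uint8 y1[64 * 48], u1[32 * 24], v1[32 * 24];   // background
//   uint8 alpha[64 * 48];
//   uint8 dy[64 * 48], du[32 * 24], dv[32 * 24];
//   I420Blend(y0, 64, u0, 32, v0, 32, y1, 64, u1, 32, v1, 32,
//             alpha, 64, dy, 64, du, 32, dv, 32, 64, 48);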
   1076 
   1077 // Multiply 2 ARGB images and store to destination.
   1078 LIBYUV_API
   1079 int ARGBMultiply(const uint8* src_argb0,
   1080                  int src_stride_argb0,
   1081                  const uint8* src_argb1,
   1082                  int src_stride_argb1,
   1083                  uint8* dst_argb,
   1084                  int dst_stride_argb,
   1085                  int width,
   1086                  int height) {
   1087   int y;
   1088   void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
   1089                           int width) = ARGBMultiplyRow_C;
   1090   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1091     return -1;
   1092   }
   1093   // Negative height means invert the image.
   1094   if (height < 0) {
   1095     height = -height;
   1096     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1097     dst_stride_argb = -dst_stride_argb;
   1098   }
   1099   // Coalesce rows.
   1100   if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
   1101       dst_stride_argb == width * 4) {
   1102     width *= height;
   1103     height = 1;
   1104     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1105   }
   1106 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
   1107   if (TestCpuFlag(kCpuHasSSE2)) {
   1108     ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
   1109     if (IS_ALIGNED(width, 4)) {
   1110       ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
   1111     }
   1112   }
   1113 #endif
   1114 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
   1115   if (TestCpuFlag(kCpuHasAVX2)) {
   1116     ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
   1117     if (IS_ALIGNED(width, 8)) {
   1118       ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
   1119     }
   1120   }
   1121 #endif
   1122 #if defined(HAS_ARGBMULTIPLYROW_NEON)
   1123   if (TestCpuFlag(kCpuHasNEON)) {
   1124     ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
   1125     if (IS_ALIGNED(width, 8)) {
   1126       ARGBMultiplyRow = ARGBMultiplyRow_NEON;
   1127     }
   1128   }
   1129 #endif
   1130 #if defined(HAS_ARGBMULTIPLYROW_MSA)
   1131   if (TestCpuFlag(kCpuHasMSA)) {
   1132     ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
   1133     if (IS_ALIGNED(width, 4)) {
   1134       ARGBMultiplyRow = ARGBMultiplyRow_MSA;
   1135     }
   1136   }
   1137 #endif
   1138 
   1139   // Multiply plane
   1140   for (y = 0; y < height; ++y) {
   1141     ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
   1142     src_argb0 += src_stride_argb0;
   1143     src_argb1 += src_stride_argb1;
   1144     dst_argb += dst_stride_argb;
   1145   }
   1146   return 0;
   1147 }
   1148 
   1149 // Add 2 ARGB images and store to destination.
   1150 LIBYUV_API
   1151 int ARGBAdd(const uint8* src_argb0,
   1152             int src_stride_argb0,
   1153             const uint8* src_argb1,
   1154             int src_stride_argb1,
   1155             uint8* dst_argb,
   1156             int dst_stride_argb,
   1157             int width,
   1158             int height) {
   1159   int y;
   1160   void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
   1161                      int width) = ARGBAddRow_C;
   1162   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1163     return -1;
   1164   }
   1165   // Negative height means invert the image.
   1166   if (height < 0) {
   1167     height = -height;
   1168     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1169     dst_stride_argb = -dst_stride_argb;
   1170   }
   1171   // Coalesce rows.
   1172   if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
   1173       dst_stride_argb == width * 4) {
   1174     width *= height;
   1175     height = 1;
   1176     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1177   }
   1178 #if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
   1179   if (TestCpuFlag(kCpuHasSSE2)) {
   1180     ARGBAddRow = ARGBAddRow_SSE2;
   1181   }
   1182 #endif
   1183 #if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
   1184   if (TestCpuFlag(kCpuHasSSE2)) {
   1185     ARGBAddRow = ARGBAddRow_Any_SSE2;
   1186     if (IS_ALIGNED(width, 4)) {
   1187       ARGBAddRow = ARGBAddRow_SSE2;
   1188     }
   1189   }
   1190 #endif
   1191 #if defined(HAS_ARGBADDROW_AVX2)
   1192   if (TestCpuFlag(kCpuHasAVX2)) {
   1193     ARGBAddRow = ARGBAddRow_Any_AVX2;
   1194     if (IS_ALIGNED(width, 8)) {
   1195       ARGBAddRow = ARGBAddRow_AVX2;
   1196     }
   1197   }
   1198 #endif
   1199 #if defined(HAS_ARGBADDROW_NEON)
   1200   if (TestCpuFlag(kCpuHasNEON)) {
   1201     ARGBAddRow = ARGBAddRow_Any_NEON;
   1202     if (IS_ALIGNED(width, 8)) {
   1203       ARGBAddRow = ARGBAddRow_NEON;
   1204     }
   1205   }
   1206 #endif
   1207 #if defined(HAS_ARGBADDROW_MSA)
   1208   if (TestCpuFlag(kCpuHasMSA)) {
   1209     ARGBAddRow = ARGBAddRow_Any_MSA;
   1210     if (IS_ALIGNED(width, 8)) {
   1211       ARGBAddRow = ARGBAddRow_MSA;
   1212     }
   1213   }
   1214 #endif
   1215 
   1216   // Add plane
   1217   for (y = 0; y < height; ++y) {
   1218     ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
   1219     src_argb0 += src_stride_argb0;
   1220     src_argb1 += src_stride_argb1;
   1221     dst_argb += dst_stride_argb;
   1222   }
   1223   return 0;
   1224 }
   1225 
   1226 // Subtract 2 ARGB images and store to destination.
   1227 LIBYUV_API
   1228 int ARGBSubtract(const uint8* src_argb0,
   1229                  int src_stride_argb0,
   1230                  const uint8* src_argb1,
   1231                  int src_stride_argb1,
   1232                  uint8* dst_argb,
   1233                  int dst_stride_argb,
   1234                  int width,
   1235                  int height) {
   1236   int y;
   1237   void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
   1238                           int width) = ARGBSubtractRow_C;
   1239   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1240     return -1;
   1241   }
   1242   // Negative height means invert the image.
   1243   if (height < 0) {
   1244     height = -height;
   1245     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1246     dst_stride_argb = -dst_stride_argb;
   1247   }
   1248   // Coalesce rows.
   1249   if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
   1250       dst_stride_argb == width * 4) {
   1251     width *= height;
   1252     height = 1;
   1253     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1254   }
   1255 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
   1256   if (TestCpuFlag(kCpuHasSSE2)) {
   1257     ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
   1258     if (IS_ALIGNED(width, 4)) {
   1259       ARGBSubtractRow = ARGBSubtractRow_SSE2;
   1260     }
   1261   }
   1262 #endif
   1263 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
   1264   if (TestCpuFlag(kCpuHasAVX2)) {
   1265     ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
   1266     if (IS_ALIGNED(width, 8)) {
   1267       ARGBSubtractRow = ARGBSubtractRow_AVX2;
   1268     }
   1269   }
   1270 #endif
   1271 #if defined(HAS_ARGBSUBTRACTROW_NEON)
   1272   if (TestCpuFlag(kCpuHasNEON)) {
   1273     ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
   1274     if (IS_ALIGNED(width, 8)) {
   1275       ARGBSubtractRow = ARGBSubtractRow_NEON;
   1276     }
   1277   }
   1278 #endif
   1279 #if defined(HAS_ARGBSUBTRACTROW_MSA)
   1280   if (TestCpuFlag(kCpuHasMSA)) {
   1281     ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
   1282     if (IS_ALIGNED(width, 8)) {
   1283       ARGBSubtractRow = ARGBSubtractRow_MSA;
   1284     }
   1285   }
   1286 #endif
   1287 
   1288   // Subtract plane
   1289   for (y = 0; y < height; ++y) {
   1290     ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
   1291     src_argb0 += src_stride_argb0;
   1292     src_argb1 += src_stride_argb1;
   1293     dst_argb += dst_stride_argb;
   1294   }
   1295   return 0;
    1296 }

    1297 // Convert I422 to RGBA with matrix.
   1298 static int I422ToRGBAMatrix(const uint8* src_y,
   1299                             int src_stride_y,
   1300                             const uint8* src_u,
   1301                             int src_stride_u,
   1302                             const uint8* src_v,
   1303                             int src_stride_v,
   1304                             uint8* dst_rgba,
   1305                             int dst_stride_rgba,
   1306                             const struct YuvConstants* yuvconstants,
   1307                             int width,
   1308                             int height) {
   1309   int y;
   1310   void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf,
   1311                         const uint8* v_buf, uint8* rgb_buf,
   1312                         const struct YuvConstants* yuvconstants, int width) =
   1313       I422ToRGBARow_C;
   1314   if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
   1315     return -1;
   1316   }
   1317   // Negative height means invert the image.
   1318   if (height < 0) {
   1319     height = -height;
   1320     dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
   1321     dst_stride_rgba = -dst_stride_rgba;
   1322   }
   1323 #if defined(HAS_I422TORGBAROW_SSSE3)
   1324   if (TestCpuFlag(kCpuHasSSSE3)) {
   1325     I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
   1326     if (IS_ALIGNED(width, 8)) {
   1327       I422ToRGBARow = I422ToRGBARow_SSSE3;
   1328     }
   1329   }
   1330 #endif
   1331 #if defined(HAS_I422TORGBAROW_AVX2)
   1332   if (TestCpuFlag(kCpuHasAVX2)) {
   1333     I422ToRGBARow = I422ToRGBARow_Any_AVX2;
   1334     if (IS_ALIGNED(width, 16)) {
   1335       I422ToRGBARow = I422ToRGBARow_AVX2;
   1336     }
   1337   }
   1338 #endif
   1339 #if defined(HAS_I422TORGBAROW_NEON)
   1340   if (TestCpuFlag(kCpuHasNEON)) {
   1341     I422ToRGBARow = I422ToRGBARow_Any_NEON;
   1342     if (IS_ALIGNED(width, 8)) {
   1343       I422ToRGBARow = I422ToRGBARow_NEON;
   1344     }
   1345   }
   1346 #endif
   1347 #if defined(HAS_I422TORGBAROW_DSPR2)
   1348   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
   1349       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
   1350       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
   1351       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
   1352       IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
   1353     I422ToRGBARow = I422ToRGBARow_DSPR2;
   1354   }
   1355 #endif
   1356 #if defined(HAS_I422TORGBAROW_MSA)
   1357   if (TestCpuFlag(kCpuHasMSA)) {
   1358     I422ToRGBARow = I422ToRGBARow_Any_MSA;
   1359     if (IS_ALIGNED(width, 8)) {
   1360       I422ToRGBARow = I422ToRGBARow_MSA;
   1361     }
   1362   }
   1363 #endif
   1364 
   1365   for (y = 0; y < height; ++y) {
   1366     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
   1367     dst_rgba += dst_stride_rgba;
   1368     src_y += src_stride_y;
   1369     src_u += src_stride_u;
   1370     src_v += src_stride_v;
   1371   }
   1372   return 0;
   1373 }
   1374 
   1375 // Convert I422 to RGBA.
   1376 LIBYUV_API
   1377 int I422ToRGBA(const uint8* src_y,
   1378                int src_stride_y,
   1379                const uint8* src_u,
   1380                int src_stride_u,
   1381                const uint8* src_v,
   1382                int src_stride_v,
   1383                uint8* dst_rgba,
   1384                int dst_stride_rgba,
   1385                int width,
   1386                int height) {
   1387   return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
   1388                           src_stride_v, dst_rgba, dst_stride_rgba,
   1389                           &kYuvI601Constants, width, height);
   1390 }
   1391 
   1392 // Convert I422 to BGRA.
   1393 LIBYUV_API
   1394 int I422ToBGRA(const uint8* src_y,
   1395                int src_stride_y,
   1396                const uint8* src_u,
   1397                int src_stride_u,
   1398                const uint8* src_v,
   1399                int src_stride_v,
   1400                uint8* dst_bgra,
   1401                int dst_stride_bgra,
   1402                int width,
   1403                int height) {
   1404   return I422ToRGBAMatrix(src_y, src_stride_y, src_v,
   1405                           src_stride_v,  // Swap U and V
   1406                           src_u, src_stride_u, dst_bgra, dst_stride_bgra,
   1407                           &kYvuI601Constants,  // Use Yvu matrix
   1408                           width, height);
   1409 }
   1410 
   1411 // Convert NV12 to RGB565.
   1412 LIBYUV_API
   1413 int NV12ToRGB565(const uint8* src_y,
   1414                  int src_stride_y,
   1415                  const uint8* src_uv,
   1416                  int src_stride_uv,
   1417                  uint8* dst_rgb565,
   1418                  int dst_stride_rgb565,
   1419                  int width,
   1420                  int height) {
   1421   int y;
   1422   void (*NV12ToRGB565Row)(
   1423       const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf,
   1424       const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
   1425   if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
   1426     return -1;
   1427   }
   1428   // Negative height means invert the image.
   1429   if (height < 0) {
   1430     height = -height;
   1431     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
   1432     dst_stride_rgb565 = -dst_stride_rgb565;
   1433   }
   1434 #if defined(HAS_NV12TORGB565ROW_SSSE3)
   1435   if (TestCpuFlag(kCpuHasSSSE3)) {
   1436     NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
   1437     if (IS_ALIGNED(width, 8)) {
   1438       NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
   1439     }
   1440   }
   1441 #endif
   1442 #if defined(HAS_NV12TORGB565ROW_AVX2)
   1443   if (TestCpuFlag(kCpuHasAVX2)) {
   1444     NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
   1445     if (IS_ALIGNED(width, 16)) {
   1446       NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
   1447     }
   1448   }
   1449 #endif
   1450 #if defined(HAS_NV12TORGB565ROW_NEON)
   1451   if (TestCpuFlag(kCpuHasNEON)) {
   1452     NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
   1453     if (IS_ALIGNED(width, 8)) {
   1454       NV12ToRGB565Row = NV12ToRGB565Row_NEON;
   1455     }
   1456   }
   1457 #endif
   1458 #if defined(HAS_NV12TORGB565ROW_MSA)
   1459   if (TestCpuFlag(kCpuHasMSA)) {
   1460     NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA;
   1461     if (IS_ALIGNED(width, 8)) {
   1462       NV12ToRGB565Row = NV12ToRGB565Row_MSA;
   1463     }
   1464   }
   1465 #endif
   1466 
   1467   for (y = 0; y < height; ++y) {
   1468     NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
   1469     dst_rgb565 += dst_stride_rgb565;
   1470     src_y += src_stride_y;
   1471     if (y & 1) {
   1472       src_uv += src_stride_uv;
   1473     }
   1474   }
   1475   return 0;
   1476 }
   1477 
   1478 // Convert RAW to RGB24.
   1479 LIBYUV_API
   1480 int RAWToRGB24(const uint8* src_raw,
   1481                int src_stride_raw,
   1482                uint8* dst_rgb24,
   1483                int dst_stride_rgb24,
   1484                int width,
   1485                int height) {
   1486   int y;
   1487   void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) =
   1488       RAWToRGB24Row_C;
   1489   if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
   1490     return -1;
   1491   }
   1492   // Negative height means invert the image.
   1493   if (height < 0) {
   1494     height = -height;
   1495     src_raw = src_raw + (height - 1) * src_stride_raw;
   1496     src_stride_raw = -src_stride_raw;
   1497   }
   1498   // Coalesce rows.
   1499   if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
   1500     width *= height;
   1501     height = 1;
   1502     src_stride_raw = dst_stride_rgb24 = 0;
   1503   }
   1504 #if defined(HAS_RAWTORGB24ROW_SSSE3)
   1505   if (TestCpuFlag(kCpuHasSSSE3)) {
   1506     RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
   1507     if (IS_ALIGNED(width, 8)) {
   1508       RAWToRGB24Row = RAWToRGB24Row_SSSE3;
   1509     }
   1510   }
   1511 #endif
   1512 #if defined(HAS_RAWTORGB24ROW_NEON)
   1513   if (TestCpuFlag(kCpuHasNEON)) {
   1514     RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
   1515     if (IS_ALIGNED(width, 8)) {
   1516       RAWToRGB24Row = RAWToRGB24Row_NEON;
   1517     }
   1518   }
   1519 #endif
   1520 #if defined(HAS_RAWTORGB24ROW_MSA)
   1521   if (TestCpuFlag(kCpuHasMSA)) {
   1522     RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
   1523     if (IS_ALIGNED(width, 16)) {
   1524       RAWToRGB24Row = RAWToRGB24Row_MSA;
   1525     }
   1526   }
   1527 #endif
   1528 
   1529   for (y = 0; y < height; ++y) {
   1530     RAWToRGB24Row(src_raw, dst_rgb24, width);
   1531     src_raw += src_stride_raw;
   1532     dst_rgb24 += dst_stride_rgb24;
   1533   }
   1534   return 0;
   1535 }
   1536 
   1537 LIBYUV_API
   1538 void SetPlane(uint8* dst_y,
   1539               int dst_stride_y,
   1540               int width,
   1541               int height,
   1542               uint32 value) {
   1543   int y;
   1544   void (*SetRow)(uint8 * dst, uint8 value, int width) = SetRow_C;
   1545   if (height < 0) {
   1546     height = -height;
   1547     dst_y = dst_y + (height - 1) * dst_stride_y;
   1548     dst_stride_y = -dst_stride_y;
   1549   }
   1550   // Coalesce rows.
   1551   if (dst_stride_y == width) {
   1552     width *= height;
   1553     height = 1;
   1554     dst_stride_y = 0;
   1555   }
   1556 #if defined(HAS_SETROW_NEON)
   1557   if (TestCpuFlag(kCpuHasNEON)) {
   1558     SetRow = SetRow_Any_NEON;
   1559     if (IS_ALIGNED(width, 16)) {
   1560       SetRow = SetRow_NEON;
   1561     }
   1562   }
   1563 #endif
   1564 #if defined(HAS_SETROW_X86)
   1565   if (TestCpuFlag(kCpuHasX86)) {
   1566     SetRow = SetRow_Any_X86;
   1567     if (IS_ALIGNED(width, 4)) {
   1568       SetRow = SetRow_X86;
   1569     }
   1570   }
   1571 #endif
   1572 #if defined(HAS_SETROW_ERMS)
   1573   if (TestCpuFlag(kCpuHasERMS)) {
   1574     SetRow = SetRow_ERMS;
   1575   }
   1576 #endif
   1577 
   1578   // Set plane
   1579   for (y = 0; y < height; ++y) {
   1580     SetRow(dst_y, value, width);
   1581     dst_y += dst_stride_y;
   1582   }
   1583 }
   1584 
   1585 // Draw a rectangle into I420
   1586 LIBYUV_API
   1587 int I420Rect(uint8* dst_y,
   1588              int dst_stride_y,
   1589              uint8* dst_u,
   1590              int dst_stride_u,
   1591              uint8* dst_v,
   1592              int dst_stride_v,
   1593              int x,
   1594              int y,
   1595              int width,
   1596              int height,
   1597              int value_y,
   1598              int value_u,
   1599              int value_v) {
   1600   int halfwidth = (width + 1) >> 1;
   1601   int halfheight = (height + 1) >> 1;
   1602   uint8* start_y = dst_y + y * dst_stride_y + x;
   1603   uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
   1604   uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
   1605   if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
   1606       y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
   1607       value_v < 0 || value_v > 255) {
   1608     return -1;
   1609   }
   1610 
   1611   SetPlane(start_y, dst_stride_y, width, height, value_y);
   1612   SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
   1613   SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
   1614   return 0;
   1615 }
   1616 
   1617 // Draw a rectangle into ARGB
   1618 LIBYUV_API
   1619 int ARGBRect(uint8* dst_argb,
   1620              int dst_stride_argb,
   1621              int dst_x,
   1622              int dst_y,
   1623              int width,
   1624              int height,
   1625              uint32 value) {
   1626   int y;
   1627   void (*ARGBSetRow)(uint8 * dst_argb, uint32 value, int width) = ARGBSetRow_C;
   1628   if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
   1629     return -1;
   1630   }
   1631   if (height < 0) {
   1632     height = -height;
   1633     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1634     dst_stride_argb = -dst_stride_argb;
   1635   }
   1636   dst_argb += dst_y * dst_stride_argb + dst_x * 4;
   1637   // Coalesce rows.
   1638   if (dst_stride_argb == width * 4) {
   1639     width *= height;
   1640     height = 1;
   1641     dst_stride_argb = 0;
   1642   }
   1643 
   1644 #if defined(HAS_ARGBSETROW_NEON)
   1645   if (TestCpuFlag(kCpuHasNEON)) {
   1646     ARGBSetRow = ARGBSetRow_Any_NEON;
   1647     if (IS_ALIGNED(width, 4)) {
   1648       ARGBSetRow = ARGBSetRow_NEON;
   1649     }
   1650   }
   1651 #endif
   1652 #if defined(HAS_ARGBSETROW_X86)
   1653   if (TestCpuFlag(kCpuHasX86)) {
   1654     ARGBSetRow = ARGBSetRow_X86;
   1655   }
   1656 #endif
   1657 #if defined(HAS_ARGBSETROW_MSA)
   1658   if (TestCpuFlag(kCpuHasMSA)) {
   1659     ARGBSetRow = ARGBSetRow_Any_MSA;
   1660     if (IS_ALIGNED(width, 4)) {
   1661       ARGBSetRow = ARGBSetRow_MSA;
   1662     }
   1663   }
   1664 #endif
   1665 
   1666   // Set plane
   1667   for (y = 0; y < height; ++y) {
   1668     ARGBSetRow(dst_argb, value, width);
   1669     dst_argb += dst_stride_argb;
   1670   }
   1671   return 0;
   1672 }
   1673 
    1674 // Convert unattenuated ARGB to preattenuated ARGB.
    1675 // An unattenuated ARGB alpha blend uses the formula
   1676 // p = a * f + (1 - a) * b
   1677 // where
   1678 //   p is output pixel
   1679 //   f is foreground pixel
   1680 //   b is background pixel
   1681 //   a is alpha value from foreground pixel
    1682 // A preattenuated ARGB alpha blend uses the formula
   1683 // p = f + (1 - a) * b
   1684 // where
   1685 //   f is foreground pixel premultiplied by alpha
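         // For example (a sketch of the arithmetic, ignoring the exact rounding
         // used by the row kernels): an unattenuated pixel with R = 200, A = 128
         // attenuates to roughly R = 200 * 128 / 255 ~= 100.  ARGBUnattenuate
         // approximately inverts this and recovers R ~= 200, except where alpha
         // is 0, which cannot be inverted.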
   1686 
   1687 LIBYUV_API
   1688 int ARGBAttenuate(const uint8* src_argb,
   1689                   int src_stride_argb,
   1690                   uint8* dst_argb,
   1691                   int dst_stride_argb,
   1692                   int width,
   1693                   int height) {
   1694   int y;
   1695   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) =
   1696       ARGBAttenuateRow_C;
   1697   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1698     return -1;
   1699   }
   1700   if (height < 0) {
   1701     height = -height;
   1702     src_argb = src_argb + (height - 1) * src_stride_argb;
   1703     src_stride_argb = -src_stride_argb;
   1704   }
   1705   // Coalesce rows.
   1706   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   1707     width *= height;
   1708     height = 1;
   1709     src_stride_argb = dst_stride_argb = 0;
   1710   }
   1711 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
   1712   if (TestCpuFlag(kCpuHasSSSE3)) {
   1713     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
   1714     if (IS_ALIGNED(width, 4)) {
   1715       ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
   1716     }
   1717   }
   1718 #endif
   1719 #if defined(HAS_ARGBATTENUATEROW_AVX2)
   1720   if (TestCpuFlag(kCpuHasAVX2)) {
   1721     ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
   1722     if (IS_ALIGNED(width, 8)) {
   1723       ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
   1724     }
   1725   }
   1726 #endif
   1727 #if defined(HAS_ARGBATTENUATEROW_NEON)
   1728   if (TestCpuFlag(kCpuHasNEON)) {
   1729     ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
   1730     if (IS_ALIGNED(width, 8)) {
   1731       ARGBAttenuateRow = ARGBAttenuateRow_NEON;
   1732     }
   1733   }
   1734 #endif
   1735 #if defined(HAS_ARGBATTENUATEROW_MSA)
   1736   if (TestCpuFlag(kCpuHasMSA)) {
   1737     ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
   1738     if (IS_ALIGNED(width, 8)) {
   1739       ARGBAttenuateRow = ARGBAttenuateRow_MSA;
   1740     }
   1741   }
   1742 #endif
   1743 
   1744   for (y = 0; y < height; ++y) {
   1745     ARGBAttenuateRow(src_argb, dst_argb, width);
   1746     src_argb += src_stride_argb;
   1747     dst_argb += dst_stride_argb;
   1748   }
   1749   return 0;
   1750 }
   1751 
    1752 // Convert preattenuated ARGB to unattenuated ARGB.
   1753 LIBYUV_API
   1754 int ARGBUnattenuate(const uint8* src_argb,
   1755                     int src_stride_argb,
   1756                     uint8* dst_argb,
   1757                     int dst_stride_argb,
   1758                     int width,
   1759                     int height) {
   1760   int y;
   1761   void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1762                              int width) = ARGBUnattenuateRow_C;
   1763   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1764     return -1;
   1765   }
   1766   if (height < 0) {
   1767     height = -height;
   1768     src_argb = src_argb + (height - 1) * src_stride_argb;
   1769     src_stride_argb = -src_stride_argb;
   1770   }
   1771   // Coalesce rows.
   1772   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   1773     width *= height;
   1774     height = 1;
   1775     src_stride_argb = dst_stride_argb = 0;
   1776   }
   1777 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
   1778   if (TestCpuFlag(kCpuHasSSE2)) {
   1779     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
   1780     if (IS_ALIGNED(width, 4)) {
   1781       ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
   1782     }
   1783   }
   1784 #endif
   1785 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
   1786   if (TestCpuFlag(kCpuHasAVX2)) {
   1787     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
   1788     if (IS_ALIGNED(width, 8)) {
   1789       ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
   1790     }
   1791   }
   1792 #endif
   1793   // TODO(fbarchard): Neon version.
   1794 
   1795   for (y = 0; y < height; ++y) {
   1796     ARGBUnattenuateRow(src_argb, dst_argb, width);
   1797     src_argb += src_stride_argb;
   1798     dst_argb += dst_stride_argb;
   1799   }
   1800   return 0;
   1801 }
   1802 
   1803 // Convert ARGB to Grayed ARGB.
   1804 LIBYUV_API
   1805 int ARGBGrayTo(const uint8* src_argb,
   1806                int src_stride_argb,
   1807                uint8* dst_argb,
   1808                int dst_stride_argb,
   1809                int width,
   1810                int height) {
   1811   int y;
   1812   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
   1813       ARGBGrayRow_C;
   1814   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1815     return -1;
   1816   }
   1817   if (height < 0) {
   1818     height = -height;
   1819     src_argb = src_argb + (height - 1) * src_stride_argb;
   1820     src_stride_argb = -src_stride_argb;
   1821   }
   1822   // Coalesce rows.
   1823   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   1824     width *= height;
   1825     height = 1;
   1826     src_stride_argb = dst_stride_argb = 0;
   1827   }
   1828 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1829   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1830     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1831   }
   1832 #endif
   1833 #if defined(HAS_ARGBGRAYROW_NEON)
   1834   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1835     ARGBGrayRow = ARGBGrayRow_NEON;
   1836   }
   1837 #endif
   1838 #if defined(HAS_ARGBGRAYROW_MSA)
   1839   if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
   1840     ARGBGrayRow = ARGBGrayRow_MSA;
   1841   }
   1842 #endif
   1843 
   1844   for (y = 0; y < height; ++y) {
   1845     ARGBGrayRow(src_argb, dst_argb, width);
   1846     src_argb += src_stride_argb;
   1847     dst_argb += dst_stride_argb;
   1848   }
   1849   return 0;
   1850 }
   1851 
   1852 // Make a rectangle of ARGB gray scale.
   1853 LIBYUV_API
   1854 int ARGBGray(uint8* dst_argb,
   1855              int dst_stride_argb,
   1856              int dst_x,
   1857              int dst_y,
   1858              int width,
   1859              int height) {
   1860   int y;
   1861   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
   1862       ARGBGrayRow_C;
   1863   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1864   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1865     return -1;
   1866   }
   1867   // Coalesce rows.
   1868   if (dst_stride_argb == width * 4) {
   1869     width *= height;
   1870     height = 1;
   1871     dst_stride_argb = 0;
   1872   }
   1873 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1874   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1875     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1876   }
   1877 #endif
   1878 #if defined(HAS_ARGBGRAYROW_NEON)
   1879   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1880     ARGBGrayRow = ARGBGrayRow_NEON;
   1881   }
   1882 #endif
   1883 #if defined(HAS_ARGBGRAYROW_MSA)
   1884   if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
   1885     ARGBGrayRow = ARGBGrayRow_MSA;
   1886   }
   1887 #endif
   1888 
   1889   for (y = 0; y < height; ++y) {
   1890     ARGBGrayRow(dst, dst, width);
   1891     dst += dst_stride_argb;
   1892   }
   1893   return 0;
   1894 }
   1895 
   1896 // Make a rectangle of ARGB Sepia tone.
   1897 LIBYUV_API
   1898 int ARGBSepia(uint8* dst_argb,
   1899               int dst_stride_argb,
   1900               int dst_x,
   1901               int dst_y,
   1902               int width,
   1903               int height) {
   1904   int y;
   1905   void (*ARGBSepiaRow)(uint8 * dst_argb, int width) = ARGBSepiaRow_C;
   1906   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1907   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1908     return -1;
   1909   }
   1910   // Coalesce rows.
   1911   if (dst_stride_argb == width * 4) {
   1912     width *= height;
   1913     height = 1;
   1914     dst_stride_argb = 0;
   1915   }
   1916 #if defined(HAS_ARGBSEPIAROW_SSSE3)
   1917   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1918     ARGBSepiaRow = ARGBSepiaRow_SSSE3;
   1919   }
   1920 #endif
   1921 #if defined(HAS_ARGBSEPIAROW_NEON)
   1922   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1923     ARGBSepiaRow = ARGBSepiaRow_NEON;
   1924   }
   1925 #endif
   1926 #if defined(HAS_ARGBSEPIAROW_MSA)
   1927   if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
   1928     ARGBSepiaRow = ARGBSepiaRow_MSA;
   1929   }
   1930 #endif
   1931 
   1932   for (y = 0; y < height; ++y) {
   1933     ARGBSepiaRow(dst, width);
   1934     dst += dst_stride_argb;
   1935   }
   1936   return 0;
   1937 }
   1938 
   1939 // Apply a 4x4 matrix to each ARGB pixel.
   1940 // Note: Normally for shading, but can be used to swizzle or invert.
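         // As an illustrative sketch (coefficients are in 1/64 units, with 64
         // meaning 1.0 as in RGBColorMatrix below), an identity matrix that
         // leaves pixels unchanged would look like:
         //   static const int8 kIdentityMatrix[16] = {
         //       64, 0, 0, 0, 0, 64, 0, 0, 0, 0, 64, 0, 0, 0, 0, 64};
         //   ARGBColorMatrix(src_argb, src_stride_argb, dst_argb,
         //                   dst_stride_argb, kIdentityMatrix, width, height);
         // kIdentityMatrix is only an example name, not part of the API.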
   1941 LIBYUV_API
   1942 int ARGBColorMatrix(const uint8* src_argb,
   1943                     int src_stride_argb,
   1944                     uint8* dst_argb,
   1945                     int dst_stride_argb,
   1946                     const int8* matrix_argb,
   1947                     int width,
   1948                     int height) {
   1949   int y;
   1950   void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
   1951                              const int8* matrix_argb, int width) =
   1952       ARGBColorMatrixRow_C;
   1953   if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
   1954     return -1;
   1955   }
   1956   if (height < 0) {
   1957     height = -height;
   1958     src_argb = src_argb + (height - 1) * src_stride_argb;
   1959     src_stride_argb = -src_stride_argb;
   1960   }
   1961   // Coalesce rows.
   1962   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   1963     width *= height;
   1964     height = 1;
   1965     src_stride_argb = dst_stride_argb = 0;
   1966   }
   1967 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
   1968   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1969     ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
   1970   }
   1971 #endif
   1972 #if defined(HAS_ARGBCOLORMATRIXROW_NEON)
   1973   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1974     ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
   1975   }
   1976 #endif
   1977   for (y = 0; y < height; ++y) {
   1978     ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
   1979     src_argb += src_stride_argb;
   1980     dst_argb += dst_stride_argb;
   1981   }
   1982   return 0;
   1983 }
   1984 
   1985 // Apply a 4x3 matrix to each ARGB pixel.
   1986 // Deprecated.
   1987 LIBYUV_API
   1988 int RGBColorMatrix(uint8* dst_argb,
   1989                    int dst_stride_argb,
   1990                    const int8* matrix_rgb,
   1991                    int dst_x,
   1992                    int dst_y,
   1993                    int width,
   1994                    int height) {
   1995   SIMD_ALIGNED(int8 matrix_argb[16]);
   1996   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1997   if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
   1998       dst_y < 0) {
   1999     return -1;
   2000   }
   2001 
   2002   // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
   2003   matrix_argb[0] = matrix_rgb[0] / 2;
   2004   matrix_argb[1] = matrix_rgb[1] / 2;
   2005   matrix_argb[2] = matrix_rgb[2] / 2;
   2006   matrix_argb[3] = matrix_rgb[3] / 2;
   2007   matrix_argb[4] = matrix_rgb[4] / 2;
   2008   matrix_argb[5] = matrix_rgb[5] / 2;
   2009   matrix_argb[6] = matrix_rgb[6] / 2;
   2010   matrix_argb[7] = matrix_rgb[7] / 2;
   2011   matrix_argb[8] = matrix_rgb[8] / 2;
   2012   matrix_argb[9] = matrix_rgb[9] / 2;
   2013   matrix_argb[10] = matrix_rgb[10] / 2;
   2014   matrix_argb[11] = matrix_rgb[11] / 2;
   2015   matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
   2016   matrix_argb[15] = 64;  // 1.0
   2017 
   2018   return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, dst,
   2019                          dst_stride_argb, &matrix_argb[0], width, height);
   2020 }
   2021 
    2022 // Apply a color table to each ARGB pixel.
   2023 // Table contains 256 ARGB values.
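         // As an illustrative sketch, an identity table (256 entries * 4 bytes)
         // that leaves pixels unchanged can be built as:
         //   uint8 table[256 * 4];
         //   for (int i = 0; i < 256; ++i) {
         //     table[i * 4 + 0] = table[i * 4 + 1] = table[i * 4 + 2] =
         //         table[i * 4 + 3] = (uint8)i;
         //   }
         //   ARGBColorTable(dst_argb, dst_stride_argb, table, 0, 0, width,
         //                  height);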
   2024 LIBYUV_API
   2025 int ARGBColorTable(uint8* dst_argb,
   2026                    int dst_stride_argb,
   2027                    const uint8* table_argb,
   2028                    int dst_x,
   2029                    int dst_y,
   2030                    int width,
   2031                    int height) {
   2032   int y;
   2033   void (*ARGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb,
   2034                             int width) = ARGBColorTableRow_C;
   2035   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   2036   if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
   2037       dst_y < 0) {
   2038     return -1;
   2039   }
   2040   // Coalesce rows.
   2041   if (dst_stride_argb == width * 4) {
   2042     width *= height;
   2043     height = 1;
   2044     dst_stride_argb = 0;
   2045   }
   2046 #if defined(HAS_ARGBCOLORTABLEROW_X86)
   2047   if (TestCpuFlag(kCpuHasX86)) {
   2048     ARGBColorTableRow = ARGBColorTableRow_X86;
   2049   }
   2050 #endif
   2051   for (y = 0; y < height; ++y) {
   2052     ARGBColorTableRow(dst, table_argb, width);
   2053     dst += dst_stride_argb;
   2054   }
   2055   return 0;
   2056 }
   2057 
    2058 // Apply a color table to each ARGB pixel but preserve destination alpha.
   2059 // Table contains 256 ARGB values.
   2060 LIBYUV_API
   2061 int RGBColorTable(uint8* dst_argb,
   2062                   int dst_stride_argb,
   2063                   const uint8* table_argb,
   2064                   int dst_x,
   2065                   int dst_y,
   2066                   int width,
   2067                   int height) {
   2068   int y;
   2069   void (*RGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb,
   2070                            int width) = RGBColorTableRow_C;
   2071   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   2072   if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
   2073       dst_y < 0) {
   2074     return -1;
   2075   }
   2076   // Coalesce rows.
   2077   if (dst_stride_argb == width * 4) {
   2078     width *= height;
   2079     height = 1;
   2080     dst_stride_argb = 0;
   2081   }
   2082 #if defined(HAS_RGBCOLORTABLEROW_X86)
   2083   if (TestCpuFlag(kCpuHasX86)) {
   2084     RGBColorTableRow = RGBColorTableRow_X86;
   2085   }
   2086 #endif
   2087   for (y = 0; y < height; ++y) {
   2088     RGBColorTableRow(dst, table_argb, width);
   2089     dst += dst_stride_argb;
   2090   }
   2091   return 0;
   2092 }
   2093 
   2094 // ARGBQuantize is used to posterize art.
   2095 // e.g. rgb / qvalue * qvalue + qvalue / 2
    2096 // But the low level rows implement it efficiently with 3 parameters, and
    2097 // could be used for other high level operations.
   2098 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
   2099 // where scale is 1 / interval_size as a fixed point value.
    2100 // The divide is replaced with a fixed point multiply by the reciprocal.
   2101 // Caveat - although SSE2 saturates, the C function does not and should be used
   2102 // with care if doing anything but quantization.
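         // For example, to posterize with qvalue = 32 (the rgb / 32 * 32 + 16
         // case above), pass the 16.16 fixed point reciprocal of the interval:
         //   ARGBQuantize(dst_argb, dst_stride_argb, 65536 / 32 /* scale */,
         //                32, 16, 0, 0, width, height);
         // These parameter values are illustrative only.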
   2103 LIBYUV_API
   2104 int ARGBQuantize(uint8* dst_argb,
   2105                  int dst_stride_argb,
   2106                  int scale,
   2107                  int interval_size,
   2108                  int interval_offset,
   2109                  int dst_x,
   2110                  int dst_y,
   2111                  int width,
   2112                  int height) {
   2113   int y;
   2114   void (*ARGBQuantizeRow)(uint8 * dst_argb, int scale, int interval_size,
   2115                           int interval_offset, int width) = ARGBQuantizeRow_C;
   2116   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   2117   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
   2118       interval_size < 1 || interval_size > 255) {
   2119     return -1;
   2120   }
   2121   // Coalesce rows.
   2122   if (dst_stride_argb == width * 4) {
   2123     width *= height;
   2124     height = 1;
   2125     dst_stride_argb = 0;
   2126   }
   2127 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
   2128   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
   2129     ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
   2130   }
   2131 #endif
   2132 #if defined(HAS_ARGBQUANTIZEROW_NEON)
   2133   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   2134     ARGBQuantizeRow = ARGBQuantizeRow_NEON;
   2135   }
   2136 #endif
   2137   for (y = 0; y < height; ++y) {
   2138     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
   2139     dst += dst_stride_argb;
   2140   }
   2141   return 0;
   2142 }
   2143 
    2144 // Computes a table of cumulative sums for the image, where each entry is
    2145 // the sum of all values above and to the left of it. Used by ARGBBlur.
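         // In effect this is a per channel integral image: the sum over any
         // rectangle can be recovered from four table entries
         // (bottom_right - bottom_left - top_right + top_left), which is how
         // ARGBBlur below averages each box.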
   2146 LIBYUV_API
   2147 int ARGBComputeCumulativeSum(const uint8* src_argb,
   2148                              int src_stride_argb,
   2149                              int32* dst_cumsum,
   2150                              int dst_stride32_cumsum,
   2151                              int width,
   2152                              int height) {
   2153   int y;
   2154   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
   2155                                   const int32* previous_cumsum, int width) =
   2156       ComputeCumulativeSumRow_C;
   2157   int32* previous_cumsum = dst_cumsum;
   2158   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
   2159     return -1;
   2160   }
   2161 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   2162   if (TestCpuFlag(kCpuHasSSE2)) {
   2163     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   2164   }
   2165 #endif
   2166   memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
   2167   for (y = 0; y < height; ++y) {
   2168     ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
   2169     previous_cumsum = dst_cumsum;
   2170     dst_cumsum += dst_stride32_cumsum;
   2171     src_argb += src_stride_argb;
   2172   }
   2173   return 0;
   2174 }
   2175 
   2176 // Blur ARGB image.
   2177 // Caller should allocate CumulativeSum table of width * height * 16 bytes
   2178 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
   2179 // as the buffer is treated as circular.
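         // A minimal allocation sketch based on the sizes described above
         // (names and the choice of radius * 2 + 2 rows are illustrative):
         //   int cumsum_rows = radius * 2 + 2;
         //   align_buffer_64(cumsum, width * cumsum_rows * 16);
         //   ARGBBlur(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
         //            (int32*)cumsum, width * 4, width, height, radius);
         //   free_aligned_buffer_64(cumsum);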
   2180 LIBYUV_API
   2181 int ARGBBlur(const uint8* src_argb,
   2182              int src_stride_argb,
   2183              uint8* dst_argb,
   2184              int dst_stride_argb,
   2185              int32* dst_cumsum,
   2186              int dst_stride32_cumsum,
   2187              int width,
   2188              int height,
   2189              int radius) {
   2190   int y;
   2191   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
   2192                                   const int32* previous_cumsum, int width) =
   2193       ComputeCumulativeSumRow_C;
   2194   void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
   2195                                     int width, int area, uint8* dst,
   2196                                     int count) = CumulativeSumToAverageRow_C;
   2197   int32* cumsum_bot_row;
   2198   int32* max_cumsum_bot_row;
   2199   int32* cumsum_top_row;
   2200 
   2201   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   2202     return -1;
   2203   }
   2204   if (height < 0) {
   2205     height = -height;
   2206     src_argb = src_argb + (height - 1) * src_stride_argb;
   2207     src_stride_argb = -src_stride_argb;
   2208   }
   2209   if (radius > height) {
   2210     radius = height;
   2211   }
   2212   if (radius > (width / 2 - 1)) {
   2213     radius = width / 2 - 1;
   2214   }
   2215   if (radius <= 0) {
   2216     return -1;
   2217   }
   2218 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   2219   if (TestCpuFlag(kCpuHasSSE2)) {
   2220     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   2221     CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
   2222   }
   2223 #endif
   2224   // Compute enough CumulativeSum for first row to be blurred. After this
   2225   // one row of CumulativeSum is updated at a time.
   2226   ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
   2227                            dst_stride32_cumsum, width, radius);
   2228 
   2229   src_argb = src_argb + radius * src_stride_argb;
   2230   cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
   2231 
   2232   max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
   2233   cumsum_top_row = &dst_cumsum[0];
   2234 
   2235   for (y = 0; y < height; ++y) {
   2236     int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
   2237     int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
   2238     int area = radius * (bot_y - top_y);
   2239     int boxwidth = radius * 4;
   2240     int x;
   2241     int n;
   2242 
   2243     // Increment cumsum_top_row pointer with circular buffer wrap around.
   2244     if (top_y) {
   2245       cumsum_top_row += dst_stride32_cumsum;
   2246       if (cumsum_top_row >= max_cumsum_bot_row) {
   2247         cumsum_top_row = dst_cumsum;
   2248       }
   2249     }
   2250     // Increment cumsum_bot_row pointer with circular buffer wrap around and
   2251     // then fill in a row of CumulativeSum.
   2252     if ((y + radius) < height) {
   2253       const int32* prev_cumsum_bot_row = cumsum_bot_row;
   2254       cumsum_bot_row += dst_stride32_cumsum;
   2255       if (cumsum_bot_row >= max_cumsum_bot_row) {
   2256         cumsum_bot_row = dst_cumsum;
   2257       }
   2258       ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
   2259                               width);
   2260       src_argb += src_stride_argb;
   2261     }
   2262 
   2263     // Left clipped.
   2264     for (x = 0; x < radius + 1; ++x) {
   2265       CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
   2266                                 &dst_argb[x * 4], 1);
   2267       area += (bot_y - top_y);
   2268       boxwidth += 4;
   2269     }
   2270 
   2271     // Middle unclipped.
   2272     n = (width - 1) - radius - x + 1;
   2273     CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
   2274                               &dst_argb[x * 4], n);
   2275 
   2276     // Right clipped.
   2277     for (x += n; x <= width - 1; ++x) {
   2278       area -= (bot_y - top_y);
   2279       boxwidth -= 4;
   2280       CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
   2281                                 cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
   2282                                 area, &dst_argb[x * 4], 1);
   2283     }
   2284     dst_argb += dst_stride_argb;
   2285   }
   2286   return 0;
   2287 }
   2288 
   2289 // Multiply ARGB image by a specified ARGB value.
   2290 LIBYUV_API
   2291 int ARGBShade(const uint8* src_argb,
   2292               int src_stride_argb,
   2293               uint8* dst_argb,
   2294               int dst_stride_argb,
   2295               int width,
   2296               int height,
   2297               uint32 value) {
   2298   int y;
   2299   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, int width,
   2300                        uint32 value) = ARGBShadeRow_C;
   2301   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
   2302     return -1;
   2303   }
   2304   if (height < 0) {
   2305     height = -height;
   2306     src_argb = src_argb + (height - 1) * src_stride_argb;
   2307     src_stride_argb = -src_stride_argb;
   2308   }
   2309   // Coalesce rows.
   2310   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   2311     width *= height;
   2312     height = 1;
   2313     src_stride_argb = dst_stride_argb = 0;
   2314   }
   2315 #if defined(HAS_ARGBSHADEROW_SSE2)
   2316   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
   2317     ARGBShadeRow = ARGBShadeRow_SSE2;
   2318   }
   2319 #endif
   2320 #if defined(HAS_ARGBSHADEROW_NEON)
   2321   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   2322     ARGBShadeRow = ARGBShadeRow_NEON;
   2323   }
   2324 #endif
   2325 #if defined(HAS_ARGBSHADEROW_MSA)
   2326   if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
   2327     ARGBShadeRow = ARGBShadeRow_MSA;
   2328   }
   2329 #endif
   2330 
   2331   for (y = 0; y < height; ++y) {
   2332     ARGBShadeRow(src_argb, dst_argb, width, value);
   2333     src_argb += src_stride_argb;
   2334     dst_argb += dst_stride_argb;
   2335   }
   2336   return 0;
   2337 }
   2338 
   2339 // Interpolate 2 planes by specified amount (0 to 255).
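         // The amount is, in effect, an 8 bit fraction of src1: 0 returns src0
         // unchanged, 128 averages the two planes, and 255 is nearly all src1.
         // e.g. a 50/50 cross fade of two planes (a, b and stride are
         // placeholder names):
         //   InterpolatePlane(a, stride, b, stride, dst, stride, width, height,
         //                    128);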
   2340 LIBYUV_API
   2341 int InterpolatePlane(const uint8* src0,
   2342                      int src_stride0,
   2343                      const uint8* src1,
   2344                      int src_stride1,
   2345                      uint8* dst,
   2346                      int dst_stride,
   2347                      int width,
   2348                      int height,
   2349                      int interpolation) {
   2350   int y;
   2351   void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
   2352                          ptrdiff_t src_stride, int dst_width,
   2353                          int source_y_fraction) = InterpolateRow_C;
   2354   if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
   2355     return -1;
   2356   }
   2357   // Negative height means invert the image.
   2358   if (height < 0) {
   2359     height = -height;
   2360     dst = dst + (height - 1) * dst_stride;
   2361     dst_stride = -dst_stride;
   2362   }
   2363   // Coalesce rows.
   2364   if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
   2365     width *= height;
   2366     height = 1;
   2367     src_stride0 = src_stride1 = dst_stride = 0;
   2368   }
   2369 #if defined(HAS_INTERPOLATEROW_SSSE3)
   2370   if (TestCpuFlag(kCpuHasSSSE3)) {
   2371     InterpolateRow = InterpolateRow_Any_SSSE3;
   2372     if (IS_ALIGNED(width, 16)) {
   2373       InterpolateRow = InterpolateRow_SSSE3;
   2374     }
   2375   }
   2376 #endif
   2377 #if defined(HAS_INTERPOLATEROW_AVX2)
   2378   if (TestCpuFlag(kCpuHasAVX2)) {
   2379     InterpolateRow = InterpolateRow_Any_AVX2;
   2380     if (IS_ALIGNED(width, 32)) {
   2381       InterpolateRow = InterpolateRow_AVX2;
   2382     }
   2383   }
   2384 #endif
   2385 #if defined(HAS_INTERPOLATEROW_NEON)
   2386   if (TestCpuFlag(kCpuHasNEON)) {
   2387     InterpolateRow = InterpolateRow_Any_NEON;
   2388     if (IS_ALIGNED(width, 16)) {
   2389       InterpolateRow = InterpolateRow_NEON;
   2390     }
   2391   }
   2392 #endif
   2393 #if defined(HAS_INTERPOLATEROW_DSPR2)
   2394   if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src0, 4) &&
   2395       IS_ALIGNED(src_stride0, 4) && IS_ALIGNED(src1, 4) &&
   2396       IS_ALIGNED(src_stride1, 4) && IS_ALIGNED(dst, 4) &&
   2397       IS_ALIGNED(dst_stride, 4) && IS_ALIGNED(width, 4)) {
   2398     InterpolateRow = InterpolateRow_DSPR2;
   2399   }
   2400 #endif
   2401 #if defined(HAS_INTERPOLATEROW_MSA)
   2402   if (TestCpuFlag(kCpuHasMSA)) {
   2403     InterpolateRow = InterpolateRow_Any_MSA;
   2404     if (IS_ALIGNED(width, 32)) {
   2405       InterpolateRow = InterpolateRow_MSA;
   2406     }
   2407   }
   2408 #endif
   2409 
   2410   for (y = 0; y < height; ++y) {
   2411     InterpolateRow(dst, src0, src1 - src0, width, interpolation);
   2412     src0 += src_stride0;
   2413     src1 += src_stride1;
   2414     dst += dst_stride;
   2415   }
   2416   return 0;
   2417 }
   2418 
   2419 // Interpolate 2 ARGB images by specified amount (0 to 255).
   2420 LIBYUV_API
   2421 int ARGBInterpolate(const uint8* src_argb0,
   2422                     int src_stride_argb0,
   2423                     const uint8* src_argb1,
   2424                     int src_stride_argb1,
   2425                     uint8* dst_argb,
   2426                     int dst_stride_argb,
   2427                     int width,
   2428                     int height,
   2429                     int interpolation) {
   2430   return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
   2431                           src_stride_argb1, dst_argb, dst_stride_argb,
   2432                           width * 4, height, interpolation);
   2433 }
   2434 
   2435 // Interpolate 2 YUV images by specified amount (0 to 255).
   2436 LIBYUV_API
   2437 int I420Interpolate(const uint8* src0_y,
   2438                     int src0_stride_y,
   2439                     const uint8* src0_u,
   2440                     int src0_stride_u,
   2441                     const uint8* src0_v,
   2442                     int src0_stride_v,
   2443                     const uint8* src1_y,
   2444                     int src1_stride_y,
   2445                     const uint8* src1_u,
   2446                     int src1_stride_u,
   2447                     const uint8* src1_v,
   2448                     int src1_stride_v,
   2449                     uint8* dst_y,
   2450                     int dst_stride_y,
   2451                     uint8* dst_u,
   2452                     int dst_stride_u,
   2453                     uint8* dst_v,
   2454                     int dst_stride_v,
   2455                     int width,
   2456                     int height,
   2457                     int interpolation) {
   2458   int halfwidth = (width + 1) >> 1;
   2459   int halfheight = (height + 1) >> 1;
   2460   if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
   2461       !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
   2462     return -1;
   2463   }
   2464   InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
   2465                    dst_stride_y, width, height, interpolation);
   2466   InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
   2467                    dst_stride_u, halfwidth, halfheight, interpolation);
   2468   InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
   2469                    dst_stride_v, halfwidth, halfheight, interpolation);
   2470   return 0;
   2471 }
   2472 
   2473 // Shuffle ARGB channel order.  e.g. BGRA to ARGB.
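         // The shuffler is a 16 byte table of source byte indices, one per
         // output byte across a group of 4 pixels.  For example a mask along
         // the lines of
         //   {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}
         // reverses the bytes within each pixel, covering the BGRA to ARGB case
         // mentioned above.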
   2474 LIBYUV_API
   2475 int ARGBShuffle(const uint8* src_bgra,
   2476                 int src_stride_bgra,
   2477                 uint8* dst_argb,
   2478                 int dst_stride_argb,
   2479                 const uint8* shuffler,
   2480                 int width,
   2481                 int height) {
   2482   int y;
   2483   void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
   2484                          const uint8* shuffler, int width) = ARGBShuffleRow_C;
   2485   if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
   2486     return -1;
   2487   }
   2488   // Negative height means invert the image.
   2489   if (height < 0) {
   2490     height = -height;
   2491     src_bgra = src_bgra + (height - 1) * src_stride_bgra;
   2492     src_stride_bgra = -src_stride_bgra;
   2493   }
   2494   // Coalesce rows.
   2495   if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
   2496     width *= height;
   2497     height = 1;
   2498     src_stride_bgra = dst_stride_argb = 0;
   2499   }
   2500 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
   2501   if (TestCpuFlag(kCpuHasSSE2)) {
   2502     ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
   2503     if (IS_ALIGNED(width, 4)) {
   2504       ARGBShuffleRow = ARGBShuffleRow_SSE2;
   2505     }
   2506   }
   2507 #endif
   2508 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
   2509   if (TestCpuFlag(kCpuHasSSSE3)) {
   2510     ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
   2511     if (IS_ALIGNED(width, 8)) {
   2512       ARGBShuffleRow = ARGBShuffleRow_SSSE3;
   2513     }
   2514   }
   2515 #endif
   2516 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
   2517   if (TestCpuFlag(kCpuHasAVX2)) {
   2518     ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
   2519     if (IS_ALIGNED(width, 16)) {
   2520       ARGBShuffleRow = ARGBShuffleRow_AVX2;
   2521     }
   2522   }
   2523 #endif
   2524 #if defined(HAS_ARGBSHUFFLEROW_NEON)
   2525   if (TestCpuFlag(kCpuHasNEON)) {
   2526     ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
   2527     if (IS_ALIGNED(width, 4)) {
   2528       ARGBShuffleRow = ARGBShuffleRow_NEON;
   2529     }
   2530   }
   2531 #endif
   2532 #if defined(HAS_ARGBSHUFFLEROW_MSA)
   2533   if (TestCpuFlag(kCpuHasMSA)) {
   2534     ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
   2535     if (IS_ALIGNED(width, 8)) {
   2536       ARGBShuffleRow = ARGBShuffleRow_MSA;
   2537     }
   2538   }
   2539 #endif
   2540 
   2541   for (y = 0; y < height; ++y) {
   2542     ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
   2543     src_bgra += src_stride_bgra;
   2544     dst_argb += dst_stride_argb;
   2545   }
   2546   return 0;
   2547 }
   2548 
   2549 // Sobel ARGB effect.
   2550 static int ARGBSobelize(const uint8* src_argb,
   2551                         int src_stride_argb,
   2552                         uint8* dst_argb,
   2553                         int dst_stride_argb,
   2554                         int width,
   2555                         int height,
   2556                         void (*SobelRow)(const uint8* src_sobelx,
   2557                                          const uint8* src_sobely,
   2558                                          uint8* dst,
   2559                                          int width)) {
   2560   int y;
   2561   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
   2562       ARGBToYJRow_C;
   2563   void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely,
   2564                     int width) = SobelYRow_C;
   2565   void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
   2566                     const uint8* src_y2, uint8* dst_sobely, int width) =
   2567       SobelXRow_C;
   2568   const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
   2569   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   2570     return -1;
   2571   }
   2572   // Negative height means invert the image.
   2573   if (height < 0) {
   2574     height = -height;
   2575     src_argb = src_argb + (height - 1) * src_stride_argb;
   2576     src_stride_argb = -src_stride_argb;
   2577   }
   2578 
   2579 #if defined(HAS_ARGBTOYJROW_SSSE3)
   2580   if (TestCpuFlag(kCpuHasSSSE3)) {
   2581     ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
   2582     if (IS_ALIGNED(width, 16)) {
   2583       ARGBToYJRow = ARGBToYJRow_SSSE3;
   2584     }
   2585   }
   2586 #endif
   2587 #if defined(HAS_ARGBTOYJROW_AVX2)
   2588   if (TestCpuFlag(kCpuHasAVX2)) {
   2589     ARGBToYJRow = ARGBToYJRow_Any_AVX2;
   2590     if (IS_ALIGNED(width, 32)) {
   2591       ARGBToYJRow = ARGBToYJRow_AVX2;
   2592     }
   2593   }
   2594 #endif
   2595 #if defined(HAS_ARGBTOYJROW_NEON)
   2596   if (TestCpuFlag(kCpuHasNEON)) {
   2597     ARGBToYJRow = ARGBToYJRow_Any_NEON;
   2598     if (IS_ALIGNED(width, 8)) {
   2599       ARGBToYJRow = ARGBToYJRow_NEON;
   2600     }
   2601   }
   2602 #endif
   2603 #if defined(HAS_ARGBTOYJROW_MSA)
   2604   if (TestCpuFlag(kCpuHasMSA)) {
   2605     ARGBToYJRow = ARGBToYJRow_Any_MSA;
   2606     if (IS_ALIGNED(width, 16)) {
   2607       ARGBToYJRow = ARGBToYJRow_MSA;
   2608     }
   2609   }
   2610 #endif
   2611 
   2612 #if defined(HAS_SOBELYROW_SSE2)
   2613   if (TestCpuFlag(kCpuHasSSE2)) {
   2614     SobelYRow = SobelYRow_SSE2;
   2615   }
   2616 #endif
   2617 #if defined(HAS_SOBELYROW_NEON)
   2618   if (TestCpuFlag(kCpuHasNEON)) {
   2619     SobelYRow = SobelYRow_NEON;
   2620   }
   2621 #endif
   2622 #if defined(HAS_SOBELXROW_SSE2)
   2623   if (TestCpuFlag(kCpuHasSSE2)) {
   2624     SobelXRow = SobelXRow_SSE2;
   2625   }
   2626 #endif
   2627 #if defined(HAS_SOBELXROW_NEON)
   2628   if (TestCpuFlag(kCpuHasNEON)) {
   2629     SobelXRow = SobelXRow_NEON;
   2630   }
   2631 #endif
   2632   {
   2633     // 3 rows with edges before/after.
   2634     const int kRowSize = (width + kEdge + 31) & ~31;
   2635     align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
   2636     uint8* row_sobelx = rows;
   2637     uint8* row_sobely = rows + kRowSize;
   2638     uint8* row_y = rows + kRowSize * 2;
   2639 
   2640     // Convert first row.
   2641     uint8* row_y0 = row_y + kEdge;
   2642     uint8* row_y1 = row_y0 + kRowSize;
   2643     uint8* row_y2 = row_y1 + kRowSize;
   2644     ARGBToYJRow(src_argb, row_y0, width);
   2645     row_y0[-1] = row_y0[0];
   2646     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
   2647     ARGBToYJRow(src_argb, row_y1, width);
   2648     row_y1[-1] = row_y1[0];
   2649     memset(row_y1 + width, row_y1[width - 1], 16);
   2650     memset(row_y2 + width, 0, 16);
   2651 
   2652     for (y = 0; y < height; ++y) {
   2653       // Convert next row of ARGB to G.
   2654       if (y < (height - 1)) {
   2655         src_argb += src_stride_argb;
   2656       }
   2657       ARGBToYJRow(src_argb, row_y2, width);
   2658       row_y2[-1] = row_y2[0];
   2659       row_y2[width] = row_y2[width - 1];
   2660 
   2661       SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
   2662       SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
   2663       SobelRow(row_sobelx, row_sobely, dst_argb, width);
   2664 
   2665       // Cycle thru circular queue of 3 row_y buffers.
   2666       {
   2667         uint8* row_yt = row_y0;
   2668         row_y0 = row_y1;
   2669         row_y1 = row_y2;
   2670         row_y2 = row_yt;
   2671       }
   2672 
   2673       dst_argb += dst_stride_argb;
   2674     }
   2675     free_aligned_buffer_64(rows);
   2676   }
   2677   return 0;
   2678 }
   2679 
   2680 // Sobel ARGB effect.
   2681 LIBYUV_API
   2682 int ARGBSobel(const uint8* src_argb,
   2683               int src_stride_argb,
   2684               uint8* dst_argb,
   2685               int dst_stride_argb,
   2686               int width,
   2687               int height) {
   2688   void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2689                    uint8* dst_argb, int width) = SobelRow_C;
   2690 #if defined(HAS_SOBELROW_SSE2)
   2691   if (TestCpuFlag(kCpuHasSSE2)) {
   2692     SobelRow = SobelRow_Any_SSE2;
   2693     if (IS_ALIGNED(width, 16)) {
   2694       SobelRow = SobelRow_SSE2;
   2695     }
   2696   }
   2697 #endif
   2698 #if defined(HAS_SOBELROW_NEON)
   2699   if (TestCpuFlag(kCpuHasNEON)) {
   2700     SobelRow = SobelRow_Any_NEON;
   2701     if (IS_ALIGNED(width, 8)) {
   2702       SobelRow = SobelRow_NEON;
   2703     }
   2704   }
   2705 #endif
   2706 #if defined(HAS_SOBELROW_MSA)
   2707   if (TestCpuFlag(kCpuHasMSA)) {
   2708     SobelRow = SobelRow_Any_MSA;
   2709     if (IS_ALIGNED(width, 16)) {
   2710       SobelRow = SobelRow_MSA;
   2711     }
   2712   }
   2713 #endif
   2714   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2715                       width, height, SobelRow);
   2716 }
   2717 
   2718 // Sobel ARGB effect with planar output.
   2719 LIBYUV_API
   2720 int ARGBSobelToPlane(const uint8* src_argb,
   2721                      int src_stride_argb,
   2722                      uint8* dst_y,
   2723                      int dst_stride_y,
   2724                      int width,
   2725                      int height) {
   2726   void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2727                           uint8* dst_, int width) = SobelToPlaneRow_C;
   2728 #if defined(HAS_SOBELTOPLANEROW_SSE2)
   2729   if (TestCpuFlag(kCpuHasSSE2)) {
   2730     SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
   2731     if (IS_ALIGNED(width, 16)) {
   2732       SobelToPlaneRow = SobelToPlaneRow_SSE2;
   2733     }
   2734   }
   2735 #endif
   2736 #if defined(HAS_SOBELTOPLANEROW_NEON)
   2737   if (TestCpuFlag(kCpuHasNEON)) {
   2738     SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
   2739     if (IS_ALIGNED(width, 16)) {
   2740       SobelToPlaneRow = SobelToPlaneRow_NEON;
   2741     }
   2742   }
   2743 #endif
   2744 #if defined(HAS_SOBELTOPLANEROW_MSA)
   2745   if (TestCpuFlag(kCpuHasMSA)) {
   2746     SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
   2747     if (IS_ALIGNED(width, 32)) {
   2748       SobelToPlaneRow = SobelToPlaneRow_MSA;
   2749     }
   2750   }
   2751 #endif
   2752   return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
   2753                       height, SobelToPlaneRow);
   2754 }
   2755 
   2756 // SobelXY ARGB effect.
   2757 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
   2758 LIBYUV_API
   2759 int ARGBSobelXY(const uint8* src_argb,
   2760                 int src_stride_argb,
   2761                 uint8* dst_argb,
   2762                 int dst_stride_argb,
   2763                 int width,
   2764                 int height) {
   2765   void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2766                      uint8* dst_argb, int width) = SobelXYRow_C;
   2767 #if defined(HAS_SOBELXYROW_SSE2)
   2768   if (TestCpuFlag(kCpuHasSSE2)) {
   2769     SobelXYRow = SobelXYRow_Any_SSE2;
   2770     if (IS_ALIGNED(width, 16)) {
   2771       SobelXYRow = SobelXYRow_SSE2;
   2772     }
   2773   }
   2774 #endif
   2775 #if defined(HAS_SOBELXYROW_NEON)
   2776   if (TestCpuFlag(kCpuHasNEON)) {
   2777     SobelXYRow = SobelXYRow_Any_NEON;
   2778     if (IS_ALIGNED(width, 8)) {
   2779       SobelXYRow = SobelXYRow_NEON;
   2780     }
   2781   }
   2782 #endif
   2783 #if defined(HAS_SOBELXYROW_MSA)
   2784   if (TestCpuFlag(kCpuHasMSA)) {
   2785     SobelXYRow = SobelXYRow_Any_MSA;
   2786     if (IS_ALIGNED(width, 16)) {
   2787       SobelXYRow = SobelXYRow_MSA;
   2788     }
   2789   }
   2790 #endif
   2791   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2792                       width, height, SobelXYRow);
   2793 }
   2794 
   2795 // Apply a 4x4 polynomial to each ARGB pixel.
   2796 LIBYUV_API
   2797 int ARGBPolynomial(const uint8* src_argb,
   2798                    int src_stride_argb,
   2799                    uint8* dst_argb,
   2800                    int dst_stride_argb,
   2801                    const float* poly,
   2802                    int width,
   2803                    int height) {
   2804   int y;
   2805   void (*ARGBPolynomialRow)(const uint8* src_argb, uint8* dst_argb,
   2806                             const float* poly, int width) = ARGBPolynomialRow_C;
   2807   if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
   2808     return -1;
   2809   }
   2810   // Negative height means invert the image.
   2811   if (height < 0) {
   2812     height = -height;
   2813     src_argb = src_argb + (height - 1) * src_stride_argb;
   2814     src_stride_argb = -src_stride_argb;
   2815   }
   2816   // Coalesce rows.
   2817   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   2818     width *= height;
   2819     height = 1;
   2820     src_stride_argb = dst_stride_argb = 0;
   2821   }
   2822 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
   2823   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
   2824     ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
   2825   }
   2826 #endif
   2827 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
   2828   if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
   2829       IS_ALIGNED(width, 2)) {
   2830     ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
   2831   }
   2832 #endif
   2833 
   2834   for (y = 0; y < height; ++y) {
   2835     ARGBPolynomialRow(src_argb, dst_argb, poly, width);
   2836     src_argb += src_stride_argb;
   2837     dst_argb += dst_stride_argb;
   2838   }
   2839   return 0;
   2840 }
   2841 
   2842 // Convert plane of 16 bit shorts to half floats.
   2843 // Source values are multiplied by scale before storing as half float.
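         // For example, 10 bit content held in 16 bit shorts can be normalized
         // to 0.0 - 1.0 with an illustrative scale of 1.0f / 1023.0f, while
         // scale = 1.0f stores the raw integer values as half floats:
         //   HalfFloatPlane(src_y, src_stride_y, dst_y, dst_stride_y,
         //                  1.0f / 1023.0f, width, height);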
   2844 LIBYUV_API
   2845 int HalfFloatPlane(const uint16* src_y,
   2846                    int src_stride_y,
   2847                    uint16* dst_y,
   2848                    int dst_stride_y,
   2849                    float scale,
   2850                    int width,
   2851                    int height) {
   2852   int y;
   2853   void (*HalfFloatRow)(const uint16* src, uint16* dst, float scale, int width) =
   2854       HalfFloatRow_C;
   2855   if (!src_y || !dst_y || width <= 0 || height == 0) {
   2856     return -1;
   2857   }
   2858   src_stride_y >>= 1;
   2859   dst_stride_y >>= 1;
   2860   // Negative height means invert the image.
   2861   if (height < 0) {
   2862     height = -height;
   2863     src_y = src_y + (height - 1) * src_stride_y;
   2864     src_stride_y = -src_stride_y;
   2865   }
   2866   // Coalesce rows.
   2867   if (src_stride_y == width && dst_stride_y == width) {
   2868     width *= height;
   2869     height = 1;
   2870     src_stride_y = dst_stride_y = 0;
   2871   }
   2872 #if defined(HAS_HALFFLOATROW_SSE2)
   2873   if (TestCpuFlag(kCpuHasSSE2)) {
   2874     HalfFloatRow = HalfFloatRow_Any_SSE2;
   2875     if (IS_ALIGNED(width, 8)) {
   2876       HalfFloatRow = HalfFloatRow_SSE2;
   2877     }
   2878   }
   2879 #endif
   2880 #if defined(HAS_HALFFLOATROW_AVX2)
   2881   if (TestCpuFlag(kCpuHasAVX2)) {
   2882     HalfFloatRow = HalfFloatRow_Any_AVX2;
   2883     if (IS_ALIGNED(width, 16)) {
   2884       HalfFloatRow = HalfFloatRow_AVX2;
   2885     }
   2886   }
   2887 #endif
   2888 #if defined(HAS_HALFFLOATROW_F16C)
   2889   if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
   2890     HalfFloatRow =
   2891         (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
   2892     if (IS_ALIGNED(width, 16)) {
   2893       HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
   2894     }
   2895   }
   2896 #endif
   2897 #if defined(HAS_HALFFLOATROW_NEON)
   2898   if (TestCpuFlag(kCpuHasNEON)) {
   2899     HalfFloatRow =
   2900         (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
   2901     if (IS_ALIGNED(width, 8)) {
   2902       HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
   2903     }
   2904   }
   2905 #endif
   2906 
   2907   for (y = 0; y < height; ++y) {
   2908     HalfFloatRow(src_y, dst_y, scale, width);
   2909     src_y += src_stride_y;
   2910     dst_y += dst_stride_y;
   2911   }
   2912   return 0;
   2913 }
   2914 
    2915 // Apply a luma color table to each ARGB pixel.
   2916 LIBYUV_API
   2917 int ARGBLumaColorTable(const uint8* src_argb,
   2918                        int src_stride_argb,
   2919                        uint8* dst_argb,
   2920                        int dst_stride_argb,
   2921                        const uint8* luma,
   2922                        int width,
   2923                        int height) {
   2924   int y;
   2925   void (*ARGBLumaColorTableRow)(
   2926       const uint8* src_argb, uint8* dst_argb, int width, const uint8* luma,
   2927       const uint32 lumacoeff) = ARGBLumaColorTableRow_C;
   2928   if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
   2929     return -1;
   2930   }
   2931   // Negative height means invert the image.
   2932   if (height < 0) {
   2933     height = -height;
   2934     src_argb = src_argb + (height - 1) * src_stride_argb;
   2935     src_stride_argb = -src_stride_argb;
   2936   }
   2937   // Coalesce rows.
   2938   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   2939     width *= height;
   2940     height = 1;
   2941     src_stride_argb = dst_stride_argb = 0;
   2942   }
   2943 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
   2944   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
   2945     ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
   2946   }
   2947 #endif
   2948 
   2949   for (y = 0; y < height; ++y) {
   2950     ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
   2951     src_argb += src_stride_argb;
   2952     dst_argb += dst_stride_argb;
   2953   }
   2954   return 0;
   2955 }
   2956 
   2957 // Copy Alpha from one ARGB image to another.
   2958 LIBYUV_API
   2959 int ARGBCopyAlpha(const uint8* src_argb,
   2960                   int src_stride_argb,
   2961                   uint8* dst_argb,
   2962                   int dst_stride_argb,
   2963                   int width,
   2964                   int height) {
   2965   int y;
   2966   void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
   2967       ARGBCopyAlphaRow_C;
   2968   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   2969     return -1;
   2970   }
   2971   // Negative height means invert the image.
   2972   if (height < 0) {
   2973     height = -height;
   2974     src_argb = src_argb + (height - 1) * src_stride_argb;
   2975     src_stride_argb = -src_stride_argb;
   2976   }
   2977   // Coalesce rows.
   2978   if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
   2979     width *= height;
   2980     height = 1;
   2981     src_stride_argb = dst_stride_argb = 0;
   2982   }
   2983 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
   2984   if (TestCpuFlag(kCpuHasSSE2)) {
   2985     ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
   2986     if (IS_ALIGNED(width, 8)) {
   2987       ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
   2988     }
   2989   }
   2990 #endif
   2991 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
   2992   if (TestCpuFlag(kCpuHasAVX2)) {
   2993     ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
   2994     if (IS_ALIGNED(width, 16)) {
   2995       ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
   2996     }
   2997   }
   2998 #endif
   2999 
   3000   for (y = 0; y < height; ++y) {
   3001     ARGBCopyAlphaRow(src_argb, dst_argb, width);
   3002     src_argb += src_stride_argb;
   3003     dst_argb += dst_stride_argb;
   3004   }
   3005   return 0;
   3006 }
   3007 
   3008 // Extract just the alpha channel from ARGB.
   3009 LIBYUV_API
   3010 int ARGBExtractAlpha(const uint8* src_argb,
   3011                      int src_stride,
   3012                      uint8* dst_a,
   3013                      int dst_stride,
   3014                      int width,
   3015                      int height) {
   3016   if (!src_argb || !dst_a || width <= 0 || height == 0) {
   3017     return -1;
   3018   }
   3019   // Negative height means invert the image.
   3020   if (height < 0) {
   3021     height = -height;
   3022     src_argb += (height - 1) * src_stride;
   3023     src_stride = -src_stride;
   3024   }
   3025   // Coalesce rows.
   3026   if (src_stride == width * 4 && dst_stride == width) {
   3027     width *= height;
   3028     height = 1;
   3029     src_stride = dst_stride = 0;
   3030   }
   3031   void (*ARGBExtractAlphaRow)(const uint8* src_argb, uint8* dst_a, int width) =
   3032       ARGBExtractAlphaRow_C;
   3033 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
   3034   if (TestCpuFlag(kCpuHasSSE2)) {
   3035     ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
   3036                                                : ARGBExtractAlphaRow_Any_SSE2;
   3037   }
   3038 #endif
   3039 #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
   3040   if (TestCpuFlag(kCpuHasAVX2)) {
   3041     ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
   3042                                                 : ARGBExtractAlphaRow_Any_AVX2;
   3043   }
   3044 #endif
   3045 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
   3046   if (TestCpuFlag(kCpuHasNEON)) {
   3047     ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
   3048                                                 : ARGBExtractAlphaRow_Any_NEON;
   3049   }
   3050 #endif
   3051 
   3052   for (int y = 0; y < height; ++y) {
   3053     ARGBExtractAlphaRow(src_argb, dst_a, width);
   3054     src_argb += src_stride;
   3055     dst_a += dst_stride;
   3056   }
   3057   return 0;
   3058 }
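
// Example (illustrative sketch; names are hypothetical). Pulls the A byte of
// every ARGB pixel into a standalone 8-bit plane, e.g. for use as a mask:
//
//   enum { kWidth = 64, kHeight = 32 };
//   uint8 argb[kWidth * 4 * kHeight];
//   uint8 alpha[kWidth * kHeight];
//   ARGBExtractAlpha(argb, kWidth * 4, alpha, kWidth, kWidth, kHeight);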
   3059 
   3060 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
   3061 LIBYUV_API
   3062 int ARGBCopyYToAlpha(const uint8* src_y,
   3063                      int src_stride_y,
   3064                      uint8* dst_argb,
   3065                      int dst_stride_argb,
   3066                      int width,
   3067                      int height) {
   3068   int y;
   3069   void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
   3070       ARGBCopyYToAlphaRow_C;
   3071   if (!src_y || !dst_argb || width <= 0 || height == 0) {
   3072     return -1;
   3073   }
   3074   // Negative height means invert the image.
   3075   if (height < 0) {
   3076     height = -height;
   3077     src_y = src_y + (height - 1) * src_stride_y;
   3078     src_stride_y = -src_stride_y;
   3079   }
   3080   // Coalesce rows.
   3081   if (src_stride_y == width && dst_stride_argb == width * 4) {
   3082     width *= height;
   3083     height = 1;
   3084     src_stride_y = dst_stride_argb = 0;
   3085   }
   3086 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
   3087   if (TestCpuFlag(kCpuHasSSE2)) {
   3088     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
   3089     if (IS_ALIGNED(width, 8)) {
   3090       ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
   3091     }
   3092   }
   3093 #endif
   3094 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
   3095   if (TestCpuFlag(kCpuHasAVX2)) {
   3096     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
   3097     if (IS_ALIGNED(width, 16)) {
   3098       ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
   3099     }
   3100   }
   3101 #endif
   3102 
   3103   for (y = 0; y < height; ++y) {
   3104     ARGBCopyYToAlphaRow(src_y, dst_argb, width);
   3105     src_y += src_stride_y;
   3106     dst_argb += dst_stride_argb;
   3107   }
   3108   return 0;
   3109 }
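
// Example (illustrative sketch; names are hypothetical). Writes an 8-bit Y
// plane into the alpha channel of an existing ARGB buffer, the inverse of
// ARGBExtractAlpha above:
//
//   enum { kWidth = 64, kHeight = 32 };
//   uint8 y_plane[kWidth * kHeight];
//   uint8 argb[kWidth * 4 * kHeight];
//   ARGBCopyYToAlpha(y_plane, kWidth, argb, kWidth * 4, kWidth, kHeight);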
   3110 
    3111 // TODO(fbarchard): When width is even, consider splitting the Y channel
    3112 // directly. A SplitUVRow_Odd function could copy the remaining chroma.
   3113 
   3114 LIBYUV_API
   3115 int YUY2ToNV12(const uint8* src_yuy2,
   3116                int src_stride_yuy2,
   3117                uint8* dst_y,
   3118                int dst_stride_y,
   3119                uint8* dst_uv,
   3120                int dst_stride_uv,
   3121                int width,
   3122                int height) {
   3123   int y;
   3124   int halfwidth = (width + 1) >> 1;
   3125   void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
   3126                      int width) = SplitUVRow_C;
   3127   void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
   3128                          ptrdiff_t src_stride, int dst_width,
   3129                          int source_y_fraction) = InterpolateRow_C;
   3130   if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
   3131     return -1;
   3132   }
   3133   // Negative height means invert the image.
   3134   if (height < 0) {
   3135     height = -height;
   3136     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
   3137     src_stride_yuy2 = -src_stride_yuy2;
   3138   }
   3139 #if defined(HAS_SPLITUVROW_SSE2)
   3140   if (TestCpuFlag(kCpuHasSSE2)) {
   3141     SplitUVRow = SplitUVRow_Any_SSE2;
   3142     if (IS_ALIGNED(width, 16)) {
   3143       SplitUVRow = SplitUVRow_SSE2;
   3144     }
   3145   }
   3146 #endif
   3147 #if defined(HAS_SPLITUVROW_AVX2)
   3148   if (TestCpuFlag(kCpuHasAVX2)) {
   3149     SplitUVRow = SplitUVRow_Any_AVX2;
   3150     if (IS_ALIGNED(width, 32)) {
   3151       SplitUVRow = SplitUVRow_AVX2;
   3152     }
   3153   }
   3154 #endif
   3155 #if defined(HAS_SPLITUVROW_NEON)
   3156   if (TestCpuFlag(kCpuHasNEON)) {
   3157     SplitUVRow = SplitUVRow_Any_NEON;
   3158     if (IS_ALIGNED(width, 16)) {
   3159       SplitUVRow = SplitUVRow_NEON;
   3160     }
   3161   }
   3162 #endif
   3163 #if defined(HAS_INTERPOLATEROW_SSSE3)
   3164   if (TestCpuFlag(kCpuHasSSSE3)) {
   3165     InterpolateRow = InterpolateRow_Any_SSSE3;
   3166     if (IS_ALIGNED(width, 16)) {
   3167       InterpolateRow = InterpolateRow_SSSE3;
   3168     }
   3169   }
   3170 #endif
   3171 #if defined(HAS_INTERPOLATEROW_AVX2)
   3172   if (TestCpuFlag(kCpuHasAVX2)) {
   3173     InterpolateRow = InterpolateRow_Any_AVX2;
   3174     if (IS_ALIGNED(width, 32)) {
   3175       InterpolateRow = InterpolateRow_AVX2;
   3176     }
   3177   }
   3178 #endif
   3179 #if defined(HAS_INTERPOLATEROW_NEON)
   3180   if (TestCpuFlag(kCpuHasNEON)) {
   3181     InterpolateRow = InterpolateRow_Any_NEON;
   3182     if (IS_ALIGNED(width, 16)) {
   3183       InterpolateRow = InterpolateRow_NEON;
   3184     }
   3185   }
   3186 #endif
   3187 #if defined(HAS_INTERPOLATEROW_MSA)
   3188   if (TestCpuFlag(kCpuHasMSA)) {
   3189     InterpolateRow = InterpolateRow_Any_MSA;
   3190     if (IS_ALIGNED(width, 32)) {
   3191       InterpolateRow = InterpolateRow_MSA;
   3192     }
   3193   }
   3194 #endif
   3195 
   3196   {
   3197     int awidth = halfwidth * 2;
   3198     // row of y and 2 rows of uv
    3199     // Temporary buffer: one row of Y followed by two rows of interleaved UV.
   3200 
   3201     for (y = 0; y < height - 1; y += 2) {
   3202       // Split Y from UV.
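      // SplitUVRow treats the packed YUY2 row as byte pairs: the even bytes
      // (luma) land in 'rows' and the odd bytes (interleaved UV) land in
      // 'rows + awidth'.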
   3203       SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
   3204       memcpy(dst_y, rows, width);
   3205       SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
   3206       memcpy(dst_y + dst_stride_y, rows, width);
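      // Average the two interleaved UV rows (fraction 128 = 50/50 blend) into
      // one half-height UV row for NV12.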
   3207       InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
   3208       src_yuy2 += src_stride_yuy2 * 2;
   3209       dst_y += dst_stride_y * 2;
   3210       dst_uv += dst_stride_uv;
   3211     }
   3212     if (height & 1) {
   3213       // Split Y from UV.
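      // Odd height: the UV bytes of the final source row are written directly
      // to dst_uv with no vertical averaging.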
   3214       SplitUVRow(src_yuy2, rows, dst_uv, awidth);
   3215       memcpy(dst_y, rows, width);
   3216     }
   3217     free_aligned_buffer_64(rows);
   3218   }
   3219   return 0;
   3220 }
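
// Example (illustrative sketch; names and dimensions are hypothetical).
// Converts packed YUY2 (4:2:2) into NV12: a full-size Y plane plus an
// interleaved UV plane that is half width and half height, so dst_uv needs
// ((width + 1) / 2) * 2 bytes per row and (height + 1) / 2 rows:
//
//   enum { kWidth = 64, kHeight = 32 };
//   uint8 yuy2[kWidth * 2 * kHeight];  // 2 bytes per pixel.
//   uint8 y_plane[kWidth * kHeight];
//   uint8 uv_plane[((kWidth + 1) / 2) * 2 * ((kHeight + 1) / 2)];
//   YUY2ToNV12(yuy2, kWidth * 2, y_plane, kWidth,
//              uv_plane, ((kWidth + 1) / 2) * 2, kWidth, kHeight);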
   3221 
   3222 LIBYUV_API
   3223 int UYVYToNV12(const uint8* src_uyvy,
   3224                int src_stride_uyvy,
   3225                uint8* dst_y,
   3226                int dst_stride_y,
   3227                uint8* dst_uv,
   3228                int dst_stride_uv,
   3229                int width,
   3230                int height) {
   3231   int y;
   3232   int halfwidth = (width + 1) >> 1;
   3233   void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
   3234                      int width) = SplitUVRow_C;
   3235   void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
   3236                          ptrdiff_t src_stride, int dst_width,
   3237                          int source_y_fraction) = InterpolateRow_C;
   3238   if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
   3239     return -1;
   3240   }
   3241   // Negative height means invert the image.
   3242   if (height < 0) {
   3243     height = -height;
   3244     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
   3245     src_stride_uyvy = -src_stride_uyvy;
   3246   }
   3247 #if defined(HAS_SPLITUVROW_SSE2)
   3248   if (TestCpuFlag(kCpuHasSSE2)) {
   3249     SplitUVRow = SplitUVRow_Any_SSE2;
   3250     if (IS_ALIGNED(width, 16)) {
   3251       SplitUVRow = SplitUVRow_SSE2;
   3252     }
   3253   }
   3254 #endif
   3255 #if defined(HAS_SPLITUVROW_AVX2)
   3256   if (TestCpuFlag(kCpuHasAVX2)) {
   3257     SplitUVRow = SplitUVRow_Any_AVX2;
   3258     if (IS_ALIGNED(width, 32)) {
   3259       SplitUVRow = SplitUVRow_AVX2;
   3260     }
   3261   }
   3262 #endif
   3263 #if defined(HAS_SPLITUVROW_NEON)
   3264   if (TestCpuFlag(kCpuHasNEON)) {
   3265     SplitUVRow = SplitUVRow_Any_NEON;
   3266     if (IS_ALIGNED(width, 16)) {
   3267       SplitUVRow = SplitUVRow_NEON;
   3268     }
   3269   }
   3270 #endif
   3271 #if defined(HAS_INTERPOLATEROW_SSSE3)
   3272   if (TestCpuFlag(kCpuHasSSSE3)) {
   3273     InterpolateRow = InterpolateRow_Any_SSSE3;
   3274     if (IS_ALIGNED(width, 16)) {
   3275       InterpolateRow = InterpolateRow_SSSE3;
   3276     }
   3277   }
   3278 #endif
   3279 #if defined(HAS_INTERPOLATEROW_AVX2)
   3280   if (TestCpuFlag(kCpuHasAVX2)) {
   3281     InterpolateRow = InterpolateRow_Any_AVX2;
   3282     if (IS_ALIGNED(width, 32)) {
   3283       InterpolateRow = InterpolateRow_AVX2;
   3284     }
   3285   }
   3286 #endif
   3287 #if defined(HAS_INTERPOLATEROW_NEON)
   3288   if (TestCpuFlag(kCpuHasNEON)) {
   3289     InterpolateRow = InterpolateRow_Any_NEON;
   3290     if (IS_ALIGNED(width, 16)) {
   3291       InterpolateRow = InterpolateRow_NEON;
   3292     }
   3293   }
   3294 #endif
   3295 #if defined(HAS_INTERPOLATEROW_MSA)
   3296   if (TestCpuFlag(kCpuHasMSA)) {
   3297     InterpolateRow = InterpolateRow_Any_MSA;
   3298     if (IS_ALIGNED(width, 32)) {
   3299       InterpolateRow = InterpolateRow_MSA;
   3300     }
   3301   }
   3302 #endif
   3303 
   3304   {
   3305     int awidth = halfwidth * 2;
   3306     // row of y and 2 rows of uv
    3307     // Temporary buffer: one row of Y followed by two rows of interleaved UV.
   3308 
   3309     for (y = 0; y < height - 1; y += 2) {
   3310       // Split Y from UV.
   3311       SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
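      // In UYVY the chroma byte precedes the luma byte in each pair, so the
      // SplitUVRow destination pointers are swapped relative to YUY2ToNV12.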
   3312       memcpy(dst_y, rows, width);
   3313       SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
   3314       memcpy(dst_y + dst_stride_y, rows, width);
   3315       InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
   3316       src_uyvy += src_stride_uyvy * 2;
   3317       dst_y += dst_stride_y * 2;
   3318       dst_uv += dst_stride_uv;
   3319     }
   3320     if (height & 1) {
   3321       // Split Y from UV.
   3322       SplitUVRow(src_uyvy, dst_uv, rows, awidth);
   3323       memcpy(dst_y, rows, width);
   3324     }
   3325     free_aligned_buffer_64(rows);
   3326   }
   3327   return 0;
   3328 }
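
// Example (illustrative sketch; names are hypothetical). Same buffer layout
// as the YUY2ToNV12 example above, but with UYVY input:
//
//   enum { kWidth = 64, kHeight = 32 };
//   uint8 uyvy[kWidth * 2 * kHeight];
//   uint8 y_plane[kWidth * kHeight];
//   uint8 uv_plane[((kWidth + 1) / 2) * 2 * ((kHeight + 1) / 2)];
//   UYVYToNV12(uyvy, kWidth * 2, y_plane, kWidth,
//              uv_plane, ((kWidth + 1) / 2) * 2, kWidth, kHeight);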
   3329 
   3330 #ifdef __cplusplus
   3331 }  // extern "C"
   3332 }  // namespace libyuv
   3333 #endif
   3334