Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/planar_functions.h"
     12 
     13 #include <string.h>  // for memset()
     14 
     15 #include "libyuv/cpu_id.h"
     16 #ifdef HAVE_JPEG
     17 #include "libyuv/mjpeg_decoder.h"
     18 #endif
     19 #include "libyuv/row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 // Copy a plane of data
     27 LIBYUV_API
     28 void CopyPlane(const uint8* src_y, int src_stride_y,
     29                uint8* dst_y, int dst_stride_y,
     30                int width, int height) {
     31   int y;
     32   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
     33   // Coalesce rows.
     34   if (src_stride_y == width &&
     35       dst_stride_y == width) {
     36     width *= height;
     37     height = 1;
     38     src_stride_y = dst_stride_y = 0;
     39   }
     40   // Nothing to do.
     41   if (src_y == dst_y && src_stride_y == dst_stride_y) {
     42     return;
     43   }
     44 #if defined(HAS_COPYROW_X86)
     45   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
     46     CopyRow = CopyRow_X86;
     47   }
     48 #endif
     49 #if defined(HAS_COPYROW_SSE2)
     50   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
     51       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
     52       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
     53     CopyRow = CopyRow_SSE2;
     54   }
     55 #endif
     56 #if defined(HAS_COPYROW_ERMS)
     57   if (TestCpuFlag(kCpuHasERMS)) {
     58     CopyRow = CopyRow_ERMS;
     59   }
     60 #endif
     61 #if defined(HAS_COPYROW_NEON)
     62   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
     63     CopyRow = CopyRow_NEON;
     64   }
     65 #endif
     66 #if defined(HAS_COPYROW_MIPS)
     67   if (TestCpuFlag(kCpuHasMIPS)) {
     68     CopyRow = CopyRow_MIPS;
     69   }
     70 #endif
     71 
     72   // Copy plane
     73   for (y = 0; y < height; ++y) {
     74     CopyRow(src_y, dst_y, width);
     75     src_y += src_stride_y;
     76     dst_y += dst_stride_y;
     77   }
     78 }
     79 
     80 LIBYUV_API
     81 void CopyPlane_16(const uint16* src_y, int src_stride_y,
     82                   uint16* dst_y, int dst_stride_y,
     83                   int width, int height) {
     84   int y;
     85   void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
     86   // Coalesce rows.
     87   if (src_stride_y == width &&
     88       dst_stride_y == width) {
     89     width *= height;
     90     height = 1;
     91     src_stride_y = dst_stride_y = 0;
     92   }
     93 #if defined(HAS_COPYROW_16_X86)
     94   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
     95     CopyRow = CopyRow_16_X86;
     96   }
     97 #endif
     98 #if defined(HAS_COPYROW_16_SSE2)
     99   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
    100       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
    101       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    102     CopyRow = CopyRow_16_SSE2;
    103   }
    104 #endif
    105 #if defined(HAS_COPYROW_16_ERMS)
    106   if (TestCpuFlag(kCpuHasERMS)) {
    107     CopyRow = CopyRow_16_ERMS;
    108   }
    109 #endif
    110 #if defined(HAS_COPYROW_16_NEON)
    111   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    112     CopyRow = CopyRow_16_NEON;
    113   }
    114 #endif
    115 #if defined(HAS_COPYROW_16_MIPS)
    116   if (TestCpuFlag(kCpuHasMIPS)) {
    117     CopyRow = CopyRow_16_MIPS;
    118   }
    119 #endif
    120 
    121   // Copy plane
    122   for (y = 0; y < height; ++y) {
    123     CopyRow(src_y, dst_y, width);
    124     src_y += src_stride_y;
    125     dst_y += dst_stride_y;
    126   }
    127 }
    128 
    129 // Copy I422.
    130 LIBYUV_API
    131 int I422Copy(const uint8* src_y, int src_stride_y,
    132              const uint8* src_u, int src_stride_u,
    133              const uint8* src_v, int src_stride_v,
    134              uint8* dst_y, int dst_stride_y,
    135              uint8* dst_u, int dst_stride_u,
    136              uint8* dst_v, int dst_stride_v,
    137              int width, int height) {
    138   int halfwidth = (width + 1) >> 1;
    139   if (!src_y || !src_u || !src_v ||
    140       !dst_y || !dst_u || !dst_v ||
    141       width <= 0 || height == 0) {
    142     return -1;
    143   }
    144   // Negative height means invert the image.
    145   if (height < 0) {
    146     height = -height;
    147     src_y = src_y + (height - 1) * src_stride_y;
    148     src_u = src_u + (height - 1) * src_stride_u;
    149     src_v = src_v + (height - 1) * src_stride_v;
    150     src_stride_y = -src_stride_y;
    151     src_stride_u = -src_stride_u;
    152     src_stride_v = -src_stride_v;
    153   }
    154   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    155   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
    156   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
    157   return 0;
    158 }
    159 
    160 // Copy I444.
    161 LIBYUV_API
    162 int I444Copy(const uint8* src_y, int src_stride_y,
    163              const uint8* src_u, int src_stride_u,
    164              const uint8* src_v, int src_stride_v,
    165              uint8* dst_y, int dst_stride_y,
    166              uint8* dst_u, int dst_stride_u,
    167              uint8* dst_v, int dst_stride_v,
    168              int width, int height) {
    169   if (!src_y || !src_u || !src_v ||
    170       !dst_y || !dst_u || !dst_v ||
    171       width <= 0 || height == 0) {
    172     return -1;
    173   }
    174   // Negative height means invert the image.
    175   if (height < 0) {
    176     height = -height;
    177     src_y = src_y + (height - 1) * src_stride_y;
    178     src_u = src_u + (height - 1) * src_stride_u;
    179     src_v = src_v + (height - 1) * src_stride_v;
    180     src_stride_y = -src_stride_y;
    181     src_stride_u = -src_stride_u;
    182     src_stride_v = -src_stride_v;
    183   }
    184 
    185   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    186   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    187   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    188   return 0;
    189 }
    190 
    191 // Copy I400.
    192 LIBYUV_API
    193 int I400ToI400(const uint8* src_y, int src_stride_y,
    194                uint8* dst_y, int dst_stride_y,
    195                int width, int height) {
    196   if (!src_y || !dst_y || width <= 0 || height == 0) {
    197     return -1;
    198   }
    199   // Negative height means invert the image.
    200   if (height < 0) {
    201     height = -height;
    202     src_y = src_y + (height - 1) * src_stride_y;
    203     src_stride_y = -src_stride_y;
    204   }
    205   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    206   return 0;
    207 }
    208 
    209 // Convert I420 to I400.
    210 LIBYUV_API
    211 int I420ToI400(const uint8* src_y, int src_stride_y,
    212                const uint8* src_u, int src_stride_u,
    213                const uint8* src_v, int src_stride_v,
    214                uint8* dst_y, int dst_stride_y,
    215                int width, int height) {
    216   if (!src_y || !dst_y || width <= 0 || height == 0) {
    217     return -1;
    218   }
    219   // Negative height means invert the image.
    220   if (height < 0) {
    221     height = -height;
    222     src_y = src_y + (height - 1) * src_stride_y;
    223     src_stride_y = -src_stride_y;
    224   }
    225   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    226   return 0;
    227 }
    228 
    229 // Mirror a plane of data.
    230 void MirrorPlane(const uint8* src_y, int src_stride_y,
    231                  uint8* dst_y, int dst_stride_y,
    232                  int width, int height) {
    233   int y;
    234   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
    235   // Negative height means invert the image.
    236   if (height < 0) {
    237     height = -height;
    238     src_y = src_y + (height - 1) * src_stride_y;
    239     src_stride_y = -src_stride_y;
    240   }
    241 #if defined(HAS_MIRRORROW_NEON)
    242   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    243     MirrorRow = MirrorRow_NEON;
    244   }
    245 #endif
    246 #if defined(HAS_MIRRORROW_SSE2)
    247   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
    248     MirrorRow = MirrorRow_SSE2;
    249   }
    250 #endif
    251 #if defined(HAS_MIRRORROW_SSSE3)
    252   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
    253       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
    254       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    255     MirrorRow = MirrorRow_SSSE3;
    256   }
    257 #endif
    258 #if defined(HAS_MIRRORROW_AVX2)
    259   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    260     MirrorRow = MirrorRow_AVX2;
    261   }
    262 #endif
    263 
    264   // Mirror plane
    265   for (y = 0; y < height; ++y) {
    266     MirrorRow(src_y, dst_y, width);
    267     src_y += src_stride_y;
    268     dst_y += dst_stride_y;
    269   }
    270 }
    271 
    272 // Convert YUY2 to I422.
    273 LIBYUV_API
    274 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
    275                uint8* dst_y, int dst_stride_y,
    276                uint8* dst_u, int dst_stride_u,
    277                uint8* dst_v, int dst_stride_v,
    278                int width, int height) {
    279   int y;
    280   void (*YUY2ToUV422Row)(const uint8* src_yuy2,
    281                          uint8* dst_u, uint8* dst_v, int pix) =
    282       YUY2ToUV422Row_C;
    283   void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
    284       YUY2ToYRow_C;
    285   // Negative height means invert the image.
    286   if (height < 0) {
    287     height = -height;
    288     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    289     src_stride_yuy2 = -src_stride_yuy2;
    290   }
    291   // Coalesce rows.
    292   if (src_stride_yuy2 == width * 2 &&
    293       dst_stride_y == width &&
    294       dst_stride_u * 2 == width &&
    295       dst_stride_v * 2 == width) {
    296     width *= height;
    297     height = 1;
    298     src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    299   }
    300 #if defined(HAS_YUY2TOYROW_SSE2)
    301   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    302     YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    303     YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    304     if (IS_ALIGNED(width, 16)) {
    305       YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
    306       YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
    307       if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
    308         YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
    309         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    310           YUY2ToYRow = YUY2ToYRow_SSE2;
    311         }
    312       }
    313     }
    314   }
    315 #endif
    316 #if defined(HAS_YUY2TOYROW_AVX2)
    317   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    318     YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    319     YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    320     if (IS_ALIGNED(width, 32)) {
    321       YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
    322       YUY2ToYRow = YUY2ToYRow_AVX2;
    323     }
    324   }
    325 #endif
    326 #if defined(HAS_YUY2TOYROW_NEON)
    327   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    328     YUY2ToYRow = YUY2ToYRow_Any_NEON;
    329     if (width >= 16) {
    330       YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    331     }
    332     if (IS_ALIGNED(width, 16)) {
    333       YUY2ToYRow = YUY2ToYRow_NEON;
    334       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    335     }
    336   }
    337 #endif
    338 
    339   for (y = 0; y < height; ++y) {
    340     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    341     YUY2ToYRow(src_yuy2, dst_y, width);
    342     src_yuy2 += src_stride_yuy2;
    343     dst_y += dst_stride_y;
    344     dst_u += dst_stride_u;
    345     dst_v += dst_stride_v;
    346   }
    347   return 0;
    348 }
    349 
    350 // Convert UYVY to I422.
    351 LIBYUV_API
    352 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
    353                uint8* dst_y, int dst_stride_y,
    354                uint8* dst_u, int dst_stride_u,
    355                uint8* dst_v, int dst_stride_v,
    356                int width, int height) {
    357   int y;
    358   void (*UYVYToUV422Row)(const uint8* src_uyvy,
    359                          uint8* dst_u, uint8* dst_v, int pix) =
    360       UYVYToUV422Row_C;
    361   void (*UYVYToYRow)(const uint8* src_uyvy,
    362                      uint8* dst_y, int pix) = UYVYToYRow_C;
    363   // Negative height means invert the image.
    364   if (height < 0) {
    365     height = -height;
    366     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    367     src_stride_uyvy = -src_stride_uyvy;
    368   }
    369   // Coalesce rows.
    370   if (src_stride_uyvy == width * 2 &&
    371       dst_stride_y == width &&
    372       dst_stride_u * 2 == width &&
    373       dst_stride_v * 2 == width) {
    374     width *= height;
    375     height = 1;
    376     src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    377   }
    378 #if defined(HAS_UYVYTOYROW_SSE2)
    379   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    380     UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    381     UYVYToYRow = UYVYToYRow_Any_SSE2;
    382     if (IS_ALIGNED(width, 16)) {
    383       UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
    384       UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
    385       if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
    386         UYVYToUV422Row = UYVYToUV422Row_SSE2;
    387         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    388           UYVYToYRow = UYVYToYRow_SSE2;
    389         }
    390       }
    391     }
    392   }
    393 #endif
    394 #if defined(HAS_UYVYTOYROW_AVX2)
    395   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    396     UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    397     UYVYToYRow = UYVYToYRow_Any_AVX2;
    398     if (IS_ALIGNED(width, 32)) {
    399       UYVYToUV422Row = UYVYToUV422Row_AVX2;
    400       UYVYToYRow = UYVYToYRow_AVX2;
    401     }
    402   }
    403 #endif
    404 #if defined(HAS_UYVYTOYROW_NEON)
    405   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    406     UYVYToYRow = UYVYToYRow_Any_NEON;
    407     if (width >= 16) {
    408       UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    409     }
    410     if (IS_ALIGNED(width, 16)) {
    411       UYVYToYRow = UYVYToYRow_NEON;
    412       UYVYToUV422Row = UYVYToUV422Row_NEON;
    413     }
    414   }
    415 #endif
    416 
    417   for (y = 0; y < height; ++y) {
    418     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    419     UYVYToYRow(src_uyvy, dst_y, width);
    420     src_uyvy += src_stride_uyvy;
    421     dst_y += dst_stride_y;
    422     dst_u += dst_stride_u;
    423     dst_v += dst_stride_v;
    424   }
    425   return 0;
    426 }
    427 
    428 // Mirror I400 with optional flipping
    429 LIBYUV_API
    430 int I400Mirror(const uint8* src_y, int src_stride_y,
    431                uint8* dst_y, int dst_stride_y,
    432                int width, int height) {
    433   if (!src_y || !dst_y ||
    434       width <= 0 || height == 0) {
    435     return -1;
    436   }
    437   // Negative height means invert the image.
    438   if (height < 0) {
    439     height = -height;
    440     src_y = src_y + (height - 1) * src_stride_y;
    441     src_stride_y = -src_stride_y;
    442   }
    443 
    444   MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    445   return 0;
    446 }
    447 
    448 // Mirror I420 with optional flipping
    449 LIBYUV_API
    450 int I420Mirror(const uint8* src_y, int src_stride_y,
    451                const uint8* src_u, int src_stride_u,
    452                const uint8* src_v, int src_stride_v,
    453                uint8* dst_y, int dst_stride_y,
    454                uint8* dst_u, int dst_stride_u,
    455                uint8* dst_v, int dst_stride_v,
    456                int width, int height) {
    457   int halfwidth = (width + 1) >> 1;
    458   int halfheight = (height + 1) >> 1;
    459   if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
    460       width <= 0 || height == 0) {
    461     return -1;
    462   }
    463   // Negative height means invert the image.
    464   if (height < 0) {
    465     height = -height;
    466     halfheight = (height + 1) >> 1;
    467     src_y = src_y + (height - 1) * src_stride_y;
    468     src_u = src_u + (halfheight - 1) * src_stride_u;
    469     src_v = src_v + (halfheight - 1) * src_stride_v;
    470     src_stride_y = -src_stride_y;
    471     src_stride_u = -src_stride_u;
    472     src_stride_v = -src_stride_v;
    473   }
    474 
    475   if (dst_y) {
    476     MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    477   }
    478   MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
    479   MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
    480   return 0;
    481 }
    482 
    483 // ARGB mirror.
    484 LIBYUV_API
    485 int ARGBMirror(const uint8* src_argb, int src_stride_argb,
    486                uint8* dst_argb, int dst_stride_argb,
    487                int width, int height) {
    488   int y;
    489   void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
    490       ARGBMirrorRow_C;
    491   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    492     return -1;
    493   }
    494   // Negative height means invert the image.
    495   if (height < 0) {
    496     height = -height;
    497     src_argb = src_argb + (height - 1) * src_stride_argb;
    498     src_stride_argb = -src_stride_argb;
    499   }
    500 
    501 #if defined(HAS_ARGBMIRRORROW_SSSE3)
    502   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
    503       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
    504       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    505     ARGBMirrorRow = ARGBMirrorRow_SSSE3;
    506   }
    507 #endif
    508 #if defined(HAS_ARGBMIRRORROW_AVX2)
    509   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
    510     ARGBMirrorRow = ARGBMirrorRow_AVX2;
    511   }
    512 #endif
    513 #if defined(HAS_ARGBMIRRORROW_NEON)
    514   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
    515     ARGBMirrorRow = ARGBMirrorRow_NEON;
    516   }
    517 #endif
    518 
    519   // Mirror plane
    520   for (y = 0; y < height; ++y) {
    521     ARGBMirrorRow(src_argb, dst_argb, width);
    522     src_argb += src_stride_argb;
    523     dst_argb += dst_stride_argb;
    524   }
    525   return 0;
    526 }
    527 
    528 // Get a blender that optimized for the CPU, alignment and pixel count.
    529 // As there are 6 blenders to choose from, the caller should try to use
    530 // the same blend function for all pixels if possible.
    531 LIBYUV_API
    532 ARGBBlendRow GetARGBBlend() {
    533   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    534                        uint8* dst_argb, int width) = ARGBBlendRow_C;
    535 #if defined(HAS_ARGBBLENDROW_SSSE3)
    536   if (TestCpuFlag(kCpuHasSSSE3)) {
    537     ARGBBlendRow = ARGBBlendRow_SSSE3;
    538     return ARGBBlendRow;
    539   }
    540 #endif
    541 #if defined(HAS_ARGBBLENDROW_SSE2)
    542   if (TestCpuFlag(kCpuHasSSE2)) {
    543     ARGBBlendRow = ARGBBlendRow_SSE2;
    544   }
    545 #endif
    546 #if defined(HAS_ARGBBLENDROW_NEON)
    547   if (TestCpuFlag(kCpuHasNEON)) {
    548     ARGBBlendRow = ARGBBlendRow_NEON;
    549   }
    550 #endif
    551   return ARGBBlendRow;
    552 }
    553 
    554 // Alpha Blend 2 ARGB images and store to destination.
    555 LIBYUV_API
    556 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
    557               const uint8* src_argb1, int src_stride_argb1,
    558               uint8* dst_argb, int dst_stride_argb,
    559               int width, int height) {
    560   int y;
    561   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    562                        uint8* dst_argb, int width) = GetARGBBlend();
    563   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    564     return -1;
    565   }
    566   // Negative height means invert the image.
    567   if (height < 0) {
    568     height = -height;
    569     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    570     dst_stride_argb = -dst_stride_argb;
    571   }
    572   // Coalesce rows.
    573   if (src_stride_argb0 == width * 4 &&
    574       src_stride_argb1 == width * 4 &&
    575       dst_stride_argb == width * 4) {
    576     width *= height;
    577     height = 1;
    578     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    579   }
    580 
    581   for (y = 0; y < height; ++y) {
    582     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    583     src_argb0 += src_stride_argb0;
    584     src_argb1 += src_stride_argb1;
    585     dst_argb += dst_stride_argb;
    586   }
    587   return 0;
    588 }
    589 
    590 // Multiply 2 ARGB images and store to destination.
    591 LIBYUV_API
    592 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
    593                  const uint8* src_argb1, int src_stride_argb1,
    594                  uint8* dst_argb, int dst_stride_argb,
    595                  int width, int height) {
    596   int y;
    597   void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
    598                           int width) = ARGBMultiplyRow_C;
    599   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    600     return -1;
    601   }
    602   // Negative height means invert the image.
    603   if (height < 0) {
    604     height = -height;
    605     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    606     dst_stride_argb = -dst_stride_argb;
    607   }
    608   // Coalesce rows.
    609   if (src_stride_argb0 == width * 4 &&
    610       src_stride_argb1 == width * 4 &&
    611       dst_stride_argb == width * 4) {
    612     width *= height;
    613     height = 1;
    614     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    615   }
    616 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
    617   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    618     ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    619     if (IS_ALIGNED(width, 4)) {
    620       ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    621     }
    622   }
    623 #endif
    624 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
    625   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    626     ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    627     if (IS_ALIGNED(width, 8)) {
    628       ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    629     }
    630   }
    631 #endif
    632 #if defined(HAS_ARGBMULTIPLYROW_NEON)
    633   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    634     ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    635     if (IS_ALIGNED(width, 8)) {
    636       ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    637     }
    638   }
    639 #endif
    640 
    641   // Multiply plane
    642   for (y = 0; y < height; ++y) {
    643     ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    644     src_argb0 += src_stride_argb0;
    645     src_argb1 += src_stride_argb1;
    646     dst_argb += dst_stride_argb;
    647   }
    648   return 0;
    649 }
    650 
    651 // Add 2 ARGB images and store to destination.
    652 LIBYUV_API
    653 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
    654             const uint8* src_argb1, int src_stride_argb1,
    655             uint8* dst_argb, int dst_stride_argb,
    656             int width, int height) {
    657   int y;
    658   void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
    659                      int width) = ARGBAddRow_C;
    660   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    661     return -1;
    662   }
    663   // Negative height means invert the image.
    664   if (height < 0) {
    665     height = -height;
    666     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    667     dst_stride_argb = -dst_stride_argb;
    668   }
    669   // Coalesce rows.
    670   if (src_stride_argb0 == width * 4 &&
    671       src_stride_argb1 == width * 4 &&
    672       dst_stride_argb == width * 4) {
    673     width *= height;
    674     height = 1;
    675     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    676   }
    677 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
    678   if (TestCpuFlag(kCpuHasSSE2)) {
    679     ARGBAddRow = ARGBAddRow_SSE2;
    680   }
    681 #endif
    682 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
    683   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    684     ARGBAddRow = ARGBAddRow_Any_SSE2;
    685     if (IS_ALIGNED(width, 4)) {
    686       ARGBAddRow = ARGBAddRow_SSE2;
    687     }
    688   }
    689 #endif
    690 #if defined(HAS_ARGBADDROW_AVX2)
    691   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    692     ARGBAddRow = ARGBAddRow_Any_AVX2;
    693     if (IS_ALIGNED(width, 8)) {
    694       ARGBAddRow = ARGBAddRow_AVX2;
    695     }
    696   }
    697 #endif
    698 #if defined(HAS_ARGBADDROW_NEON)
    699   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    700     ARGBAddRow = ARGBAddRow_Any_NEON;
    701     if (IS_ALIGNED(width, 8)) {
    702       ARGBAddRow = ARGBAddRow_NEON;
    703     }
    704   }
    705 #endif
    706 
    707   // Add plane
    708   for (y = 0; y < height; ++y) {
    709     ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    710     src_argb0 += src_stride_argb0;
    711     src_argb1 += src_stride_argb1;
    712     dst_argb += dst_stride_argb;
    713   }
    714   return 0;
    715 }
    716 
    717 // Subtract 2 ARGB images and store to destination.
    718 LIBYUV_API
    719 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
    720                  const uint8* src_argb1, int src_stride_argb1,
    721                  uint8* dst_argb, int dst_stride_argb,
    722                  int width, int height) {
    723   int y;
    724   void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
    725                           int width) = ARGBSubtractRow_C;
    726   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    727     return -1;
    728   }
    729   // Negative height means invert the image.
    730   if (height < 0) {
    731     height = -height;
    732     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    733     dst_stride_argb = -dst_stride_argb;
    734   }
    735   // Coalesce rows.
    736   if (src_stride_argb0 == width * 4 &&
    737       src_stride_argb1 == width * 4 &&
    738       dst_stride_argb == width * 4) {
    739     width *= height;
    740     height = 1;
    741     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    742   }
    743 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
    744   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    745     ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    746     if (IS_ALIGNED(width, 4)) {
    747       ARGBSubtractRow = ARGBSubtractRow_SSE2;
    748     }
    749   }
    750 #endif
    751 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
    752   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    753     ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    754     if (IS_ALIGNED(width, 8)) {
    755       ARGBSubtractRow = ARGBSubtractRow_AVX2;
    756     }
    757   }
    758 #endif
    759 #if defined(HAS_ARGBSUBTRACTROW_NEON)
    760   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    761     ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    762     if (IS_ALIGNED(width, 8)) {
    763       ARGBSubtractRow = ARGBSubtractRow_NEON;
    764     }
    765   }
    766 #endif
    767 
    768   // Subtract plane
    769   for (y = 0; y < height; ++y) {
    770     ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    771     src_argb0 += src_stride_argb0;
    772     src_argb1 += src_stride_argb1;
    773     dst_argb += dst_stride_argb;
    774   }
    775   return 0;
    776 }
    777 
    778 // Convert I422 to BGRA.
    779 LIBYUV_API
    780 int I422ToBGRA(const uint8* src_y, int src_stride_y,
    781                const uint8* src_u, int src_stride_u,
    782                const uint8* src_v, int src_stride_v,
    783                uint8* dst_bgra, int dst_stride_bgra,
    784                int width, int height) {
    785   int y;
    786   void (*I422ToBGRARow)(const uint8* y_buf,
    787                         const uint8* u_buf,
    788                         const uint8* v_buf,
    789                         uint8* rgb_buf,
    790                         int width) = I422ToBGRARow_C;
    791   if (!src_y || !src_u || !src_v ||
    792       !dst_bgra ||
    793       width <= 0 || height == 0) {
    794     return -1;
    795   }
    796   // Negative height means invert the image.
    797   if (height < 0) {
    798     height = -height;
    799     dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    800     dst_stride_bgra = -dst_stride_bgra;
    801   }
    802   // Coalesce rows.
    803   if (src_stride_y == width &&
    804       src_stride_u * 2 == width &&
    805       src_stride_v * 2 == width &&
    806       dst_stride_bgra == width * 4) {
    807     width *= height;
    808     height = 1;
    809     src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
    810   }
    811 #if defined(HAS_I422TOBGRAROW_NEON)
    812   if (TestCpuFlag(kCpuHasNEON)) {
    813     I422ToBGRARow = I422ToBGRARow_Any_NEON;
    814     if (IS_ALIGNED(width, 16)) {
    815       I422ToBGRARow = I422ToBGRARow_NEON;
    816     }
    817   }
    818 #elif defined(HAS_I422TOBGRAROW_SSSE3)
    819   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    820     I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
    821     if (IS_ALIGNED(width, 8)) {
    822       I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
    823       if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
    824         I422ToBGRARow = I422ToBGRARow_SSSE3;
    825       }
    826     }
    827   }
    828 #elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
    829   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
    830       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
    831       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
    832       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
    833       IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
    834     I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
    835   }
    836 #endif
    837 
    838   for (y = 0; y < height; ++y) {
    839     I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    840     dst_bgra += dst_stride_bgra;
    841     src_y += src_stride_y;
    842     src_u += src_stride_u;
    843     src_v += src_stride_v;
    844   }
    845   return 0;
    846 }
    847 
    848 // Convert I422 to ABGR.
    849 LIBYUV_API
    850 int I422ToABGR(const uint8* src_y, int src_stride_y,
    851                const uint8* src_u, int src_stride_u,
    852                const uint8* src_v, int src_stride_v,
    853                uint8* dst_abgr, int dst_stride_abgr,
    854                int width, int height) {
    855   int y;
    856   void (*I422ToABGRRow)(const uint8* y_buf,
    857                         const uint8* u_buf,
    858                         const uint8* v_buf,
    859                         uint8* rgb_buf,
    860                         int width) = I422ToABGRRow_C;
    861   if (!src_y || !src_u || !src_v ||
    862       !dst_abgr ||
    863       width <= 0 || height == 0) {
    864     return -1;
    865   }
    866   // Negative height means invert the image.
    867   if (height < 0) {
    868     height = -height;
    869     dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    870     dst_stride_abgr = -dst_stride_abgr;
    871   }
    872   // Coalesce rows.
    873   if (src_stride_y == width &&
    874       src_stride_u * 2 == width &&
    875       src_stride_v * 2 == width &&
    876       dst_stride_abgr == width * 4) {
    877     width *= height;
    878     height = 1;
    879     src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
    880   }
    881 #if defined(HAS_I422TOABGRROW_NEON)
    882   if (TestCpuFlag(kCpuHasNEON)) {
    883     I422ToABGRRow = I422ToABGRRow_Any_NEON;
    884     if (IS_ALIGNED(width, 16)) {
    885       I422ToABGRRow = I422ToABGRRow_NEON;
    886     }
    887   }
    888 #elif defined(HAS_I422TOABGRROW_SSSE3)
    889   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    890     I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    891     if (IS_ALIGNED(width, 8)) {
    892       I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
    893       if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
    894         I422ToABGRRow = I422ToABGRRow_SSSE3;
    895       }
    896     }
    897   }
    898 #endif
    899 
    900   for (y = 0; y < height; ++y) {
    901     I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    902     dst_abgr += dst_stride_abgr;
    903     src_y += src_stride_y;
    904     src_u += src_stride_u;
    905     src_v += src_stride_v;
    906   }
    907   return 0;
    908 }
    909 
    910 // Convert I422 to RGBA.
    911 LIBYUV_API
    912 int I422ToRGBA(const uint8* src_y, int src_stride_y,
    913                const uint8* src_u, int src_stride_u,
    914                const uint8* src_v, int src_stride_v,
    915                uint8* dst_rgba, int dst_stride_rgba,
    916                int width, int height) {
    917   int y;
    918   void (*I422ToRGBARow)(const uint8* y_buf,
    919                         const uint8* u_buf,
    920                         const uint8* v_buf,
    921                         uint8* rgb_buf,
    922                         int width) = I422ToRGBARow_C;
    923   if (!src_y || !src_u || !src_v ||
    924       !dst_rgba ||
    925       width <= 0 || height == 0) {
    926     return -1;
    927   }
    928   // Negative height means invert the image.
    929   if (height < 0) {
    930     height = -height;
    931     dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    932     dst_stride_rgba = -dst_stride_rgba;
    933   }
    934   // Coalesce rows.
    935   if (src_stride_y == width &&
    936       src_stride_u * 2 == width &&
    937       src_stride_v * 2 == width &&
    938       dst_stride_rgba == width * 4) {
    939     width *= height;
    940     height = 1;
    941     src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
    942   }
    943 #if defined(HAS_I422TORGBAROW_NEON)
    944   if (TestCpuFlag(kCpuHasNEON)) {
    945     I422ToRGBARow = I422ToRGBARow_Any_NEON;
    946     if (IS_ALIGNED(width, 16)) {
    947       I422ToRGBARow = I422ToRGBARow_NEON;
    948     }
    949   }
    950 #elif defined(HAS_I422TORGBAROW_SSSE3)
    951   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    952     I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    953     if (IS_ALIGNED(width, 8)) {
    954       I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
    955       if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
    956         I422ToRGBARow = I422ToRGBARow_SSSE3;
    957       }
    958     }
    959   }
    960 #endif
    961 
    962   for (y = 0; y < height; ++y) {
    963     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    964     dst_rgba += dst_stride_rgba;
    965     src_y += src_stride_y;
    966     src_u += src_stride_u;
    967     src_v += src_stride_v;
    968   }
    969   return 0;
    970 }
    971 
    972 // Convert NV12 to RGB565.
    973 LIBYUV_API
    974 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
    975                  const uint8* src_uv, int src_stride_uv,
    976                  uint8* dst_rgb565, int dst_stride_rgb565,
    977                  int width, int height) {
    978   int y;
    979   void (*NV12ToRGB565Row)(const uint8* y_buf,
    980                           const uint8* uv_buf,
    981                           uint8* rgb_buf,
    982                           int width) = NV12ToRGB565Row_C;
    983   if (!src_y || !src_uv || !dst_rgb565 ||
    984       width <= 0 || height == 0) {
    985     return -1;
    986   }
    987   // Negative height means invert the image.
    988   if (height < 0) {
    989     height = -height;
    990     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    991     dst_stride_rgb565 = -dst_stride_rgb565;
    992   }
    993 #if defined(HAS_NV12TORGB565ROW_SSSE3)
    994   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    995     NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    996     if (IS_ALIGNED(width, 8)) {
    997       NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    998     }
    999   }
   1000 #elif defined(HAS_NV12TORGB565ROW_NEON)
   1001   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1002     NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
   1003     if (IS_ALIGNED(width, 8)) {
   1004       NV12ToRGB565Row = NV12ToRGB565Row_NEON;
   1005     }
   1006   }
   1007 #endif
   1008 
   1009   for (y = 0; y < height; ++y) {
   1010     NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
   1011     dst_rgb565 += dst_stride_rgb565;
   1012     src_y += src_stride_y;
   1013     if (y & 1) {
   1014       src_uv += src_stride_uv;
   1015     }
   1016   }
   1017   return 0;
   1018 }
   1019 
   1020 // Convert NV21 to RGB565.
   1021 LIBYUV_API
   1022 int NV21ToRGB565(const uint8* src_y, int src_stride_y,
   1023                  const uint8* src_vu, int src_stride_vu,
   1024                  uint8* dst_rgb565, int dst_stride_rgb565,
   1025                  int width, int height) {
   1026   int y;
   1027   void (*NV21ToRGB565Row)(const uint8* y_buf,
   1028                           const uint8* src_vu,
   1029                           uint8* rgb_buf,
   1030                           int width) = NV21ToRGB565Row_C;
   1031   if (!src_y || !src_vu || !dst_rgb565 ||
   1032       width <= 0 || height == 0) {
   1033     return -1;
   1034   }
   1035   // Negative height means invert the image.
   1036   if (height < 0) {
   1037     height = -height;
   1038     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
   1039     dst_stride_rgb565 = -dst_stride_rgb565;
   1040   }
   1041 #if defined(HAS_NV21TORGB565ROW_SSSE3)
   1042   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
   1043     NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
   1044     if (IS_ALIGNED(width, 8)) {
   1045       NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
   1046     }
   1047   }
   1048 #elif defined(HAS_NV21TORGB565ROW_NEON)
   1049   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1050     NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
   1051     if (IS_ALIGNED(width, 8)) {
   1052       NV21ToRGB565Row = NV21ToRGB565Row_NEON;
   1053     }
   1054   }
   1055 #endif
   1056 
   1057   for (y = 0; y < height; ++y) {
   1058     NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
   1059     dst_rgb565 += dst_stride_rgb565;
   1060     src_y += src_stride_y;
   1061     if (y & 1) {
   1062       src_vu += src_stride_vu;
   1063     }
   1064   }
   1065   return 0;
   1066 }
   1067 
   1068 LIBYUV_API
   1069 void SetPlane(uint8* dst_y, int dst_stride_y,
   1070               int width, int height,
   1071               uint32 value) {
   1072   int y;
   1073   uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
   1074   void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
   1075   // Coalesce rows.
   1076   if (dst_stride_y == width) {
   1077     width *= height;
   1078     height = 1;
   1079     dst_stride_y = 0;
   1080   }
   1081 #if defined(HAS_SETROW_NEON)
   1082   if (TestCpuFlag(kCpuHasNEON) &&
   1083       IS_ALIGNED(width, 16) &&
   1084       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
   1085     SetRow = SetRow_NEON;
   1086   }
   1087 #endif
   1088 #if defined(HAS_SETROW_X86)
   1089   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
   1090     SetRow = SetRow_X86;
   1091   }
   1092 #endif
   1093 
   1094   // Set plane
   1095   for (y = 0; y < height; ++y) {
   1096     SetRow(dst_y, v32, width);
   1097     dst_y += dst_stride_y;
   1098   }
   1099 }
   1100 
   1101 // Draw a rectangle into I420
   1102 LIBYUV_API
   1103 int I420Rect(uint8* dst_y, int dst_stride_y,
   1104              uint8* dst_u, int dst_stride_u,
   1105              uint8* dst_v, int dst_stride_v,
   1106              int x, int y,
   1107              int width, int height,
   1108              int value_y, int value_u, int value_v) {
   1109   int halfwidth = (width + 1) >> 1;
   1110   int halfheight = (height + 1) >> 1;
   1111   uint8* start_y = dst_y + y * dst_stride_y + x;
   1112   uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
   1113   uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
   1114   if (!dst_y || !dst_u || !dst_v ||
   1115       width <= 0 || height <= 0 ||
   1116       x < 0 || y < 0 ||
   1117       value_y < 0 || value_y > 255 ||
   1118       value_u < 0 || value_u > 255 ||
   1119       value_v < 0 || value_v > 255) {
   1120     return -1;
   1121   }
   1122 
   1123   SetPlane(start_y, dst_stride_y, width, height, value_y);
   1124   SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
   1125   SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
   1126   return 0;
   1127 }
   1128 
   1129 // Draw a rectangle into ARGB
   1130 LIBYUV_API
   1131 int ARGBRect(uint8* dst_argb, int dst_stride_argb,
   1132              int dst_x, int dst_y,
   1133              int width, int height,
   1134              uint32 value) {
   1135   if (!dst_argb ||
   1136       width <= 0 || height <= 0 ||
   1137       dst_x < 0 || dst_y < 0) {
   1138     return -1;
   1139   }
   1140   dst_argb += dst_y * dst_stride_argb + dst_x * 4;
   1141   // Coalesce rows.
   1142   if (dst_stride_argb == width * 4) {
   1143     width *= height;
   1144     height = 1;
   1145     dst_stride_argb = 0;
   1146   }
   1147 #if defined(HAS_SETROW_NEON)
   1148   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
   1149       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1150     ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
   1151     return 0;
   1152   }
   1153 #endif
   1154 #if defined(HAS_SETROW_X86)
   1155   if (TestCpuFlag(kCpuHasX86)) {
   1156     ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
   1157     return 0;
   1158   }
   1159 #endif
   1160   ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
   1161   return 0;
   1162 }
   1163 
   1164 // Convert unattentuated ARGB to preattenuated ARGB.
   1165 // An unattenutated ARGB alpha blend uses the formula
   1166 // p = a * f + (1 - a) * b
   1167 // where
   1168 //   p is output pixel
   1169 //   f is foreground pixel
   1170 //   b is background pixel
   1171 //   a is alpha value from foreground pixel
   1172 // An preattenutated ARGB alpha blend uses the formula
   1173 // p = f + (1 - a) * b
   1174 // where
   1175 //   f is foreground pixel premultiplied by alpha
   1176 
   1177 LIBYUV_API
   1178 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
   1179                   uint8* dst_argb, int dst_stride_argb,
   1180                   int width, int height) {
   1181   int y;
   1182   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1183                            int width) = ARGBAttenuateRow_C;
   1184   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1185     return -1;
   1186   }
   1187   if (height < 0) {
   1188     height = -height;
   1189     src_argb = src_argb + (height - 1) * src_stride_argb;
   1190     src_stride_argb = -src_stride_argb;
   1191   }
   1192   // Coalesce rows.
   1193   if (src_stride_argb == width * 4 &&
   1194       dst_stride_argb == width * 4) {
   1195     width *= height;
   1196     height = 1;
   1197     src_stride_argb = dst_stride_argb = 0;
   1198   }
   1199 #if defined(HAS_ARGBATTENUATEROW_SSE2)
   1200   if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
   1201       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1202       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1203     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
   1204     if (IS_ALIGNED(width, 4)) {
   1205       ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
   1206     }
   1207   }
   1208 #endif
   1209 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
   1210   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
   1211     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
   1212     if (IS_ALIGNED(width, 4)) {
   1213       ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
   1214     }
   1215   }
   1216 #endif
   1217 #if defined(HAS_ARGBATTENUATEROW_AVX2)
   1218   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1219     ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
   1220     if (IS_ALIGNED(width, 8)) {
   1221       ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
   1222     }
   1223   }
   1224 #endif
   1225 #if defined(HAS_ARGBATTENUATEROW_NEON)
   1226   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1227     ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
   1228     if (IS_ALIGNED(width, 8)) {
   1229       ARGBAttenuateRow = ARGBAttenuateRow_NEON;
   1230     }
   1231   }
   1232 #endif
   1233 
   1234   for (y = 0; y < height; ++y) {
   1235     ARGBAttenuateRow(src_argb, dst_argb, width);
   1236     src_argb += src_stride_argb;
   1237     dst_argb += dst_stride_argb;
   1238   }
   1239   return 0;
   1240 }
   1241 
   1242 // Convert preattentuated ARGB to unattenuated ARGB.
   1243 LIBYUV_API
   1244 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
   1245                     uint8* dst_argb, int dst_stride_argb,
   1246                     int width, int height) {
   1247   int y;
   1248   void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1249                              int width) = ARGBUnattenuateRow_C;
   1250   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1251     return -1;
   1252   }
   1253   if (height < 0) {
   1254     height = -height;
   1255     src_argb = src_argb + (height - 1) * src_stride_argb;
   1256     src_stride_argb = -src_stride_argb;
   1257   }
   1258   // Coalesce rows.
   1259   if (src_stride_argb == width * 4 &&
   1260       dst_stride_argb == width * 4) {
   1261     width *= height;
   1262     height = 1;
   1263     src_stride_argb = dst_stride_argb = 0;
   1264   }
   1265 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
   1266   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1267     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
   1268     if (IS_ALIGNED(width, 4)) {
   1269       ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
   1270     }
   1271   }
   1272 #endif
   1273 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
   1274   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1275     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
   1276     if (IS_ALIGNED(width, 8)) {
   1277       ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
   1278     }
   1279   }
   1280 #endif
   1281 // TODO(fbarchard): Neon version.
   1282 
   1283   for (y = 0; y < height; ++y) {
   1284     ARGBUnattenuateRow(src_argb, dst_argb, width);
   1285     src_argb += src_stride_argb;
   1286     dst_argb += dst_stride_argb;
   1287   }
   1288   return 0;
   1289 }
   1290 
   1291 // Convert ARGB to Grayed ARGB.
   1292 LIBYUV_API
   1293 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
   1294                uint8* dst_argb, int dst_stride_argb,
   1295                int width, int height) {
   1296   int y;
   1297   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1298                       int width) = ARGBGrayRow_C;
   1299   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1300     return -1;
   1301   }
   1302   if (height < 0) {
   1303     height = -height;
   1304     src_argb = src_argb + (height - 1) * src_stride_argb;
   1305     src_stride_argb = -src_stride_argb;
   1306   }
   1307   // Coalesce rows.
   1308   if (src_stride_argb == width * 4 &&
   1309       dst_stride_argb == width * 4) {
   1310     width *= height;
   1311     height = 1;
   1312     src_stride_argb = dst_stride_argb = 0;
   1313   }
   1314 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1315   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1316       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1317       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1318     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1319   }
   1320 #elif defined(HAS_ARGBGRAYROW_NEON)
   1321   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1322     ARGBGrayRow = ARGBGrayRow_NEON;
   1323   }
   1324 #endif
   1325 
   1326   for (y = 0; y < height; ++y) {
   1327     ARGBGrayRow(src_argb, dst_argb, width);
   1328     src_argb += src_stride_argb;
   1329     dst_argb += dst_stride_argb;
   1330   }
   1331   return 0;
   1332 }
   1333 
   1334 // Make a rectangle of ARGB gray scale.
   1335 LIBYUV_API
   1336 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
   1337              int dst_x, int dst_y,
   1338              int width, int height) {
   1339   int y;
   1340   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1341                       int width) = ARGBGrayRow_C;
   1342   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1343   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1344     return -1;
   1345   }
   1346   // Coalesce rows.
   1347   if (dst_stride_argb == width * 4) {
   1348     width *= height;
   1349     height = 1;
   1350     dst_stride_argb = 0;
   1351   }
   1352 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1353   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1354       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1355     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1356   }
   1357 #elif defined(HAS_ARGBGRAYROW_NEON)
   1358   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1359     ARGBGrayRow = ARGBGrayRow_NEON;
   1360   }
   1361 #endif
   1362   for (y = 0; y < height; ++y) {
   1363     ARGBGrayRow(dst, dst, width);
   1364     dst += dst_stride_argb;
   1365   }
   1366   return 0;
   1367 }
   1368 
   1369 // Make a rectangle of ARGB Sepia tone.
   1370 LIBYUV_API
   1371 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
   1372               int dst_x, int dst_y, int width, int height) {
   1373   int y;
   1374   void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
   1375   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1376   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1377     return -1;
   1378   }
   1379   // Coalesce rows.
   1380   if (dst_stride_argb == width * 4) {
   1381     width *= height;
   1382     height = 1;
   1383     dst_stride_argb = 0;
   1384   }
   1385 #if defined(HAS_ARGBSEPIAROW_SSSE3)
   1386   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1387       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1388     ARGBSepiaRow = ARGBSepiaRow_SSSE3;
   1389   }
   1390 #elif defined(HAS_ARGBSEPIAROW_NEON)
   1391   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1392     ARGBSepiaRow = ARGBSepiaRow_NEON;
   1393   }
   1394 #endif
   1395   for (y = 0; y < height; ++y) {
   1396     ARGBSepiaRow(dst, width);
   1397     dst += dst_stride_argb;
   1398   }
   1399   return 0;
   1400 }
   1401 
   1402 // Apply a 4x4 matrix to each ARGB pixel.
   1403 // Note: Normally for shading, but can be used to swizzle or invert.
   1404 LIBYUV_API
   1405 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
   1406                     uint8* dst_argb, int dst_stride_argb,
   1407                     const int8* matrix_argb,
   1408                     int width, int height) {
   1409   int y;
   1410   void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
   1411       const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
   1412   if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
   1413     return -1;
   1414   }
   1415   if (height < 0) {
   1416     height = -height;
   1417     src_argb = src_argb + (height - 1) * src_stride_argb;
   1418     src_stride_argb = -src_stride_argb;
   1419   }
   1420   // Coalesce rows.
   1421   if (src_stride_argb == width * 4 &&
   1422       dst_stride_argb == width * 4) {
   1423     width *= height;
   1424     height = 1;
   1425     src_stride_argb = dst_stride_argb = 0;
   1426   }
   1427 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
   1428   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1429       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1430     ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
   1431   }
   1432 #elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
   1433   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1434     ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
   1435   }
   1436 #endif
   1437   for (y = 0; y < height; ++y) {
   1438     ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
   1439     src_argb += src_stride_argb;
   1440     dst_argb += dst_stride_argb;
   1441   }
   1442   return 0;
   1443 }
   1444 
   1445 // Apply a 4x3 matrix to each ARGB pixel.
   1446 // Deprecated.
   1447 LIBYUV_API
   1448 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
   1449                    const int8* matrix_rgb,
   1450                    int dst_x, int dst_y, int width, int height) {
   1451   SIMD_ALIGNED(int8 matrix_argb[16]);
   1452   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1453   if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
   1454       dst_x < 0 || dst_y < 0) {
   1455     return -1;
   1456   }
   1457 
   1458   // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
   1459   matrix_argb[0] = matrix_rgb[0] / 2;
   1460   matrix_argb[1] = matrix_rgb[1] / 2;
   1461   matrix_argb[2] = matrix_rgb[2] / 2;
   1462   matrix_argb[3] = matrix_rgb[3] / 2;
   1463   matrix_argb[4] = matrix_rgb[4] / 2;
   1464   matrix_argb[5] = matrix_rgb[5] / 2;
   1465   matrix_argb[6] = matrix_rgb[6] / 2;
   1466   matrix_argb[7] = matrix_rgb[7] / 2;
   1467   matrix_argb[8] = matrix_rgb[8] / 2;
   1468   matrix_argb[9] = matrix_rgb[9] / 2;
   1469   matrix_argb[10] = matrix_rgb[10] / 2;
   1470   matrix_argb[11] = matrix_rgb[11] / 2;
   1471   matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
   1472   matrix_argb[15] = 64;  // 1.0
   1473 
   1474   return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
   1475                          dst, dst_stride_argb,
   1476                          &matrix_argb[0], width, height);
   1477 }
   1478 
   1479 // Apply a color table each ARGB pixel.
   1480 // Table contains 256 ARGB values.
   1481 LIBYUV_API
   1482 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
   1483                    const uint8* table_argb,
   1484                    int dst_x, int dst_y, int width, int height) {
   1485   int y;
   1486   void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
   1487                             int width) = ARGBColorTableRow_C;
   1488   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1489   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
   1490       dst_x < 0 || dst_y < 0) {
   1491     return -1;
   1492   }
   1493   // Coalesce rows.
   1494   if (dst_stride_argb == width * 4) {
   1495     width *= height;
   1496     height = 1;
   1497     dst_stride_argb = 0;
   1498   }
   1499 #if defined(HAS_ARGBCOLORTABLEROW_X86)
   1500   if (TestCpuFlag(kCpuHasX86)) {
   1501     ARGBColorTableRow = ARGBColorTableRow_X86;
   1502   }
   1503 #endif
   1504   for (y = 0; y < height; ++y) {
   1505     ARGBColorTableRow(dst, table_argb, width);
   1506     dst += dst_stride_argb;
   1507   }
   1508   return 0;
   1509 }
   1510 
   1511 // Apply a color table each ARGB pixel but preserve destination alpha.
   1512 // Table contains 256 ARGB values.
   1513 LIBYUV_API
   1514 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
   1515                   const uint8* table_argb,
   1516                   int dst_x, int dst_y, int width, int height) {
   1517   int y;
   1518   void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
   1519                            int width) = RGBColorTableRow_C;
   1520   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1521   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
   1522       dst_x < 0 || dst_y < 0) {
   1523     return -1;
   1524   }
   1525   // Coalesce rows.
   1526   if (dst_stride_argb == width * 4) {
   1527     width *= height;
   1528     height = 1;
   1529     dst_stride_argb = 0;
   1530   }
   1531 #if defined(HAS_RGBCOLORTABLEROW_X86)
   1532   if (TestCpuFlag(kCpuHasX86)) {
   1533     RGBColorTableRow = RGBColorTableRow_X86;
   1534   }
   1535 #endif
   1536   for (y = 0; y < height; ++y) {
   1537     RGBColorTableRow(dst, table_argb, width);
   1538     dst += dst_stride_argb;
   1539   }
   1540   return 0;
   1541 }
   1542 
   1543 // ARGBQuantize is used to posterize art.
   1544 // e.g. rgb / qvalue * qvalue + qvalue / 2
   1545 // But the low levels implement efficiently with 3 parameters, and could be
   1546 // used for other high level operations.
   1547 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
   1548 // where scale is 1 / interval_size as a fixed point value.
   1549 // The divide is replaces with a multiply by reciprocal fixed point multiply.
   1550 // Caveat - although SSE2 saturates, the C function does not and should be used
   1551 // with care if doing anything but quantization.
   1552 LIBYUV_API
   1553 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
   1554                  int scale, int interval_size, int interval_offset,
   1555                  int dst_x, int dst_y, int width, int height) {
   1556   int y;
   1557   void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
   1558                           int interval_offset, int width) = ARGBQuantizeRow_C;
   1559   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1560   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
   1561       interval_size < 1 || interval_size > 255) {
   1562     return -1;
   1563   }
   1564   // Coalesce rows.
   1565   if (dst_stride_argb == width * 4) {
   1566     width *= height;
   1567     height = 1;
   1568     dst_stride_argb = 0;
   1569   }
   1570 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
   1571   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1572       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1573     ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
   1574   }
   1575 #elif defined(HAS_ARGBQUANTIZEROW_NEON)
   1576   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1577     ARGBQuantizeRow = ARGBQuantizeRow_NEON;
   1578   }
   1579 #endif
   1580   for (y = 0; y < height; ++y) {
   1581     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
   1582     dst += dst_stride_argb;
   1583   }
   1584   return 0;
   1585 }
   1586 
   1587 // Computes table of cumulative sum for image where the value is the sum
   1588 // of all values above and to the left of the entry. Used by ARGBBlur.
   1589 LIBYUV_API
   1590 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
   1591                              int32* dst_cumsum, int dst_stride32_cumsum,
   1592                              int width, int height) {
   1593   int y;
   1594   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
   1595       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1596   int32* previous_cumsum = dst_cumsum;
   1597   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
   1598     return -1;
   1599   }
   1600 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   1601   if (TestCpuFlag(kCpuHasSSE2)) {
   1602     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1603   }
   1604 #endif
   1605   memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
   1606   for (y = 0; y < height; ++y) {
   1607     ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
   1608     previous_cumsum = dst_cumsum;
   1609     dst_cumsum += dst_stride32_cumsum;
   1610     src_argb += src_stride_argb;
   1611   }
   1612   return 0;
   1613 }
   1614 
   1615 // Blur ARGB image.
   1616 // Caller should allocate CumulativeSum table of width * height * 16 bytes
   1617 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
   1618 // as the buffer is treated as circular.
   1619 LIBYUV_API
   1620 int ARGBBlur(const uint8* src_argb, int src_stride_argb,
   1621              uint8* dst_argb, int dst_stride_argb,
   1622              int32* dst_cumsum, int dst_stride32_cumsum,
   1623              int width, int height, int radius) {
   1624   int y;
   1625   void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
   1626       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1627   void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
   1628       int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
   1629   int32* cumsum_bot_row;
   1630   int32* max_cumsum_bot_row;
   1631   int32* cumsum_top_row;
   1632 
   1633   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1634     return -1;
   1635   }
   1636   if (height < 0) {
   1637     height = -height;
   1638     src_argb = src_argb + (height - 1) * src_stride_argb;
   1639     src_stride_argb = -src_stride_argb;
   1640   }
   1641   if (radius > height) {
   1642     radius = height;
   1643   }
   1644   if (radius > (width / 2 - 1)) {
   1645     radius = width / 2 - 1;
   1646   }
   1647   if (radius <= 0) {
   1648     return -1;
   1649   }
   1650 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   1651   if (TestCpuFlag(kCpuHasSSE2)) {
   1652     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1653     CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
   1654   }
   1655 #endif
   1656   // Compute enough CumulativeSum for first row to be blurred. After this
   1657   // one row of CumulativeSum is updated at a time.
   1658   ARGBComputeCumulativeSum(src_argb, src_stride_argb,
   1659                            dst_cumsum, dst_stride32_cumsum,
   1660                            width, radius);
   1661 
   1662   src_argb = src_argb + radius * src_stride_argb;
   1663   cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
   1664 
   1665   max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
   1666   cumsum_top_row = &dst_cumsum[0];
   1667 
   1668   for (y = 0; y < height; ++y) {
   1669     int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
   1670     int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
   1671     int area = radius * (bot_y - top_y);
   1672     int boxwidth = radius * 4;
   1673     int x;
   1674     int n;
   1675 
   1676     // Increment cumsum_top_row pointer with circular buffer wrap around.
   1677     if (top_y) {
   1678       cumsum_top_row += dst_stride32_cumsum;
   1679       if (cumsum_top_row >= max_cumsum_bot_row) {
   1680         cumsum_top_row = dst_cumsum;
   1681       }
   1682     }
   1683     // Increment cumsum_bot_row pointer with circular buffer wrap around and
   1684     // then fill in a row of CumulativeSum.
   1685     if ((y + radius) < height) {
   1686       const int32* prev_cumsum_bot_row = cumsum_bot_row;
   1687       cumsum_bot_row += dst_stride32_cumsum;
   1688       if (cumsum_bot_row >= max_cumsum_bot_row) {
   1689         cumsum_bot_row = dst_cumsum;
   1690       }
   1691       ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
   1692                               width);
   1693       src_argb += src_stride_argb;
   1694     }
   1695 
   1696     // Left clipped.
   1697     for (x = 0; x < radius + 1; ++x) {
   1698       CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
   1699                                 boxwidth, area, &dst_argb[x * 4], 1);
   1700       area += (bot_y - top_y);
   1701       boxwidth += 4;
   1702     }
   1703 
   1704     // Middle unclipped.
   1705     n = (width - 1) - radius - x + 1;
   1706     CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
   1707                               boxwidth, area, &dst_argb[x * 4], n);
   1708 
   1709     // Right clipped.
   1710     for (x += n; x <= width - 1; ++x) {
   1711       area -= (bot_y - top_y);
   1712       boxwidth -= 4;
   1713       CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
   1714                                 cumsum_bot_row + (x - radius - 1) * 4,
   1715                                 boxwidth, area, &dst_argb[x * 4], 1);
   1716     }
   1717     dst_argb += dst_stride_argb;
   1718   }
   1719   return 0;
   1720 }
   1721 
   1722 // Multiply ARGB image by a specified ARGB value.
   1723 LIBYUV_API
   1724 int ARGBShade(const uint8* src_argb, int src_stride_argb,
   1725               uint8* dst_argb, int dst_stride_argb,
   1726               int width, int height, uint32 value) {
   1727   int y;
   1728   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
   1729                        int width, uint32 value) = ARGBShadeRow_C;
   1730   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
   1731     return -1;
   1732   }
   1733   if (height < 0) {
   1734     height = -height;
   1735     src_argb = src_argb + (height - 1) * src_stride_argb;
   1736     src_stride_argb = -src_stride_argb;
   1737   }
   1738   // Coalesce rows.
   1739   if (src_stride_argb == width * 4 &&
   1740       dst_stride_argb == width * 4) {
   1741     width *= height;
   1742     height = 1;
   1743     src_stride_argb = dst_stride_argb = 0;
   1744   }
   1745 #if defined(HAS_ARGBSHADEROW_SSE2)
   1746   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1747       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1748       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1749     ARGBShadeRow = ARGBShadeRow_SSE2;
   1750   }
   1751 #elif defined(HAS_ARGBSHADEROW_NEON)
   1752   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1753     ARGBShadeRow = ARGBShadeRow_NEON;
   1754   }
   1755 #endif
   1756 
   1757   for (y = 0; y < height; ++y) {
   1758     ARGBShadeRow(src_argb, dst_argb, width, value);
   1759     src_argb += src_stride_argb;
   1760     dst_argb += dst_stride_argb;
   1761   }
   1762   return 0;
   1763 }
   1764 
   1765 // Interpolate 2 ARGB images by specified amount (0 to 255).
   1766 LIBYUV_API
   1767 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
   1768                     const uint8* src_argb1, int src_stride_argb1,
   1769                     uint8* dst_argb, int dst_stride_argb,
   1770                     int width, int height, int interpolation) {
   1771   int y;
   1772   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   1773                          ptrdiff_t src_stride, int dst_width,
   1774                          int source_y_fraction) = InterpolateRow_C;
   1775   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1776     return -1;
   1777   }
   1778   // Negative height means invert the image.
   1779   if (height < 0) {
   1780     height = -height;
   1781     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1782     dst_stride_argb = -dst_stride_argb;
   1783   }
   1784   // Coalesce rows.
   1785   if (src_stride_argb0 == width * 4 &&
   1786       src_stride_argb1 == width * 4 &&
   1787       dst_stride_argb == width * 4) {
   1788     width *= height;
   1789     height = 1;
   1790     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1791   }
   1792 #if defined(HAS_INTERPOLATEROW_SSE2)
   1793   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1794     InterpolateRow = InterpolateRow_Any_SSE2;
   1795     if (IS_ALIGNED(width, 4)) {
   1796       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   1797       if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
   1798           IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
   1799           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1800         InterpolateRow = InterpolateRow_SSE2;
   1801       }
   1802     }
   1803   }
   1804 #endif
   1805 #if defined(HAS_INTERPOLATEROW_SSSE3)
   1806   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
   1807     InterpolateRow = InterpolateRow_Any_SSSE3;
   1808     if (IS_ALIGNED(width, 4)) {
   1809       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   1810       if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
   1811           IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
   1812           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1813         InterpolateRow = InterpolateRow_SSSE3;
   1814       }
   1815     }
   1816   }
   1817 #endif
   1818 #if defined(HAS_INTERPOLATEROW_AVX2)
   1819   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1820     InterpolateRow = InterpolateRow_Any_AVX2;
   1821     if (IS_ALIGNED(width, 8)) {
   1822       InterpolateRow = InterpolateRow_AVX2;
   1823     }
   1824   }
   1825 #endif
   1826 #if defined(HAS_INTERPOLATEROW_NEON)
   1827   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
   1828     InterpolateRow = InterpolateRow_Any_NEON;
   1829     if (IS_ALIGNED(width, 4)) {
   1830       InterpolateRow = InterpolateRow_NEON;
   1831     }
   1832   }
   1833 #endif
   1834 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
   1835   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
   1836       IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
   1837       IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
   1838       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
   1839     ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
   1840   }
   1841 #endif
   1842 
   1843   for (y = 0; y < height; ++y) {
   1844     InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
   1845                    width * 4, interpolation);
   1846     src_argb0 += src_stride_argb0;
   1847     src_argb1 += src_stride_argb1;
   1848     dst_argb += dst_stride_argb;
   1849   }
   1850   return 0;
   1851 }
   1852 
   1853 // Shuffle ARGB channel order.  e.g. BGRA to ARGB.
   1854 LIBYUV_API
   1855 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
   1856                 uint8* dst_argb, int dst_stride_argb,
   1857                 const uint8* shuffler, int width, int height) {
   1858   int y;
   1859   void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
   1860                          const uint8* shuffler, int pix) = ARGBShuffleRow_C;
   1861   if (!src_bgra || !dst_argb ||
   1862       width <= 0 || height == 0) {
   1863     return -1;
   1864   }
   1865   // Negative height means invert the image.
   1866   if (height < 0) {
   1867     height = -height;
   1868     src_bgra = src_bgra + (height - 1) * src_stride_bgra;
   1869     src_stride_bgra = -src_stride_bgra;
   1870   }
   1871   // Coalesce rows.
   1872   if (src_stride_bgra == width * 4 &&
   1873       dst_stride_argb == width * 4) {
   1874     width *= height;
   1875     height = 1;
   1876     src_stride_bgra = dst_stride_argb = 0;
   1877   }
   1878 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
   1879   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1880     ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
   1881     if (IS_ALIGNED(width, 4)) {
   1882       ARGBShuffleRow = ARGBShuffleRow_SSE2;
   1883     }
   1884   }
   1885 #endif
   1886 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
   1887   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
   1888     ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
   1889     if (IS_ALIGNED(width, 8)) {
   1890       ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
   1891       if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
   1892           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1893         ARGBShuffleRow = ARGBShuffleRow_SSSE3;
   1894       }
   1895     }
   1896   }
   1897 #endif
   1898 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
   1899   if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
   1900     ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
   1901     if (IS_ALIGNED(width, 16)) {
   1902       ARGBShuffleRow = ARGBShuffleRow_AVX2;
   1903     }
   1904   }
   1905 #endif
   1906 #if defined(HAS_ARGBSHUFFLEROW_NEON)
   1907   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
   1908     ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
   1909     if (IS_ALIGNED(width, 4)) {
   1910       ARGBShuffleRow = ARGBShuffleRow_NEON;
   1911     }
   1912   }
   1913 #endif
   1914 
   1915   for (y = 0; y < height; ++y) {
   1916     ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
   1917     src_bgra += src_stride_bgra;
   1918     dst_argb += dst_stride_argb;
   1919   }
   1920   return 0;
   1921 }
   1922 
   1923 // Sobel ARGB effect.
   1924 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
   1925                         uint8* dst_argb, int dst_stride_argb,
   1926                         int width, int height,
   1927                         void (*SobelRow)(const uint8* src_sobelx,
   1928                                          const uint8* src_sobely,
   1929                                          uint8* dst, int width)) {
   1930   int y;
   1931   void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
   1932                          uint32 selector, int pix) = ARGBToBayerGGRow_C;
   1933   void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
   1934                     uint8* dst_sobely, int width) = SobelYRow_C;
   1935   void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
   1936                     const uint8* src_y2, uint8* dst_sobely, int width) =
   1937       SobelXRow_C;
   1938   const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
   1939   if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
   1940     return -1;
   1941   }
   1942   // Negative height means invert the image.
   1943   if (height < 0) {
   1944     height = -height;
   1945     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   1946     src_stride_argb = -src_stride_argb;
   1947   }
   1948   // ARGBToBayer used to select G channel from ARGB.
   1949 #if defined(HAS_ARGBTOBAYERGGROW_SSE2)
   1950   if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
   1951       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
   1952     ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
   1953     if (IS_ALIGNED(width, 8)) {
   1954       ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
   1955     }
   1956   }
   1957 #endif
   1958 #if defined(HAS_ARGBTOBAYERROW_SSSE3)
   1959   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
   1960       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
   1961     ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
   1962     if (IS_ALIGNED(width, 8)) {
   1963       ARGBToBayerRow = ARGBToBayerRow_SSSE3;
   1964     }
   1965   }
   1966 #endif
   1967 #if defined(HAS_ARGBTOBAYERGGROW_NEON)
   1968   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1969     ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
   1970     if (IS_ALIGNED(width, 8)) {
   1971       ARGBToBayerRow = ARGBToBayerGGRow_NEON;
   1972     }
   1973   }
   1974 #endif
   1975 #if defined(HAS_SOBELYROW_SSE2)
   1976   if (TestCpuFlag(kCpuHasSSE2)) {
   1977     SobelYRow = SobelYRow_SSE2;
   1978   }
   1979 #endif
   1980 #if defined(HAS_SOBELYROW_NEON)
   1981   if (TestCpuFlag(kCpuHasNEON)) {
   1982     SobelYRow = SobelYRow_NEON;
   1983   }
   1984 #endif
   1985 #if defined(HAS_SOBELXROW_SSE2)
   1986   if (TestCpuFlag(kCpuHasSSE2)) {
   1987     SobelXRow = SobelXRow_SSE2;
   1988   }
   1989 #endif
   1990 #if defined(HAS_SOBELXROW_NEON)
   1991   if (TestCpuFlag(kCpuHasNEON)) {
   1992     SobelXRow = SobelXRow_NEON;
   1993   }
   1994 #endif
   1995   {
   1996     // 3 rows with edges before/after.
   1997     const int kRowSize = (width + kEdge + 15) & ~15;
   1998     align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
   1999     uint8* row_sobelx = rows;
   2000     uint8* row_sobely = rows + kRowSize;
   2001     uint8* row_y = rows + kRowSize * 2;
   2002 
   2003     // Convert first row.
   2004     uint8* row_y0 = row_y + kEdge;
   2005     uint8* row_y1 = row_y0 + kRowSize;
   2006     uint8* row_y2 = row_y1 + kRowSize;
   2007     ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
   2008     row_y0[-1] = row_y0[0];
   2009     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
   2010     ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
   2011     row_y1[-1] = row_y1[0];
   2012     memset(row_y1 + width, row_y1[width - 1], 16);
   2013     memset(row_y2 + width, 0, 16);
   2014 
   2015     for (y = 0; y < height; ++y) {
   2016       // Convert next row of ARGB to Y.
   2017       if (y < (height - 1)) {
   2018         src_argb += src_stride_argb;
   2019       }
   2020       ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
   2021       row_y2[-1] = row_y2[0];
   2022       row_y2[width] = row_y2[width - 1];
   2023 
   2024       SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
   2025       SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
   2026       SobelRow(row_sobelx, row_sobely, dst_argb, width);
   2027 
   2028       // Cycle thru circular queue of 3 row_y buffers.
   2029       {
   2030         uint8* row_yt = row_y0;
   2031         row_y0 = row_y1;
   2032         row_y1 = row_y2;
   2033         row_y2 = row_yt;
   2034       }
   2035 
   2036       dst_argb += dst_stride_argb;
   2037     }
   2038     free_aligned_buffer_64(rows);
   2039   }
   2040   return 0;
   2041 }
   2042 
   2043 // Sobel ARGB effect.
   2044 LIBYUV_API
   2045 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
   2046               uint8* dst_argb, int dst_stride_argb,
   2047               int width, int height) {
   2048   void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2049                    uint8* dst_argb, int width) = SobelRow_C;
   2050 #if defined(HAS_SOBELROW_SSE2)
   2051   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
   2052       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   2053     SobelRow = SobelRow_SSE2;
   2054   }
   2055 #endif
   2056 #if defined(HAS_SOBELROW_NEON)
   2057   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   2058     SobelRow = SobelRow_NEON;
   2059   }
   2060 #endif
   2061   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2062                       width, height, SobelRow);
   2063 }
   2064 
   2065 // Sobel ARGB effect with planar output.
   2066 LIBYUV_API
   2067 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
   2068                      uint8* dst_y, int dst_stride_y,
   2069                      int width, int height) {
   2070   void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2071                           uint8* dst_, int width) = SobelToPlaneRow_C;
   2072 #if defined(HAS_SOBELTOPLANEROW_SSE2)
   2073   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
   2074       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
   2075     SobelToPlaneRow = SobelToPlaneRow_SSE2;
   2076   }
   2077 #endif
   2078 #if defined(HAS_SOBELTOPLANEROW_NEON)
   2079   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
   2080     SobelToPlaneRow = SobelToPlaneRow_NEON;
   2081   }
   2082 #endif
   2083   return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
   2084                       width, height, SobelToPlaneRow);
   2085 }
   2086 
   2087 // SobelXY ARGB effect.
   2088 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
   2089 LIBYUV_API
   2090 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
   2091                 uint8* dst_argb, int dst_stride_argb,
   2092                 int width, int height) {
   2093   void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2094                      uint8* dst_argb, int width) = SobelXYRow_C;
   2095 #if defined(HAS_SOBELXYROW_SSE2)
   2096   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
   2097       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   2098     SobelXYRow = SobelXYRow_SSE2;
   2099   }
   2100 #endif
   2101 #if defined(HAS_SOBELXYROW_NEON)
   2102   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   2103     SobelXYRow = SobelXYRow_NEON;
   2104   }
   2105 #endif
   2106   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2107                       width, height, SobelXYRow);
   2108 }
   2109 
   2110 // Apply a 4x4 polynomial to each ARGB pixel.
   2111 LIBYUV_API
   2112 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
   2113                    uint8* dst_argb, int dst_stride_argb,
   2114                    const float* poly,
   2115                    int width, int height) {
   2116   int y;
   2117   void (*ARGBPolynomialRow)(const uint8* src_argb,
   2118                             uint8* dst_argb, const float* poly,
   2119                             int width) = ARGBPolynomialRow_C;
   2120   if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
   2121     return -1;
   2122   }
   2123   // Negative height means invert the image.
   2124   if (height < 0) {
   2125     height = -height;
   2126     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   2127     src_stride_argb = -src_stride_argb;
   2128   }
   2129   // Coalesce rows.
   2130   if (src_stride_argb == width * 4 &&
   2131       dst_stride_argb == width * 4) {
   2132     width *= height;
   2133     height = 1;
   2134     src_stride_argb = dst_stride_argb = 0;
   2135   }
   2136 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
   2137   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
   2138     ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
   2139   }
   2140 #endif
   2141 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
   2142   if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
   2143       IS_ALIGNED(width, 2)) {
   2144     ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
   2145   }
   2146 #endif
   2147 
   2148   for (y = 0; y < height; ++y) {
   2149     ARGBPolynomialRow(src_argb, dst_argb, poly, width);
   2150     src_argb += src_stride_argb;
   2151     dst_argb += dst_stride_argb;
   2152   }
   2153   return 0;
   2154 }
   2155 
   2156 // Apply a lumacolortable to each ARGB pixel.
   2157 LIBYUV_API
   2158 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
   2159                        uint8* dst_argb, int dst_stride_argb,
   2160                        const uint8* luma,
   2161                        int width, int height) {
   2162   int y;
   2163   void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
   2164       int width, const uint8* luma, const uint32 lumacoeff) =
   2165       ARGBLumaColorTableRow_C;
   2166   if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
   2167     return -1;
   2168   }
   2169   // Negative height means invert the image.
   2170   if (height < 0) {
   2171     height = -height;
   2172     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   2173     src_stride_argb = -src_stride_argb;
   2174   }
   2175   // Coalesce rows.
   2176   if (src_stride_argb == width * 4 &&
   2177       dst_stride_argb == width * 4) {
   2178     width *= height;
   2179     height = 1;
   2180     src_stride_argb = dst_stride_argb = 0;
   2181   }
   2182 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
   2183   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
   2184     ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
   2185   }
   2186 #endif
   2187 
   2188   for (y = 0; y < height; ++y) {
   2189     ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
   2190     src_argb += src_stride_argb;
   2191     dst_argb += dst_stride_argb;
   2192   }
   2193   return 0;
   2194 }
   2195 
   2196 // Copy Alpha from one ARGB image to another.
   2197 LIBYUV_API
   2198 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
   2199                   uint8* dst_argb, int dst_stride_argb,
   2200                   int width, int height) {
   2201   int y;
   2202   void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
   2203       ARGBCopyAlphaRow_C;
   2204   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   2205     return -1;
   2206   }
   2207   // Negative height means invert the image.
   2208   if (height < 0) {
   2209     height = -height;
   2210     src_argb = src_argb + (height - 1) * src_stride_argb;
   2211     src_stride_argb = -src_stride_argb;
   2212   }
   2213   // Coalesce rows.
   2214   if (src_stride_argb == width * 4 &&
   2215       dst_stride_argb == width * 4) {
   2216     width *= height;
   2217     height = 1;
   2218     src_stride_argb = dst_stride_argb = 0;
   2219   }
   2220 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
   2221   if (TestCpuFlag(kCpuHasSSE2) &&
   2222       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   2223       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
   2224       IS_ALIGNED(width, 8)) {
   2225     ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
   2226   }
   2227 #endif
   2228 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
   2229   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
   2230     ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
   2231   }
   2232 #endif
   2233 
   2234   for (y = 0; y < height; ++y) {
   2235     ARGBCopyAlphaRow(src_argb, dst_argb, width);
   2236     src_argb += src_stride_argb;
   2237     dst_argb += dst_stride_argb;
   2238   }
   2239   return 0;
   2240 }
   2241 
   2242 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
   2243 LIBYUV_API
   2244 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
   2245                      uint8* dst_argb, int dst_stride_argb,
   2246                      int width, int height) {
   2247   int y;
   2248   void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
   2249       ARGBCopyYToAlphaRow_C;
   2250   if (!src_y || !dst_argb || width <= 0 || height == 0) {
   2251     return -1;
   2252   }
   2253   // Negative height means invert the image.
   2254   if (height < 0) {
   2255     height = -height;
   2256     src_y = src_y + (height - 1) * src_stride_y;
   2257     src_stride_y = -src_stride_y;
   2258   }
   2259   // Coalesce rows.
   2260   if (src_stride_y == width &&
   2261       dst_stride_argb == width * 4) {
   2262     width *= height;
   2263     height = 1;
   2264     src_stride_y = dst_stride_argb = 0;
   2265   }
   2266 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
   2267   if (TestCpuFlag(kCpuHasSSE2) &&
   2268       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
   2269       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
   2270       IS_ALIGNED(width, 8)) {
   2271     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
   2272   }
   2273 #endif
   2274 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
   2275   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
   2276     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
   2277   }
   2278 #endif
   2279 
   2280   for (y = 0; y < height; ++y) {
   2281     ARGBCopyYToAlphaRow(src_y, dst_argb, width);
   2282     src_y += src_stride_y;
   2283     dst_argb += dst_stride_argb;
   2284   }
   2285   return 0;
   2286 }
   2287 
   2288 #ifdef __cplusplus
   2289 }  // extern "C"
   2290 }  // namespace libyuv
   2291 #endif
   2292