Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/planar_functions.h"
     12 
     13 #include <string.h>  // for memset()
     14 
     15 #include "libyuv/cpu_id.h"
     16 #ifdef HAVE_JPEG
     17 #include "libyuv/mjpeg_decoder.h"
     18 #endif
     19 #include "libyuv/row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 // Copy a plane of data
     27 LIBYUV_API
     28 void CopyPlane(const uint8* src_y, int src_stride_y,
     29                uint8* dst_y, int dst_stride_y,
     30                int width, int height) {
     31   int y;
     32   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
     33   // Coalesce rows.
     34   if (src_stride_y == width &&
     35       dst_stride_y == width) {
     36     width *= height;
     37     height = 1;
     38     src_stride_y = dst_stride_y = 0;
     39   }
     40 #if defined(HAS_COPYROW_X86)
     41   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
     42     CopyRow = CopyRow_X86;
     43   }
     44 #endif
     45 #if defined(HAS_COPYROW_SSE2)
     46   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
     47       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
     48       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
     49     CopyRow = CopyRow_SSE2;
     50   }
     51 #endif
     52 #if defined(HAS_COPYROW_ERMS)
     53   if (TestCpuFlag(kCpuHasERMS)) {
     54     CopyRow = CopyRow_ERMS;
     55   }
     56 #endif
     57 #if defined(HAS_COPYROW_NEON)
     58   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
     59     CopyRow = CopyRow_NEON;
     60   }
     61 #endif
     62 #if defined(HAS_COPYROW_MIPS)
     63   if (TestCpuFlag(kCpuHasMIPS)) {
     64     CopyRow = CopyRow_MIPS;
     65   }
     66 #endif
     67 
     68   // Copy plane
     69   for (y = 0; y < height; ++y) {
     70     CopyRow(src_y, dst_y, width);
     71     src_y += src_stride_y;
     72     dst_y += dst_stride_y;
     73   }
     74 }
     75 
     76 LIBYUV_API
     77 void CopyPlane_16(const uint16* src_y, int src_stride_y,
     78                   uint16* dst_y, int dst_stride_y,
     79                   int width, int height) {
     80   int y;
     81   void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
     82   // Coalesce rows.
     83   if (src_stride_y == width &&
     84       dst_stride_y == width) {
     85     width *= height;
     86     height = 1;
     87     src_stride_y = dst_stride_y = 0;
     88   }
     89 #if defined(HAS_COPYROW_16_X86)
     90   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
     91     CopyRow = CopyRow_16_X86;
     92   }
     93 #endif
     94 #if defined(HAS_COPYROW_16_SSE2)
     95   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
     96       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
     97       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
     98     CopyRow = CopyRow_16_SSE2;
     99   }
    100 #endif
    101 #if defined(HAS_COPYROW_16_ERMS)
    102   if (TestCpuFlag(kCpuHasERMS)) {
    103     CopyRow = CopyRow_16_ERMS;
    104   }
    105 #endif
    106 #if defined(HAS_COPYROW_16_NEON)
    107   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    108     CopyRow = CopyRow_16_NEON;
    109   }
    110 #endif
    111 #if defined(HAS_COPYROW_16_MIPS)
    112   if (TestCpuFlag(kCpuHasMIPS)) {
    113     CopyRow = CopyRow_16_MIPS;
    114   }
    115 #endif
    116 
    117   // Copy plane
    118   for (y = 0; y < height; ++y) {
    119     CopyRow(src_y, dst_y, width);
    120     src_y += src_stride_y;
    121     dst_y += dst_stride_y;
    122   }
    123 }
    124 
    125 // Copy I422.
    126 LIBYUV_API
    127 int I422Copy(const uint8* src_y, int src_stride_y,
    128              const uint8* src_u, int src_stride_u,
    129              const uint8* src_v, int src_stride_v,
    130              uint8* dst_y, int dst_stride_y,
    131              uint8* dst_u, int dst_stride_u,
    132              uint8* dst_v, int dst_stride_v,
    133              int width, int height) {
    134   int halfwidth = (width + 1) >> 1;
    135   if (!src_y || !src_u || !src_v ||
    136       !dst_y || !dst_u || !dst_v ||
    137       width <= 0 || height == 0) {
    138     return -1;
    139   }
    140   // Negative height means invert the image.
    141   if (height < 0) {
    142     height = -height;
    143     src_y = src_y + (height - 1) * src_stride_y;
    144     src_u = src_u + (height - 1) * src_stride_u;
    145     src_v = src_v + (height - 1) * src_stride_v;
    146     src_stride_y = -src_stride_y;
    147     src_stride_u = -src_stride_u;
    148     src_stride_v = -src_stride_v;
    149   }
    150   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    151   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
    152   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
    153   return 0;
    154 }
    155 
    156 // Copy I444.
    157 LIBYUV_API
    158 int I444Copy(const uint8* src_y, int src_stride_y,
    159              const uint8* src_u, int src_stride_u,
    160              const uint8* src_v, int src_stride_v,
    161              uint8* dst_y, int dst_stride_y,
    162              uint8* dst_u, int dst_stride_u,
    163              uint8* dst_v, int dst_stride_v,
    164              int width, int height) {
    165   if (!src_y || !src_u || !src_v ||
    166       !dst_y || !dst_u || !dst_v ||
    167       width <= 0 || height == 0) {
    168     return -1;
    169   }
    170   // Negative height means invert the image.
    171   if (height < 0) {
    172     height = -height;
    173     src_y = src_y + (height - 1) * src_stride_y;
    174     src_u = src_u + (height - 1) * src_stride_u;
    175     src_v = src_v + (height - 1) * src_stride_v;
    176     src_stride_y = -src_stride_y;
    177     src_stride_u = -src_stride_u;
    178     src_stride_v = -src_stride_v;
    179   }
    180 
    181   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    182   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    183   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    184   return 0;
    185 }
    186 
    187 // Copy I400.
    188 LIBYUV_API
    189 int I400ToI400(const uint8* src_y, int src_stride_y,
    190                uint8* dst_y, int dst_stride_y,
    191                int width, int height) {
    192   if (!src_y || !dst_y || width <= 0 || height == 0) {
    193     return -1;
    194   }
    195   // Negative height means invert the image.
    196   if (height < 0) {
    197     height = -height;
    198     src_y = src_y + (height - 1) * src_stride_y;
    199     src_stride_y = -src_stride_y;
    200   }
    201   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    202   return 0;
    203 }
    204 
    205 // Convert I420 to I400.
    206 LIBYUV_API
    207 int I420ToI400(const uint8* src_y, int src_stride_y,
    208                const uint8* src_u, int src_stride_u,
    209                const uint8* src_v, int src_stride_v,
    210                uint8* dst_y, int dst_stride_y,
    211                int width, int height) {
    212   if (!src_y || !dst_y || width <= 0 || height == 0) {
    213     return -1;
    214   }
    215   // Negative height means invert the image.
    216   if (height < 0) {
    217     height = -height;
    218     src_y = src_y + (height - 1) * src_stride_y;
    219     src_stride_y = -src_stride_y;
    220   }
    221   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    222   return 0;
    223 }
    224 
    225 // Mirror a plane of data.
    226 void MirrorPlane(const uint8* src_y, int src_stride_y,
    227                  uint8* dst_y, int dst_stride_y,
    228                  int width, int height) {
    229   int y;
    230   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
    231   // Negative height means invert the image.
    232   if (height < 0) {
    233     height = -height;
    234     src_y = src_y + (height - 1) * src_stride_y;
    235     src_stride_y = -src_stride_y;
    236   }
    237 #if defined(HAS_MIRRORROW_NEON)
    238   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    239     MirrorRow = MirrorRow_NEON;
    240   }
    241 #endif
    242 #if defined(HAS_MIRRORROW_SSE2)
    243   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
    244     MirrorRow = MirrorRow_SSE2;
    245   }
    246 #endif
    247 #if defined(HAS_MIRRORROW_SSSE3)
    248   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
    249       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
    250       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    251     MirrorRow = MirrorRow_SSSE3;
    252   }
    253 #endif
    254 #if defined(HAS_MIRRORROW_AVX2)
    255   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    256     MirrorRow = MirrorRow_AVX2;
    257   }
    258 #endif
    259 
    260   // Mirror plane
    261   for (y = 0; y < height; ++y) {
    262     MirrorRow(src_y, dst_y, width);
    263     src_y += src_stride_y;
    264     dst_y += dst_stride_y;
    265   }
    266 }
    267 
    268 // Convert YUY2 to I422.
    269 LIBYUV_API
    270 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
    271                uint8* dst_y, int dst_stride_y,
    272                uint8* dst_u, int dst_stride_u,
    273                uint8* dst_v, int dst_stride_v,
    274                int width, int height) {
    275   int y;
    276   void (*YUY2ToUV422Row)(const uint8* src_yuy2,
    277                          uint8* dst_u, uint8* dst_v, int pix) =
    278       YUY2ToUV422Row_C;
    279   void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
    280       YUY2ToYRow_C;
    281   // Negative height means invert the image.
    282   if (height < 0) {
    283     height = -height;
    284     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    285     src_stride_yuy2 = -src_stride_yuy2;
    286   }
    287   // Coalesce rows.
    288   if (src_stride_yuy2 == width * 2 &&
    289       dst_stride_y == width &&
    290       dst_stride_u * 2 == width &&
    291       dst_stride_v * 2 == width) {
    292     width *= height;
    293     height = 1;
    294     src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    295   }
    296 #if defined(HAS_YUY2TOYROW_SSE2)
    297   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    298     YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    299     YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    300     if (IS_ALIGNED(width, 16)) {
    301       YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
    302       YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
    303       if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
    304         YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
    305         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    306           YUY2ToYRow = YUY2ToYRow_SSE2;
    307         }
    308       }
    309     }
    310   }
    311 #endif
    312 #if defined(HAS_YUY2TOYROW_AVX2)
    313   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    314     YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    315     YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    316     if (IS_ALIGNED(width, 32)) {
    317       YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
    318       YUY2ToYRow = YUY2ToYRow_AVX2;
    319     }
    320   }
    321 #endif
    322 #if defined(HAS_YUY2TOYROW_NEON)
    323   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    324     YUY2ToYRow = YUY2ToYRow_Any_NEON;
    325     if (width >= 16) {
    326       YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    327     }
    328     if (IS_ALIGNED(width, 16)) {
    329       YUY2ToYRow = YUY2ToYRow_NEON;
    330       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    331     }
    332   }
    333 #endif
    334 
    335   for (y = 0; y < height; ++y) {
    336     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    337     YUY2ToYRow(src_yuy2, dst_y, width);
    338     src_yuy2 += src_stride_yuy2;
    339     dst_y += dst_stride_y;
    340     dst_u += dst_stride_u;
    341     dst_v += dst_stride_v;
    342   }
    343   return 0;
    344 }
    345 
    346 // Convert UYVY to I422.
    347 LIBYUV_API
    348 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
    349                uint8* dst_y, int dst_stride_y,
    350                uint8* dst_u, int dst_stride_u,
    351                uint8* dst_v, int dst_stride_v,
    352                int width, int height) {
    353   int y;
    354   void (*UYVYToUV422Row)(const uint8* src_uyvy,
    355                          uint8* dst_u, uint8* dst_v, int pix) =
    356       UYVYToUV422Row_C;
    357   void (*UYVYToYRow)(const uint8* src_uyvy,
    358                      uint8* dst_y, int pix) = UYVYToYRow_C;
    359   // Negative height means invert the image.
    360   if (height < 0) {
    361     height = -height;
    362     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    363     src_stride_uyvy = -src_stride_uyvy;
    364   }
    365   // Coalesce rows.
    366   if (src_stride_uyvy == width * 2 &&
    367       dst_stride_y == width &&
    368       dst_stride_u * 2 == width &&
    369       dst_stride_v * 2 == width) {
    370     width *= height;
    371     height = 1;
    372     src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    373   }
    374 #if defined(HAS_UYVYTOYROW_SSE2)
    375   if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    376     UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    377     UYVYToYRow = UYVYToYRow_Any_SSE2;
    378     if (IS_ALIGNED(width, 16)) {
    379       UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
    380       UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
    381       if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
    382         UYVYToUV422Row = UYVYToUV422Row_SSE2;
    383         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    384           UYVYToYRow = UYVYToYRow_SSE2;
    385         }
    386       }
    387     }
    388   }
    389 #endif
    390 #if defined(HAS_UYVYTOYROW_AVX2)
    391   if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    392     UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    393     UYVYToYRow = UYVYToYRow_Any_AVX2;
    394     if (IS_ALIGNED(width, 32)) {
    395       UYVYToUV422Row = UYVYToUV422Row_AVX2;
    396       UYVYToYRow = UYVYToYRow_AVX2;
    397     }
    398   }
    399 #endif
    400 #if defined(HAS_UYVYTOYROW_NEON)
    401   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    402     UYVYToYRow = UYVYToYRow_Any_NEON;
    403     if (width >= 16) {
    404       UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    405     }
    406     if (IS_ALIGNED(width, 16)) {
    407       UYVYToYRow = UYVYToYRow_NEON;
    408       UYVYToUV422Row = UYVYToUV422Row_NEON;
    409     }
    410   }
    411 #endif
    412 
    413   for (y = 0; y < height; ++y) {
    414     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    415     UYVYToYRow(src_uyvy, dst_y, width);
    416     src_uyvy += src_stride_uyvy;
    417     dst_y += dst_stride_y;
    418     dst_u += dst_stride_u;
    419     dst_v += dst_stride_v;
    420   }
    421   return 0;
    422 }
    423 
    424 // Mirror I400 with optional flipping
    425 LIBYUV_API
    426 int I400Mirror(const uint8* src_y, int src_stride_y,
    427                uint8* dst_y, int dst_stride_y,
    428                int width, int height) {
    429   if (!src_y || !dst_y ||
    430       width <= 0 || height == 0) {
    431     return -1;
    432   }
    433   // Negative height means invert the image.
    434   if (height < 0) {
    435     height = -height;
    436     src_y = src_y + (height - 1) * src_stride_y;
    437     src_stride_y = -src_stride_y;
    438   }
    439 
    440   MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    441   return 0;
    442 }
    443 
    444 // Mirror I420 with optional flipping
    445 LIBYUV_API
    446 int I420Mirror(const uint8* src_y, int src_stride_y,
    447                const uint8* src_u, int src_stride_u,
    448                const uint8* src_v, int src_stride_v,
    449                uint8* dst_y, int dst_stride_y,
    450                uint8* dst_u, int dst_stride_u,
    451                uint8* dst_v, int dst_stride_v,
    452                int width, int height) {
    453   int halfwidth = (width + 1) >> 1;
    454   int halfheight = (height + 1) >> 1;
    455   if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
    456       width <= 0 || height == 0) {
    457     return -1;
    458   }
    459   // Negative height means invert the image.
    460   if (height < 0) {
    461     height = -height;
    462     halfheight = (height + 1) >> 1;
    463     src_y = src_y + (height - 1) * src_stride_y;
    464     src_u = src_u + (halfheight - 1) * src_stride_u;
    465     src_v = src_v + (halfheight - 1) * src_stride_v;
    466     src_stride_y = -src_stride_y;
    467     src_stride_u = -src_stride_u;
    468     src_stride_v = -src_stride_v;
    469   }
    470 
    471   if (dst_y) {
    472     MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    473   }
    474   MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
    475   MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
    476   return 0;
    477 }
    478 
    479 // ARGB mirror.
    480 LIBYUV_API
    481 int ARGBMirror(const uint8* src_argb, int src_stride_argb,
    482                uint8* dst_argb, int dst_stride_argb,
    483                int width, int height) {
    484   int y;
    485   void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
    486       ARGBMirrorRow_C;
    487   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    488     return -1;
    489   }
    490   // Negative height means invert the image.
    491   if (height < 0) {
    492     height = -height;
    493     src_argb = src_argb + (height - 1) * src_stride_argb;
    494     src_stride_argb = -src_stride_argb;
    495   }
    496 
    497 #if defined(HAS_ARGBMIRRORROW_SSSE3)
    498   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
    499       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
    500       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    501     ARGBMirrorRow = ARGBMirrorRow_SSSE3;
    502   }
    503 #endif
    504 #if defined(HAS_ARGBMIRRORROW_AVX2)
    505   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
    506     ARGBMirrorRow = ARGBMirrorRow_AVX2;
    507   }
    508 #endif
    509 #if defined(HAS_ARGBMIRRORROW_NEON)
    510   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
    511     ARGBMirrorRow = ARGBMirrorRow_NEON;
    512   }
    513 #endif
    514 
    515   // Mirror plane
    516   for (y = 0; y < height; ++y) {
    517     ARGBMirrorRow(src_argb, dst_argb, width);
    518     src_argb += src_stride_argb;
    519     dst_argb += dst_stride_argb;
    520   }
    521   return 0;
    522 }
    523 
    524 // Get a blender that optimized for the CPU, alignment and pixel count.
    525 // As there are 6 blenders to choose from, the caller should try to use
    526 // the same blend function for all pixels if possible.
    527 LIBYUV_API
    528 ARGBBlendRow GetARGBBlend() {
    529   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    530                        uint8* dst_argb, int width) = ARGBBlendRow_C;
    531 #if defined(HAS_ARGBBLENDROW_SSSE3)
    532   if (TestCpuFlag(kCpuHasSSSE3)) {
    533     ARGBBlendRow = ARGBBlendRow_SSSE3;
    534     return ARGBBlendRow;
    535   }
    536 #endif
    537 #if defined(HAS_ARGBBLENDROW_SSE2)
    538   if (TestCpuFlag(kCpuHasSSE2)) {
    539     ARGBBlendRow = ARGBBlendRow_SSE2;
    540   }
    541 #endif
    542 #if defined(HAS_ARGBBLENDROW_NEON)
    543   if (TestCpuFlag(kCpuHasNEON)) {
    544     ARGBBlendRow = ARGBBlendRow_NEON;
    545   }
    546 #endif
    547   return ARGBBlendRow;
    548 }
    549 
    550 // Alpha Blend 2 ARGB images and store to destination.
    551 LIBYUV_API
    552 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
    553               const uint8* src_argb1, int src_stride_argb1,
    554               uint8* dst_argb, int dst_stride_argb,
    555               int width, int height) {
    556   int y;
    557   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    558                        uint8* dst_argb, int width) = GetARGBBlend();
    559   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    560     return -1;
    561   }
    562   // Negative height means invert the image.
    563   if (height < 0) {
    564     height = -height;
    565     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    566     dst_stride_argb = -dst_stride_argb;
    567   }
    568   // Coalesce rows.
    569   if (src_stride_argb0 == width * 4 &&
    570       src_stride_argb1 == width * 4 &&
    571       dst_stride_argb == width * 4) {
    572     width *= height;
    573     height = 1;
    574     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    575   }
    576 
    577   for (y = 0; y < height; ++y) {
    578     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    579     src_argb0 += src_stride_argb0;
    580     src_argb1 += src_stride_argb1;
    581     dst_argb += dst_stride_argb;
    582   }
    583   return 0;
    584 }
    585 
    586 // Multiply 2 ARGB images and store to destination.
    587 LIBYUV_API
    588 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
    589                  const uint8* src_argb1, int src_stride_argb1,
    590                  uint8* dst_argb, int dst_stride_argb,
    591                  int width, int height) {
    592   int y;
    593   void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
    594                           int width) = ARGBMultiplyRow_C;
    595   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    596     return -1;
    597   }
    598   // Negative height means invert the image.
    599   if (height < 0) {
    600     height = -height;
    601     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    602     dst_stride_argb = -dst_stride_argb;
    603   }
    604   // Coalesce rows.
    605   if (src_stride_argb0 == width * 4 &&
    606       src_stride_argb1 == width * 4 &&
    607       dst_stride_argb == width * 4) {
    608     width *= height;
    609     height = 1;
    610     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    611   }
    612 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
    613   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    614     ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    615     if (IS_ALIGNED(width, 4)) {
    616       ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    617     }
    618   }
    619 #endif
    620 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
    621   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    622     ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    623     if (IS_ALIGNED(width, 8)) {
    624       ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    625     }
    626   }
    627 #endif
    628 #if defined(HAS_ARGBMULTIPLYROW_NEON)
    629   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    630     ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    631     if (IS_ALIGNED(width, 8)) {
    632       ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    633     }
    634   }
    635 #endif
    636 
    637   // Multiply plane
    638   for (y = 0; y < height; ++y) {
    639     ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    640     src_argb0 += src_stride_argb0;
    641     src_argb1 += src_stride_argb1;
    642     dst_argb += dst_stride_argb;
    643   }
    644   return 0;
    645 }
    646 
    647 // Add 2 ARGB images and store to destination.
    648 LIBYUV_API
    649 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
    650             const uint8* src_argb1, int src_stride_argb1,
    651             uint8* dst_argb, int dst_stride_argb,
    652             int width, int height) {
    653   int y;
    654   void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
    655                      int width) = ARGBAddRow_C;
    656   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    657     return -1;
    658   }
    659   // Negative height means invert the image.
    660   if (height < 0) {
    661     height = -height;
    662     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    663     dst_stride_argb = -dst_stride_argb;
    664   }
    665   // Coalesce rows.
    666   if (src_stride_argb0 == width * 4 &&
    667       src_stride_argb1 == width * 4 &&
    668       dst_stride_argb == width * 4) {
    669     width *= height;
    670     height = 1;
    671     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    672   }
    673 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
    674   if (TestCpuFlag(kCpuHasSSE2)) {
    675     ARGBAddRow = ARGBAddRow_SSE2;
    676   }
    677 #endif
    678 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
    679   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    680     ARGBAddRow = ARGBAddRow_Any_SSE2;
    681     if (IS_ALIGNED(width, 4)) {
    682       ARGBAddRow = ARGBAddRow_SSE2;
    683     }
    684   }
    685 #endif
    686 #if defined(HAS_ARGBADDROW_AVX2)
    687   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    688     ARGBAddRow = ARGBAddRow_Any_AVX2;
    689     if (IS_ALIGNED(width, 8)) {
    690       ARGBAddRow = ARGBAddRow_AVX2;
    691     }
    692   }
    693 #endif
    694 #if defined(HAS_ARGBADDROW_NEON)
    695   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    696     ARGBAddRow = ARGBAddRow_Any_NEON;
    697     if (IS_ALIGNED(width, 8)) {
    698       ARGBAddRow = ARGBAddRow_NEON;
    699     }
    700   }
    701 #endif
    702 
    703   // Add plane
    704   for (y = 0; y < height; ++y) {
    705     ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    706     src_argb0 += src_stride_argb0;
    707     src_argb1 += src_stride_argb1;
    708     dst_argb += dst_stride_argb;
    709   }
    710   return 0;
    711 }
    712 
    713 // Subtract 2 ARGB images and store to destination.
    714 LIBYUV_API
    715 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
    716                  const uint8* src_argb1, int src_stride_argb1,
    717                  uint8* dst_argb, int dst_stride_argb,
    718                  int width, int height) {
    719   int y;
    720   void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
    721                           int width) = ARGBSubtractRow_C;
    722   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    723     return -1;
    724   }
    725   // Negative height means invert the image.
    726   if (height < 0) {
    727     height = -height;
    728     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    729     dst_stride_argb = -dst_stride_argb;
    730   }
    731   // Coalesce rows.
    732   if (src_stride_argb0 == width * 4 &&
    733       src_stride_argb1 == width * 4 &&
    734       dst_stride_argb == width * 4) {
    735     width *= height;
    736     height = 1;
    737     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    738   }
    739 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
    740   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    741     ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    742     if (IS_ALIGNED(width, 4)) {
    743       ARGBSubtractRow = ARGBSubtractRow_SSE2;
    744     }
    745   }
    746 #endif
    747 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
    748   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    749     ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    750     if (IS_ALIGNED(width, 8)) {
    751       ARGBSubtractRow = ARGBSubtractRow_AVX2;
    752     }
    753   }
    754 #endif
    755 #if defined(HAS_ARGBSUBTRACTROW_NEON)
    756   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    757     ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    758     if (IS_ALIGNED(width, 8)) {
    759       ARGBSubtractRow = ARGBSubtractRow_NEON;
    760     }
    761   }
    762 #endif
    763 
    764   // Subtract plane
    765   for (y = 0; y < height; ++y) {
    766     ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    767     src_argb0 += src_stride_argb0;
    768     src_argb1 += src_stride_argb1;
    769     dst_argb += dst_stride_argb;
    770   }
    771   return 0;
    772 }
    773 
    774 // Convert I422 to BGRA.
    775 LIBYUV_API
    776 int I422ToBGRA(const uint8* src_y, int src_stride_y,
    777                const uint8* src_u, int src_stride_u,
    778                const uint8* src_v, int src_stride_v,
    779                uint8* dst_bgra, int dst_stride_bgra,
    780                int width, int height) {
    781   int y;
    782   void (*I422ToBGRARow)(const uint8* y_buf,
    783                         const uint8* u_buf,
    784                         const uint8* v_buf,
    785                         uint8* rgb_buf,
    786                         int width) = I422ToBGRARow_C;
    787   if (!src_y || !src_u || !src_v ||
    788       !dst_bgra ||
    789       width <= 0 || height == 0) {
    790     return -1;
    791   }
    792   // Negative height means invert the image.
    793   if (height < 0) {
    794     height = -height;
    795     dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    796     dst_stride_bgra = -dst_stride_bgra;
    797   }
    798   // Coalesce rows.
    799   if (src_stride_y == width &&
    800       src_stride_u * 2 == width &&
    801       src_stride_v * 2 == width &&
    802       dst_stride_bgra == width * 4) {
    803     width *= height;
    804     height = 1;
    805     src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
    806   }
    807 #if defined(HAS_I422TOBGRAROW_NEON)
    808   if (TestCpuFlag(kCpuHasNEON)) {
    809     I422ToBGRARow = I422ToBGRARow_Any_NEON;
    810     if (IS_ALIGNED(width, 16)) {
    811       I422ToBGRARow = I422ToBGRARow_NEON;
    812     }
    813   }
    814 #elif defined(HAS_I422TOBGRAROW_SSSE3)
    815   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    816     I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
    817     if (IS_ALIGNED(width, 8)) {
    818       I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
    819       if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
    820         I422ToBGRARow = I422ToBGRARow_SSSE3;
    821       }
    822     }
    823   }
    824 #elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
    825   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
    826       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
    827       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
    828       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
    829       IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
    830     I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
    831   }
    832 #endif
    833 
    834   for (y = 0; y < height; ++y) {
    835     I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    836     dst_bgra += dst_stride_bgra;
    837     src_y += src_stride_y;
    838     src_u += src_stride_u;
    839     src_v += src_stride_v;
    840   }
    841   return 0;
    842 }
    843 
    844 // Convert I422 to ABGR.
    845 LIBYUV_API
    846 int I422ToABGR(const uint8* src_y, int src_stride_y,
    847                const uint8* src_u, int src_stride_u,
    848                const uint8* src_v, int src_stride_v,
    849                uint8* dst_abgr, int dst_stride_abgr,
    850                int width, int height) {
    851   int y;
    852   void (*I422ToABGRRow)(const uint8* y_buf,
    853                         const uint8* u_buf,
    854                         const uint8* v_buf,
    855                         uint8* rgb_buf,
    856                         int width) = I422ToABGRRow_C;
    857   if (!src_y || !src_u || !src_v ||
    858       !dst_abgr ||
    859       width <= 0 || height == 0) {
    860     return -1;
    861   }
    862   // Negative height means invert the image.
    863   if (height < 0) {
    864     height = -height;
    865     dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    866     dst_stride_abgr = -dst_stride_abgr;
    867   }
    868   // Coalesce rows.
    869   if (src_stride_y == width &&
    870       src_stride_u * 2 == width &&
    871       src_stride_v * 2 == width &&
    872       dst_stride_abgr == width * 4) {
    873     width *= height;
    874     height = 1;
    875     src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
    876   }
    877 #if defined(HAS_I422TOABGRROW_NEON)
    878   if (TestCpuFlag(kCpuHasNEON)) {
    879     I422ToABGRRow = I422ToABGRRow_Any_NEON;
    880     if (IS_ALIGNED(width, 16)) {
    881       I422ToABGRRow = I422ToABGRRow_NEON;
    882     }
    883   }
    884 #elif defined(HAS_I422TOABGRROW_SSSE3)
    885   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    886     I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    887     if (IS_ALIGNED(width, 8)) {
    888       I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
    889       if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
    890         I422ToABGRRow = I422ToABGRRow_SSSE3;
    891       }
    892     }
    893   }
    894 #endif
    895 
    896   for (y = 0; y < height; ++y) {
    897     I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    898     dst_abgr += dst_stride_abgr;
    899     src_y += src_stride_y;
    900     src_u += src_stride_u;
    901     src_v += src_stride_v;
    902   }
    903   return 0;
    904 }
    905 
    906 // Convert I422 to RGBA.
    907 LIBYUV_API
    908 int I422ToRGBA(const uint8* src_y, int src_stride_y,
    909                const uint8* src_u, int src_stride_u,
    910                const uint8* src_v, int src_stride_v,
    911                uint8* dst_rgba, int dst_stride_rgba,
    912                int width, int height) {
    913   int y;
    914   void (*I422ToRGBARow)(const uint8* y_buf,
    915                         const uint8* u_buf,
    916                         const uint8* v_buf,
    917                         uint8* rgb_buf,
    918                         int width) = I422ToRGBARow_C;
    919   if (!src_y || !src_u || !src_v ||
    920       !dst_rgba ||
    921       width <= 0 || height == 0) {
    922     return -1;
    923   }
    924   // Negative height means invert the image.
    925   if (height < 0) {
    926     height = -height;
    927     dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    928     dst_stride_rgba = -dst_stride_rgba;
    929   }
    930   // Coalesce rows.
    931   if (src_stride_y == width &&
    932       src_stride_u * 2 == width &&
    933       src_stride_v * 2 == width &&
    934       dst_stride_rgba == width * 4) {
    935     width *= height;
    936     height = 1;
    937     src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
    938   }
    939 #if defined(HAS_I422TORGBAROW_NEON)
    940   if (TestCpuFlag(kCpuHasNEON)) {
    941     I422ToRGBARow = I422ToRGBARow_Any_NEON;
    942     if (IS_ALIGNED(width, 16)) {
    943       I422ToRGBARow = I422ToRGBARow_NEON;
    944     }
    945   }
    946 #elif defined(HAS_I422TORGBAROW_SSSE3)
    947   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    948     I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    949     if (IS_ALIGNED(width, 8)) {
    950       I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
    951       if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
    952         I422ToRGBARow = I422ToRGBARow_SSSE3;
    953       }
    954     }
    955   }
    956 #endif
    957 
    958   for (y = 0; y < height; ++y) {
    959     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    960     dst_rgba += dst_stride_rgba;
    961     src_y += src_stride_y;
    962     src_u += src_stride_u;
    963     src_v += src_stride_v;
    964   }
    965   return 0;
    966 }
    967 
    968 // Convert NV12 to RGB565.
    969 LIBYUV_API
    970 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
    971                  const uint8* src_uv, int src_stride_uv,
    972                  uint8* dst_rgb565, int dst_stride_rgb565,
    973                  int width, int height) {
    974   int y;
    975   void (*NV12ToRGB565Row)(const uint8* y_buf,
    976                           const uint8* uv_buf,
    977                           uint8* rgb_buf,
    978                           int width) = NV12ToRGB565Row_C;
    979   if (!src_y || !src_uv || !dst_rgb565 ||
    980       width <= 0 || height == 0) {
    981     return -1;
    982   }
    983   // Negative height means invert the image.
    984   if (height < 0) {
    985     height = -height;
    986     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    987     dst_stride_rgb565 = -dst_stride_rgb565;
    988   }
    989 #if defined(HAS_NV12TORGB565ROW_SSSE3)
    990   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    991     NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    992     if (IS_ALIGNED(width, 8)) {
    993       NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    994     }
    995   }
    996 #elif defined(HAS_NV12TORGB565ROW_NEON)
    997   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    998     NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    999     if (IS_ALIGNED(width, 8)) {
   1000       NV12ToRGB565Row = NV12ToRGB565Row_NEON;
   1001     }
   1002   }
   1003 #endif
   1004 
   1005   for (y = 0; y < height; ++y) {
   1006     NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
   1007     dst_rgb565 += dst_stride_rgb565;
   1008     src_y += src_stride_y;
   1009     if (y & 1) {
   1010       src_uv += src_stride_uv;
   1011     }
   1012   }
   1013   return 0;
   1014 }
   1015 
   1016 // Convert NV21 to RGB565.
   1017 LIBYUV_API
   1018 int NV21ToRGB565(const uint8* src_y, int src_stride_y,
   1019                  const uint8* src_vu, int src_stride_vu,
   1020                  uint8* dst_rgb565, int dst_stride_rgb565,
   1021                  int width, int height) {
   1022   int y;
   1023   void (*NV21ToRGB565Row)(const uint8* y_buf,
   1024                           const uint8* src_vu,
   1025                           uint8* rgb_buf,
   1026                           int width) = NV21ToRGB565Row_C;
   1027   if (!src_y || !src_vu || !dst_rgb565 ||
   1028       width <= 0 || height == 0) {
   1029     return -1;
   1030   }
   1031   // Negative height means invert the image.
   1032   if (height < 0) {
   1033     height = -height;
   1034     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
   1035     dst_stride_rgb565 = -dst_stride_rgb565;
   1036   }
   1037 #if defined(HAS_NV21TORGB565ROW_SSSE3)
   1038   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
   1039     NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
   1040     if (IS_ALIGNED(width, 8)) {
   1041       NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
   1042     }
   1043   }
   1044 #elif defined(HAS_NV21TORGB565ROW_NEON)
   1045   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1046     NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
   1047     if (IS_ALIGNED(width, 8)) {
   1048       NV21ToRGB565Row = NV21ToRGB565Row_NEON;
   1049     }
   1050   }
   1051 #endif
   1052 
   1053   for (y = 0; y < height; ++y) {
   1054     NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
   1055     dst_rgb565 += dst_stride_rgb565;
   1056     src_y += src_stride_y;
   1057     if (y & 1) {
   1058       src_vu += src_stride_vu;
   1059     }
   1060   }
   1061   return 0;
   1062 }
   1063 
   1064 LIBYUV_API
   1065 void SetPlane(uint8* dst_y, int dst_stride_y,
   1066               int width, int height,
   1067               uint32 value) {
   1068   int y;
   1069   uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
   1070   void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
   1071   // Coalesce rows.
   1072   if (dst_stride_y == width) {
   1073     width *= height;
   1074     height = 1;
   1075     dst_stride_y = 0;
   1076   }
   1077 #if defined(HAS_SETROW_NEON)
   1078   if (TestCpuFlag(kCpuHasNEON) &&
   1079       IS_ALIGNED(width, 16) &&
   1080       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
   1081     SetRow = SetRow_NEON;
   1082   }
   1083 #endif
   1084 #if defined(HAS_SETROW_X86)
   1085   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
   1086     SetRow = SetRow_X86;
   1087   }
   1088 #endif
   1089 
   1090   // Set plane
   1091   for (y = 0; y < height; ++y) {
   1092     SetRow(dst_y, v32, width);
   1093     dst_y += dst_stride_y;
   1094   }
   1095 }
   1096 
   1097 // Draw a rectangle into I420
   1098 LIBYUV_API
   1099 int I420Rect(uint8* dst_y, int dst_stride_y,
   1100              uint8* dst_u, int dst_stride_u,
   1101              uint8* dst_v, int dst_stride_v,
   1102              int x, int y,
   1103              int width, int height,
   1104              int value_y, int value_u, int value_v) {
   1105   int halfwidth = (width + 1) >> 1;
   1106   int halfheight = (height + 1) >> 1;
   1107   uint8* start_y = dst_y + y * dst_stride_y + x;
   1108   uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
   1109   uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
   1110   if (!dst_y || !dst_u || !dst_v ||
   1111       width <= 0 || height <= 0 ||
   1112       x < 0 || y < 0 ||
   1113       value_y < 0 || value_y > 255 ||
   1114       value_u < 0 || value_u > 255 ||
   1115       value_v < 0 || value_v > 255) {
   1116     return -1;
   1117   }
   1118 
   1119   SetPlane(start_y, dst_stride_y, width, height, value_y);
   1120   SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
   1121   SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
   1122   return 0;
   1123 }
   1124 
   1125 // Draw a rectangle into ARGB
   1126 LIBYUV_API
   1127 int ARGBRect(uint8* dst_argb, int dst_stride_argb,
   1128              int dst_x, int dst_y,
   1129              int width, int height,
   1130              uint32 value) {
   1131   if (!dst_argb ||
   1132       width <= 0 || height <= 0 ||
   1133       dst_x < 0 || dst_y < 0) {
   1134     return -1;
   1135   }
   1136   dst_argb += dst_y * dst_stride_argb + dst_x * 4;
   1137   // Coalesce rows.
   1138   if (dst_stride_argb == width * 4) {
   1139     width *= height;
   1140     height = 1;
   1141     dst_stride_argb = 0;
   1142   }
   1143 #if defined(HAS_SETROW_NEON)
   1144   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
   1145       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1146     ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
   1147     return 0;
   1148   }
   1149 #endif
   1150 #if defined(HAS_SETROW_X86)
   1151   if (TestCpuFlag(kCpuHasX86)) {
   1152     ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
   1153     return 0;
   1154   }
   1155 #endif
   1156   ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
   1157   return 0;
   1158 }
   1159 
   1160 // Convert unattentuated ARGB to preattenuated ARGB.
   1161 // An unattenutated ARGB alpha blend uses the formula
   1162 // p = a * f + (1 - a) * b
   1163 // where
   1164 //   p is output pixel
   1165 //   f is foreground pixel
   1166 //   b is background pixel
   1167 //   a is alpha value from foreground pixel
   1168 // An preattenutated ARGB alpha blend uses the formula
   1169 // p = f + (1 - a) * b
   1170 // where
   1171 //   f is foreground pixel premultiplied by alpha
   1172 
   1173 LIBYUV_API
   1174 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
   1175                   uint8* dst_argb, int dst_stride_argb,
   1176                   int width, int height) {
   1177   int y;
   1178   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1179                            int width) = ARGBAttenuateRow_C;
   1180   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1181     return -1;
   1182   }
   1183   if (height < 0) {
   1184     height = -height;
   1185     src_argb = src_argb + (height - 1) * src_stride_argb;
   1186     src_stride_argb = -src_stride_argb;
   1187   }
   1188   // Coalesce rows.
   1189   if (src_stride_argb == width * 4 &&
   1190       dst_stride_argb == width * 4) {
   1191     width *= height;
   1192     height = 1;
   1193     src_stride_argb = dst_stride_argb = 0;
   1194   }
   1195 #if defined(HAS_ARGBATTENUATEROW_SSE2)
   1196   if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
   1197       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1198       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1199     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
   1200     if (IS_ALIGNED(width, 4)) {
   1201       ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
   1202     }
   1203   }
   1204 #endif
   1205 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
   1206   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
   1207     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
   1208     if (IS_ALIGNED(width, 4)) {
   1209       ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
   1210     }
   1211   }
   1212 #endif
   1213 #if defined(HAS_ARGBATTENUATEROW_AVX2)
   1214   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1215     ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
   1216     if (IS_ALIGNED(width, 8)) {
   1217       ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
   1218     }
   1219   }
   1220 #endif
   1221 #if defined(HAS_ARGBATTENUATEROW_NEON)
   1222   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1223     ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
   1224     if (IS_ALIGNED(width, 8)) {
   1225       ARGBAttenuateRow = ARGBAttenuateRow_NEON;
   1226     }
   1227   }
   1228 #endif
   1229 
   1230   for (y = 0; y < height; ++y) {
   1231     ARGBAttenuateRow(src_argb, dst_argb, width);
   1232     src_argb += src_stride_argb;
   1233     dst_argb += dst_stride_argb;
   1234   }
   1235   return 0;
   1236 }
   1237 
   1238 // Convert preattentuated ARGB to unattenuated ARGB.
   1239 LIBYUV_API
   1240 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
   1241                     uint8* dst_argb, int dst_stride_argb,
   1242                     int width, int height) {
   1243   int y;
   1244   void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1245                              int width) = ARGBUnattenuateRow_C;
   1246   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1247     return -1;
   1248   }
   1249   if (height < 0) {
   1250     height = -height;
   1251     src_argb = src_argb + (height - 1) * src_stride_argb;
   1252     src_stride_argb = -src_stride_argb;
   1253   }
   1254   // Coalesce rows.
   1255   if (src_stride_argb == width * 4 &&
   1256       dst_stride_argb == width * 4) {
   1257     width *= height;
   1258     height = 1;
   1259     src_stride_argb = dst_stride_argb = 0;
   1260   }
   1261 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
   1262   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1263     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
   1264     if (IS_ALIGNED(width, 4)) {
   1265       ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
   1266     }
   1267   }
   1268 #endif
   1269 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
   1270   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1271     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
   1272     if (IS_ALIGNED(width, 8)) {
   1273       ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
   1274     }
   1275   }
   1276 #endif
   1277 // TODO(fbarchard): Neon version.
   1278 
   1279   for (y = 0; y < height; ++y) {
   1280     ARGBUnattenuateRow(src_argb, dst_argb, width);
   1281     src_argb += src_stride_argb;
   1282     dst_argb += dst_stride_argb;
   1283   }
   1284   return 0;
   1285 }
   1286 
   1287 // Convert ARGB to Grayed ARGB.
   1288 LIBYUV_API
   1289 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
   1290                uint8* dst_argb, int dst_stride_argb,
   1291                int width, int height) {
   1292   int y;
   1293   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1294                       int width) = ARGBGrayRow_C;
   1295   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1296     return -1;
   1297   }
   1298   if (height < 0) {
   1299     height = -height;
   1300     src_argb = src_argb + (height - 1) * src_stride_argb;
   1301     src_stride_argb = -src_stride_argb;
   1302   }
   1303   // Coalesce rows.
   1304   if (src_stride_argb == width * 4 &&
   1305       dst_stride_argb == width * 4) {
   1306     width *= height;
   1307     height = 1;
   1308     src_stride_argb = dst_stride_argb = 0;
   1309   }
   1310 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1311   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1312       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1313       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1314     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1315   }
   1316 #elif defined(HAS_ARGBGRAYROW_NEON)
   1317   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1318     ARGBGrayRow = ARGBGrayRow_NEON;
   1319   }
   1320 #endif
   1321 
   1322   for (y = 0; y < height; ++y) {
   1323     ARGBGrayRow(src_argb, dst_argb, width);
   1324     src_argb += src_stride_argb;
   1325     dst_argb += dst_stride_argb;
   1326   }
   1327   return 0;
   1328 }
   1329 
   1330 // Make a rectangle of ARGB gray scale.
   1331 LIBYUV_API
   1332 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
   1333              int dst_x, int dst_y,
   1334              int width, int height) {
   1335   int y;
   1336   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1337                       int width) = ARGBGrayRow_C;
   1338   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1339   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1340     return -1;
   1341   }
   1342   // Coalesce rows.
   1343   if (dst_stride_argb == width * 4) {
   1344     width *= height;
   1345     height = 1;
   1346     dst_stride_argb = 0;
   1347   }
   1348 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1349   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1350       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1351     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1352   }
   1353 #elif defined(HAS_ARGBGRAYROW_NEON)
   1354   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1355     ARGBGrayRow = ARGBGrayRow_NEON;
   1356   }
   1357 #endif
   1358   for (y = 0; y < height; ++y) {
   1359     ARGBGrayRow(dst, dst, width);
   1360     dst += dst_stride_argb;
   1361   }
   1362   return 0;
   1363 }
   1364 
   1365 // Make a rectangle of ARGB Sepia tone.
   1366 LIBYUV_API
   1367 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
   1368               int dst_x, int dst_y, int width, int height) {
   1369   int y;
   1370   void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
   1371   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1372   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1373     return -1;
   1374   }
   1375   // Coalesce rows.
   1376   if (dst_stride_argb == width * 4) {
   1377     width *= height;
   1378     height = 1;
   1379     dst_stride_argb = 0;
   1380   }
   1381 #if defined(HAS_ARGBSEPIAROW_SSSE3)
   1382   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1383       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1384     ARGBSepiaRow = ARGBSepiaRow_SSSE3;
   1385   }
   1386 #elif defined(HAS_ARGBSEPIAROW_NEON)
   1387   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1388     ARGBSepiaRow = ARGBSepiaRow_NEON;
   1389   }
   1390 #endif
   1391   for (y = 0; y < height; ++y) {
   1392     ARGBSepiaRow(dst, width);
   1393     dst += dst_stride_argb;
   1394   }
   1395   return 0;
   1396 }
   1397 
   1398 // Apply a 4x4 matrix to each ARGB pixel.
   1399 // Note: Normally for shading, but can be used to swizzle or invert.
   1400 LIBYUV_API
   1401 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
   1402                     uint8* dst_argb, int dst_stride_argb,
   1403                     const int8* matrix_argb,
   1404                     int width, int height) {
   1405   int y;
   1406   void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
   1407       const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
   1408   if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
   1409     return -1;
   1410   }
   1411   if (height < 0) {
   1412     height = -height;
   1413     src_argb = src_argb + (height - 1) * src_stride_argb;
   1414     src_stride_argb = -src_stride_argb;
   1415   }
   1416   // Coalesce rows.
   1417   if (src_stride_argb == width * 4 &&
   1418       dst_stride_argb == width * 4) {
   1419     width *= height;
   1420     height = 1;
   1421     src_stride_argb = dst_stride_argb = 0;
   1422   }
   1423 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
   1424   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1425       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1426     ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
   1427   }
   1428 #elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
   1429   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1430     ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
   1431   }
   1432 #endif
   1433   for (y = 0; y < height; ++y) {
   1434     ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
   1435     src_argb += src_stride_argb;
   1436     dst_argb += dst_stride_argb;
   1437   }
   1438   return 0;
   1439 }
   1440 
   1441 // Apply a 4x3 matrix to each ARGB pixel.
   1442 // Deprecated.
   1443 LIBYUV_API
   1444 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
   1445                    const int8* matrix_rgb,
   1446                    int dst_x, int dst_y, int width, int height) {
   1447   SIMD_ALIGNED(int8 matrix_argb[16]);
   1448   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1449   if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
   1450       dst_x < 0 || dst_y < 0) {
   1451     return -1;
   1452   }
   1453 
   1454   // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
   1455   matrix_argb[0] = matrix_rgb[0] / 2;
   1456   matrix_argb[1] = matrix_rgb[1] / 2;
   1457   matrix_argb[2] = matrix_rgb[2] / 2;
   1458   matrix_argb[3] = matrix_rgb[3] / 2;
   1459   matrix_argb[4] = matrix_rgb[4] / 2;
   1460   matrix_argb[5] = matrix_rgb[5] / 2;
   1461   matrix_argb[6] = matrix_rgb[6] / 2;
   1462   matrix_argb[7] = matrix_rgb[7] / 2;
   1463   matrix_argb[8] = matrix_rgb[8] / 2;
   1464   matrix_argb[9] = matrix_rgb[9] / 2;
   1465   matrix_argb[10] = matrix_rgb[10] / 2;
   1466   matrix_argb[11] = matrix_rgb[11] / 2;
   1467   matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
   1468   matrix_argb[15] = 64;  // 1.0
   1469 
   1470   return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
   1471                          dst, dst_stride_argb,
   1472                          &matrix_argb[0], width, height);
   1473 }
   1474 
   1475 // Apply a color table each ARGB pixel.
   1476 // Table contains 256 ARGB values.
   1477 LIBYUV_API
   1478 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
   1479                    const uint8* table_argb,
   1480                    int dst_x, int dst_y, int width, int height) {
   1481   int y;
   1482   void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
   1483                             int width) = ARGBColorTableRow_C;
   1484   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1485   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
   1486       dst_x < 0 || dst_y < 0) {
   1487     return -1;
   1488   }
   1489   // Coalesce rows.
   1490   if (dst_stride_argb == width * 4) {
   1491     width *= height;
   1492     height = 1;
   1493     dst_stride_argb = 0;
   1494   }
   1495 #if defined(HAS_ARGBCOLORTABLEROW_X86)
   1496   if (TestCpuFlag(kCpuHasX86)) {
   1497     ARGBColorTableRow = ARGBColorTableRow_X86;
   1498   }
   1499 #endif
   1500   for (y = 0; y < height; ++y) {
   1501     ARGBColorTableRow(dst, table_argb, width);
   1502     dst += dst_stride_argb;
   1503   }
   1504   return 0;
   1505 }
   1506 
   1507 // Apply a color table each ARGB pixel but preserve destination alpha.
   1508 // Table contains 256 ARGB values.
   1509 LIBYUV_API
   1510 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
   1511                   const uint8* table_argb,
   1512                   int dst_x, int dst_y, int width, int height) {
   1513   int y;
   1514   void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
   1515                            int width) = RGBColorTableRow_C;
   1516   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1517   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
   1518       dst_x < 0 || dst_y < 0) {
   1519     return -1;
   1520   }
   1521   // Coalesce rows.
   1522   if (dst_stride_argb == width * 4) {
   1523     width *= height;
   1524     height = 1;
   1525     dst_stride_argb = 0;
   1526   }
   1527 #if defined(HAS_RGBCOLORTABLEROW_X86)
   1528   if (TestCpuFlag(kCpuHasX86)) {
   1529     RGBColorTableRow = RGBColorTableRow_X86;
   1530   }
   1531 #endif
   1532   for (y = 0; y < height; ++y) {
   1533     RGBColorTableRow(dst, table_argb, width);
   1534     dst += dst_stride_argb;
   1535   }
   1536   return 0;
   1537 }
   1538 
   1539 // ARGBQuantize is used to posterize art.
   1540 // e.g. rgb / qvalue * qvalue + qvalue / 2
   1541 // But the low levels implement efficiently with 3 parameters, and could be
   1542 // used for other high level operations.
   1543 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
   1544 // where scale is 1 / interval_size as a fixed point value.
   1545 // The divide is replaces with a multiply by reciprocal fixed point multiply.
   1546 // Caveat - although SSE2 saturates, the C function does not and should be used
   1547 // with care if doing anything but quantization.
   1548 LIBYUV_API
   1549 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
   1550                  int scale, int interval_size, int interval_offset,
   1551                  int dst_x, int dst_y, int width, int height) {
   1552   int y;
   1553   void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
   1554                           int interval_offset, int width) = ARGBQuantizeRow_C;
   1555   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1556   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
   1557       interval_size < 1 || interval_size > 255) {
   1558     return -1;
   1559   }
   1560   // Coalesce rows.
   1561   if (dst_stride_argb == width * 4) {
   1562     width *= height;
   1563     height = 1;
   1564     dst_stride_argb = 0;
   1565   }
   1566 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
   1567   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1568       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1569     ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
   1570   }
   1571 #elif defined(HAS_ARGBQUANTIZEROW_NEON)
   1572   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1573     ARGBQuantizeRow = ARGBQuantizeRow_NEON;
   1574   }
   1575 #endif
   1576   for (y = 0; y < height; ++y) {
   1577     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
   1578     dst += dst_stride_argb;
   1579   }
   1580   return 0;
   1581 }
   1582 
   1583 // Computes table of cumulative sum for image where the value is the sum
   1584 // of all values above and to the left of the entry. Used by ARGBBlur.
   1585 LIBYUV_API
   1586 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
   1587                              int32* dst_cumsum, int dst_stride32_cumsum,
   1588                              int width, int height) {
   1589   int y;
   1590   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
   1591       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1592   int32* previous_cumsum = dst_cumsum;
   1593   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
   1594     return -1;
   1595   }
   1596 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   1597   if (TestCpuFlag(kCpuHasSSE2)) {
   1598     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1599   }
   1600 #endif
   1601   memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
   1602   for (y = 0; y < height; ++y) {
   1603     ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
   1604     previous_cumsum = dst_cumsum;
   1605     dst_cumsum += dst_stride32_cumsum;
   1606     src_argb += src_stride_argb;
   1607   }
   1608   return 0;
   1609 }
   1610 
   1611 // Blur ARGB image.
   1612 // Caller should allocate CumulativeSum table of width * height * 16 bytes
   1613 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
   1614 // as the buffer is treated as circular.
   1615 LIBYUV_API
   1616 int ARGBBlur(const uint8* src_argb, int src_stride_argb,
   1617              uint8* dst_argb, int dst_stride_argb,
   1618              int32* dst_cumsum, int dst_stride32_cumsum,
   1619              int width, int height, int radius) {
   1620   int y;
   1621   void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
   1622       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1623   void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
   1624       int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
   1625   int32* cumsum_bot_row;
   1626   int32* max_cumsum_bot_row;
   1627   int32* cumsum_top_row;
   1628 
   1629   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1630     return -1;
   1631   }
   1632   if (height < 0) {
   1633     height = -height;
   1634     src_argb = src_argb + (height - 1) * src_stride_argb;
   1635     src_stride_argb = -src_stride_argb;
   1636   }
   1637   if (radius > height) {
   1638     radius = height;
   1639   }
   1640   if (radius > (width / 2 - 1)) {
   1641     radius = width / 2 - 1;
   1642   }
   1643   if (radius <= 0) {
   1644     return -1;
   1645   }
   1646 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   1647   if (TestCpuFlag(kCpuHasSSE2)) {
   1648     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1649     CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
   1650   }
   1651 #endif
   1652   // Compute enough CumulativeSum for first row to be blurred. After this
   1653   // one row of CumulativeSum is updated at a time.
   1654   ARGBComputeCumulativeSum(src_argb, src_stride_argb,
   1655                            dst_cumsum, dst_stride32_cumsum,
   1656                            width, radius);
   1657 
   1658   src_argb = src_argb + radius * src_stride_argb;
   1659   cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
   1660 
   1661   max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
   1662   cumsum_top_row = &dst_cumsum[0];
   1663 
   1664   for (y = 0; y < height; ++y) {
   1665     int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
   1666     int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
   1667     int area = radius * (bot_y - top_y);
   1668     int boxwidth = radius * 4;
   1669     int x;
   1670     int n;
   1671 
   1672     // Increment cumsum_top_row pointer with circular buffer wrap around.
   1673     if (top_y) {
   1674       cumsum_top_row += dst_stride32_cumsum;
   1675       if (cumsum_top_row >= max_cumsum_bot_row) {
   1676         cumsum_top_row = dst_cumsum;
   1677       }
   1678     }
   1679     // Increment cumsum_bot_row pointer with circular buffer wrap around and
   1680     // then fill in a row of CumulativeSum.
   1681     if ((y + radius) < height) {
   1682       const int32* prev_cumsum_bot_row = cumsum_bot_row;
   1683       cumsum_bot_row += dst_stride32_cumsum;
   1684       if (cumsum_bot_row >= max_cumsum_bot_row) {
   1685         cumsum_bot_row = dst_cumsum;
   1686       }
   1687       ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
   1688                               width);
   1689       src_argb += src_stride_argb;
   1690     }
   1691 
   1692     // Left clipped.
   1693     for (x = 0; x < radius + 1; ++x) {
   1694       CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
   1695                                 boxwidth, area, &dst_argb[x * 4], 1);
   1696       area += (bot_y - top_y);
   1697       boxwidth += 4;
   1698     }
   1699 
   1700     // Middle unclipped.
   1701     n = (width - 1) - radius - x + 1;
   1702     CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
   1703                               boxwidth, area, &dst_argb[x * 4], n);
   1704 
   1705     // Right clipped.
   1706     for (x += n; x <= width - 1; ++x) {
   1707       area -= (bot_y - top_y);
   1708       boxwidth -= 4;
   1709       CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
   1710                                 cumsum_bot_row + (x - radius - 1) * 4,
   1711                                 boxwidth, area, &dst_argb[x * 4], 1);
   1712     }
   1713     dst_argb += dst_stride_argb;
   1714   }
   1715   return 0;
   1716 }
   1717 
   1718 // Multiply ARGB image by a specified ARGB value.
   1719 LIBYUV_API
   1720 int ARGBShade(const uint8* src_argb, int src_stride_argb,
   1721               uint8* dst_argb, int dst_stride_argb,
   1722               int width, int height, uint32 value) {
   1723   int y;
   1724   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
   1725                        int width, uint32 value) = ARGBShadeRow_C;
   1726   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
   1727     return -1;
   1728   }
   1729   if (height < 0) {
   1730     height = -height;
   1731     src_argb = src_argb + (height - 1) * src_stride_argb;
   1732     src_stride_argb = -src_stride_argb;
   1733   }
   1734   // Coalesce rows.
   1735   if (src_stride_argb == width * 4 &&
   1736       dst_stride_argb == width * 4) {
   1737     width *= height;
   1738     height = 1;
   1739     src_stride_argb = dst_stride_argb = 0;
   1740   }
   1741 #if defined(HAS_ARGBSHADEROW_SSE2)
   1742   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1743       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1744       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1745     ARGBShadeRow = ARGBShadeRow_SSE2;
   1746   }
   1747 #elif defined(HAS_ARGBSHADEROW_NEON)
   1748   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1749     ARGBShadeRow = ARGBShadeRow_NEON;
   1750   }
   1751 #endif
   1752 
   1753   for (y = 0; y < height; ++y) {
   1754     ARGBShadeRow(src_argb, dst_argb, width, value);
   1755     src_argb += src_stride_argb;
   1756     dst_argb += dst_stride_argb;
   1757   }
   1758   return 0;
   1759 }
   1760 
   1761 // Interpolate 2 ARGB images by specified amount (0 to 255).
   1762 LIBYUV_API
   1763 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
   1764                     const uint8* src_argb1, int src_stride_argb1,
   1765                     uint8* dst_argb, int dst_stride_argb,
   1766                     int width, int height, int interpolation) {
   1767   int y;
   1768   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   1769                          ptrdiff_t src_stride, int dst_width,
   1770                          int source_y_fraction) = InterpolateRow_C;
   1771   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1772     return -1;
   1773   }
   1774   // Negative height means invert the image.
   1775   if (height < 0) {
   1776     height = -height;
   1777     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1778     dst_stride_argb = -dst_stride_argb;
   1779   }
   1780   // Coalesce rows.
   1781   if (src_stride_argb0 == width * 4 &&
   1782       src_stride_argb1 == width * 4 &&
   1783       dst_stride_argb == width * 4) {
   1784     width *= height;
   1785     height = 1;
   1786     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1787   }
   1788 #if defined(HAS_INTERPOLATEROW_SSE2)
   1789   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1790     InterpolateRow = InterpolateRow_Any_SSE2;
   1791     if (IS_ALIGNED(width, 4)) {
   1792       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   1793       if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
   1794           IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
   1795           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1796         InterpolateRow = InterpolateRow_SSE2;
   1797       }
   1798     }
   1799   }
   1800 #endif
   1801 #if defined(HAS_INTERPOLATEROW_SSSE3)
   1802   if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
   1803     InterpolateRow = InterpolateRow_Any_SSSE3;
   1804     if (IS_ALIGNED(width, 4)) {
   1805       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   1806       if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
   1807           IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
   1808           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1809         InterpolateRow = InterpolateRow_SSSE3;
   1810       }
   1811     }
   1812   }
   1813 #endif
   1814 #if defined(HAS_INTERPOLATEROW_AVX2)
   1815   if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1816     InterpolateRow = InterpolateRow_Any_AVX2;
   1817     if (IS_ALIGNED(width, 8)) {
   1818       InterpolateRow = InterpolateRow_AVX2;
   1819     }
   1820   }
   1821 #endif
   1822 #if defined(HAS_INTERPOLATEROW_NEON)
   1823   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
   1824     InterpolateRow = InterpolateRow_Any_NEON;
   1825     if (IS_ALIGNED(width, 4)) {
   1826       InterpolateRow = InterpolateRow_NEON;
   1827     }
   1828   }
   1829 #endif
   1830 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
   1831   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
   1832       IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
   1833       IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
   1834       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
   1835     ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
   1836   }
   1837 #endif
   1838 
   1839   for (y = 0; y < height; ++y) {
   1840     InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
   1841                    width * 4, interpolation);
   1842     src_argb0 += src_stride_argb0;
   1843     src_argb1 += src_stride_argb1;
   1844     dst_argb += dst_stride_argb;
   1845   }
   1846   return 0;
   1847 }
   1848 
   1849 // Shuffle ARGB channel order.  e.g. BGRA to ARGB.
   1850 LIBYUV_API
   1851 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
   1852                 uint8* dst_argb, int dst_stride_argb,
   1853                 const uint8* shuffler, int width, int height) {
   1854   int y;
   1855   void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
   1856                          const uint8* shuffler, int pix) = ARGBShuffleRow_C;
   1857   if (!src_bgra || !dst_argb ||
   1858       width <= 0 || height == 0) {
   1859     return -1;
   1860   }
   1861   // Negative height means invert the image.
   1862   if (height < 0) {
   1863     height = -height;
   1864     src_bgra = src_bgra + (height - 1) * src_stride_bgra;
   1865     src_stride_bgra = -src_stride_bgra;
   1866   }
   1867   // Coalesce rows.
   1868   if (src_stride_bgra == width * 4 &&
   1869       dst_stride_argb == width * 4) {
   1870     width *= height;
   1871     height = 1;
   1872     src_stride_bgra = dst_stride_argb = 0;
   1873   }
   1874 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
   1875   if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1876     ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
   1877     if (IS_ALIGNED(width, 4)) {
   1878       ARGBShuffleRow = ARGBShuffleRow_SSE2;
   1879     }
   1880   }
   1881 #endif
   1882 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
   1883   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
   1884     ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
   1885     if (IS_ALIGNED(width, 8)) {
   1886       ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
   1887       if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
   1888           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1889         ARGBShuffleRow = ARGBShuffleRow_SSSE3;
   1890       }
   1891     }
   1892   }
   1893 #endif
   1894 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
   1895   if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
   1896     ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
   1897     if (IS_ALIGNED(width, 16)) {
   1898       ARGBShuffleRow = ARGBShuffleRow_AVX2;
   1899     }
   1900   }
   1901 #endif
   1902 #if defined(HAS_ARGBSHUFFLEROW_NEON)
   1903   if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
   1904     ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
   1905     if (IS_ALIGNED(width, 4)) {
   1906       ARGBShuffleRow = ARGBShuffleRow_NEON;
   1907     }
   1908   }
   1909 #endif
   1910 
   1911   for (y = 0; y < height; ++y) {
   1912     ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
   1913     src_bgra += src_stride_bgra;
   1914     dst_argb += dst_stride_argb;
   1915   }
   1916   return 0;
   1917 }
   1918 
   1919 // Sobel ARGB effect.
   1920 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
   1921                         uint8* dst_argb, int dst_stride_argb,
   1922                         int width, int height,
   1923                         void (*SobelRow)(const uint8* src_sobelx,
   1924                                          const uint8* src_sobely,
   1925                                          uint8* dst, int width)) {
   1926   int y;
   1927   void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
   1928                          uint32 selector, int pix) = ARGBToBayerGGRow_C;
   1929   void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
   1930                     uint8* dst_sobely, int width) = SobelYRow_C;
   1931   void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
   1932                     const uint8* src_y2, uint8* dst_sobely, int width) =
   1933       SobelXRow_C;
   1934   const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
   1935   if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
   1936     return -1;
   1937   }
   1938   // Negative height means invert the image.
   1939   if (height < 0) {
   1940     height = -height;
   1941     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   1942     src_stride_argb = -src_stride_argb;
   1943   }
   1944   // ARGBToBayer used to select G channel from ARGB.
   1945 #if defined(HAS_ARGBTOBAYERGGROW_SSE2)
   1946   if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
   1947       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
   1948     ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
   1949     if (IS_ALIGNED(width, 8)) {
   1950       ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
   1951     }
   1952   }
   1953 #endif
   1954 #if defined(HAS_ARGBTOBAYERROW_SSSE3)
   1955   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
   1956       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
   1957     ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
   1958     if (IS_ALIGNED(width, 8)) {
   1959       ARGBToBayerRow = ARGBToBayerRow_SSSE3;
   1960     }
   1961   }
   1962 #endif
   1963 #if defined(HAS_ARGBTOBAYERGGROW_NEON)
   1964   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1965     ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
   1966     if (IS_ALIGNED(width, 8)) {
   1967       ARGBToBayerRow = ARGBToBayerGGRow_NEON;
   1968     }
   1969   }
   1970 #endif
   1971 #if defined(HAS_SOBELYROW_SSE2)
   1972   if (TestCpuFlag(kCpuHasSSE2)) {
   1973     SobelYRow = SobelYRow_SSE2;
   1974   }
   1975 #endif
   1976 #if defined(HAS_SOBELYROW_NEON)
   1977   if (TestCpuFlag(kCpuHasNEON)) {
   1978     SobelYRow = SobelYRow_NEON;
   1979   }
   1980 #endif
   1981 #if defined(HAS_SOBELXROW_SSE2)
   1982   if (TestCpuFlag(kCpuHasSSE2)) {
   1983     SobelXRow = SobelXRow_SSE2;
   1984   }
   1985 #endif
   1986 #if defined(HAS_SOBELXROW_NEON)
   1987   if (TestCpuFlag(kCpuHasNEON)) {
   1988     SobelXRow = SobelXRow_NEON;
   1989   }
   1990 #endif
   1991   {
   1992     // 3 rows with edges before/after.
   1993     const int kRowSize = (width + kEdge + 15) & ~15;
   1994     align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
   1995     uint8* row_sobelx = rows;
   1996     uint8* row_sobely = rows + kRowSize;
   1997     uint8* row_y = rows + kRowSize * 2;
   1998 
   1999     // Convert first row.
   2000     uint8* row_y0 = row_y + kEdge;
   2001     uint8* row_y1 = row_y0 + kRowSize;
   2002     uint8* row_y2 = row_y1 + kRowSize;
   2003     ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
   2004     row_y0[-1] = row_y0[0];
   2005     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
   2006     ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
   2007     row_y1[-1] = row_y1[0];
   2008     memset(row_y1 + width, row_y1[width - 1], 16);
   2009     memset(row_y2 + width, 0, 16);
   2010 
   2011     for (y = 0; y < height; ++y) {
   2012       // Convert next row of ARGB to Y.
   2013       if (y < (height - 1)) {
   2014         src_argb += src_stride_argb;
   2015       }
   2016       ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
   2017       row_y2[-1] = row_y2[0];
   2018       row_y2[width] = row_y2[width - 1];
   2019 
   2020       SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
   2021       SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
   2022       SobelRow(row_sobelx, row_sobely, dst_argb, width);
   2023 
   2024       // Cycle thru circular queue of 3 row_y buffers.
   2025       {
   2026         uint8* row_yt = row_y0;
   2027         row_y0 = row_y1;
   2028         row_y1 = row_y2;
   2029         row_y2 = row_yt;
   2030       }
   2031 
   2032       dst_argb += dst_stride_argb;
   2033     }
   2034     free_aligned_buffer_64(rows);
   2035   }
   2036   return 0;
   2037 }
   2038 
   2039 // Sobel ARGB effect.
   2040 LIBYUV_API
   2041 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
   2042               uint8* dst_argb, int dst_stride_argb,
   2043               int width, int height) {
   2044   void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2045                    uint8* dst_argb, int width) = SobelRow_C;
   2046 #if defined(HAS_SOBELROW_SSE2)
   2047   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
   2048       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   2049     SobelRow = SobelRow_SSE2;
   2050   }
   2051 #endif
   2052 #if defined(HAS_SOBELROW_NEON)
   2053   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   2054     SobelRow = SobelRow_NEON;
   2055   }
   2056 #endif
   2057   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2058                       width, height, SobelRow);
   2059 }
   2060 
   2061 // Sobel ARGB effect with planar output.
   2062 LIBYUV_API
   2063 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
   2064                      uint8* dst_y, int dst_stride_y,
   2065                      int width, int height) {
   2066   void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2067                           uint8* dst_, int width) = SobelToPlaneRow_C;
   2068 #if defined(HAS_SOBELTOPLANEROW_SSE2)
   2069   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
   2070       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
   2071     SobelToPlaneRow = SobelToPlaneRow_SSE2;
   2072   }
   2073 #endif
   2074 #if defined(HAS_SOBELTOPLANEROW_NEON)
   2075   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
   2076     SobelToPlaneRow = SobelToPlaneRow_NEON;
   2077   }
   2078 #endif
   2079   return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
   2080                       width, height, SobelToPlaneRow);
   2081 }
   2082 
   2083 // SobelXY ARGB effect.
   2084 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
   2085 LIBYUV_API
   2086 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
   2087                 uint8* dst_argb, int dst_stride_argb,
   2088                 int width, int height) {
   2089   void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2090                      uint8* dst_argb, int width) = SobelXYRow_C;
   2091 #if defined(HAS_SOBELXYROW_SSE2)
   2092   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
   2093       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   2094     SobelXYRow = SobelXYRow_SSE2;
   2095   }
   2096 #endif
   2097 #if defined(HAS_SOBELXYROW_NEON)
   2098   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   2099     SobelXYRow = SobelXYRow_NEON;
   2100   }
   2101 #endif
   2102   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2103                       width, height, SobelXYRow);
   2104 }
   2105 
   2106 // Apply a 4x4 polynomial to each ARGB pixel.
   2107 LIBYUV_API
   2108 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
   2109                    uint8* dst_argb, int dst_stride_argb,
   2110                    const float* poly,
   2111                    int width, int height) {
   2112   int y;
   2113   void (*ARGBPolynomialRow)(const uint8* src_argb,
   2114                             uint8* dst_argb, const float* poly,
   2115                             int width) = ARGBPolynomialRow_C;
   2116   if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
   2117     return -1;
   2118   }
   2119   // Negative height means invert the image.
   2120   if (height < 0) {
   2121     height = -height;
   2122     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   2123     src_stride_argb = -src_stride_argb;
   2124   }
   2125   // Coalesce rows.
   2126   if (src_stride_argb == width * 4 &&
   2127       dst_stride_argb == width * 4) {
   2128     width *= height;
   2129     height = 1;
   2130     src_stride_argb = dst_stride_argb = 0;
   2131   }
   2132 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
   2133   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
   2134     ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
   2135   }
   2136 #endif
   2137 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
   2138   if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
   2139       IS_ALIGNED(width, 2)) {
   2140     ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
   2141   }
   2142 #endif
   2143 
   2144   for (y = 0; y < height; ++y) {
   2145     ARGBPolynomialRow(src_argb, dst_argb, poly, width);
   2146     src_argb += src_stride_argb;
   2147     dst_argb += dst_stride_argb;
   2148   }
   2149   return 0;
   2150 }
   2151 
   2152 // Apply a lumacolortable to each ARGB pixel.
   2153 LIBYUV_API
   2154 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
   2155                        uint8* dst_argb, int dst_stride_argb,
   2156                        const uint8* luma,
   2157                        int width, int height) {
   2158   int y;
   2159   void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
   2160       int width, const uint8* luma, const uint32 lumacoeff) =
   2161       ARGBLumaColorTableRow_C;
   2162   if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
   2163     return -1;
   2164   }
   2165   // Negative height means invert the image.
   2166   if (height < 0) {
   2167     height = -height;
   2168     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   2169     src_stride_argb = -src_stride_argb;
   2170   }
   2171   // Coalesce rows.
   2172   if (src_stride_argb == width * 4 &&
   2173       dst_stride_argb == width * 4) {
   2174     width *= height;
   2175     height = 1;
   2176     src_stride_argb = dst_stride_argb = 0;
   2177   }
   2178 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
   2179   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
   2180     ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
   2181   }
   2182 #endif
   2183 
   2184   for (y = 0; y < height; ++y) {
   2185     ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
   2186     src_argb += src_stride_argb;
   2187     dst_argb += dst_stride_argb;
   2188   }
   2189   return 0;
   2190 }
   2191 
   2192 // Copy Alpha from one ARGB image to another.
   2193 LIBYUV_API
   2194 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
   2195                   uint8* dst_argb, int dst_stride_argb,
   2196                   int width, int height) {
   2197   int y;
   2198   void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
   2199       ARGBCopyAlphaRow_C;
   2200   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   2201     return -1;
   2202   }
   2203   // Negative height means invert the image.
   2204   if (height < 0) {
   2205     height = -height;
   2206     src_argb = src_argb + (height - 1) * src_stride_argb;
   2207     src_stride_argb = -src_stride_argb;
   2208   }
   2209   // Coalesce rows.
   2210   if (src_stride_argb == width * 4 &&
   2211       dst_stride_argb == width * 4) {
   2212     width *= height;
   2213     height = 1;
   2214     src_stride_argb = dst_stride_argb = 0;
   2215   }
   2216 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
   2217   if (TestCpuFlag(kCpuHasSSE2) &&
   2218       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   2219       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
   2220       IS_ALIGNED(width, 8)) {
   2221     ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
   2222   }
   2223 #endif
   2224 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
   2225   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
   2226     ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
   2227   }
   2228 #endif
   2229 
   2230   for (y = 0; y < height; ++y) {
   2231     ARGBCopyAlphaRow(src_argb, dst_argb, width);
   2232     src_argb += src_stride_argb;
   2233     dst_argb += dst_stride_argb;
   2234   }
   2235   return 0;
   2236 }
   2237 
   2238 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
   2239 LIBYUV_API
   2240 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
   2241                      uint8* dst_argb, int dst_stride_argb,
   2242                      int width, int height) {
   2243   int y;
   2244   void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
   2245       ARGBCopyYToAlphaRow_C;
   2246   if (!src_y || !dst_argb || width <= 0 || height == 0) {
   2247     return -1;
   2248   }
   2249   // Negative height means invert the image.
   2250   if (height < 0) {
   2251     height = -height;
   2252     src_y = src_y + (height - 1) * src_stride_y;
   2253     src_stride_y = -src_stride_y;
   2254   }
   2255   // Coalesce rows.
   2256   if (src_stride_y == width &&
   2257       dst_stride_argb == width * 4) {
   2258     width *= height;
   2259     height = 1;
   2260     src_stride_y = dst_stride_argb = 0;
   2261   }
   2262 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
   2263   if (TestCpuFlag(kCpuHasSSE2) &&
   2264       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
   2265       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
   2266       IS_ALIGNED(width, 8)) {
   2267     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
   2268   }
   2269 #endif
   2270 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
   2271   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
   2272     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
   2273   }
   2274 #endif
   2275 
   2276   for (y = 0; y < height; ++y) {
   2277     ARGBCopyYToAlphaRow(src_y, dst_argb, width);
   2278     src_y += src_stride_y;
   2279     dst_argb += dst_stride_argb;
   2280   }
   2281   return 0;
   2282 }
   2283 
   2284 #ifdef __cplusplus
   2285 }  // extern "C"
   2286 }  // namespace libyuv
   2287 #endif
   2288