Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/planar_functions.h"
     12 
     13 #include <string.h>  // for memset()
     14 
     15 #include "libyuv/cpu_id.h"
     16 #ifdef HAVE_JPEG
     17 #include "libyuv/mjpeg_decoder.h"
     18 #endif
     19 #include "libyuv/row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 // Copy a plane of data
     27 LIBYUV_API
     28 void CopyPlane(const uint8* src_y, int src_stride_y,
     29                uint8* dst_y, int dst_stride_y,
     30                int width, int height) {
     31   int y;
     32   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
     33   // Coalesce rows.
     34   if (src_stride_y == width &&
     35       dst_stride_y == width) {
     36     width *= height;
     37     height = 1;
     38     src_stride_y = dst_stride_y = 0;
     39   }
     40   // Nothing to do.
     41   if (src_y == dst_y && src_stride_y == dst_stride_y) {
     42     return;
     43   }
     44 #if defined(HAS_COPYROW_SSE2)
     45   if (TestCpuFlag(kCpuHasSSE2)) {
     46     CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
     47   }
     48 #endif
     49 #if defined(HAS_COPYROW_AVX)
     50   if (TestCpuFlag(kCpuHasAVX)) {
     51     CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
     52   }
     53 #endif
     54 #if defined(HAS_COPYROW_ERMS)
     55   if (TestCpuFlag(kCpuHasERMS)) {
     56     CopyRow = CopyRow_ERMS;
     57   }
     58 #endif
     59 #if defined(HAS_COPYROW_NEON)
     60   if (TestCpuFlag(kCpuHasNEON)) {
     61     CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
     62   }
     63 #endif
     64 #if defined(HAS_COPYROW_MIPS)
     65   if (TestCpuFlag(kCpuHasMIPS)) {
     66     CopyRow = CopyRow_MIPS;
     67   }
     68 #endif
     69 
     70   // Copy plane
     71   for (y = 0; y < height; ++y) {
     72     CopyRow(src_y, dst_y, width);
     73     src_y += src_stride_y;
     74     dst_y += dst_stride_y;
     75   }
     76 }
     77 
     78 LIBYUV_API
     79 void CopyPlane_16(const uint16* src_y, int src_stride_y,
     80                   uint16* dst_y, int dst_stride_y,
     81                   int width, int height) {
     82   int y;
     83   void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
     84   // Coalesce rows.
     85   if (src_stride_y == width &&
     86       dst_stride_y == width) {
     87     width *= height;
     88     height = 1;
     89     src_stride_y = dst_stride_y = 0;
     90   }
     91 #if defined(HAS_COPYROW_16_SSE2)
     92   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
     93     CopyRow = CopyRow_16_SSE2;
     94   }
     95 #endif
     96 #if defined(HAS_COPYROW_16_ERMS)
     97   if (TestCpuFlag(kCpuHasERMS)) {
     98     CopyRow = CopyRow_16_ERMS;
     99   }
    100 #endif
    101 #if defined(HAS_COPYROW_16_NEON)
    102   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    103     CopyRow = CopyRow_16_NEON;
    104   }
    105 #endif
    106 #if defined(HAS_COPYROW_16_MIPS)
    107   if (TestCpuFlag(kCpuHasMIPS)) {
    108     CopyRow = CopyRow_16_MIPS;
    109   }
    110 #endif
    111 
    112   // Copy plane
    113   for (y = 0; y < height; ++y) {
    114     CopyRow(src_y, dst_y, width);
    115     src_y += src_stride_y;
    116     dst_y += dst_stride_y;
    117   }
    118 }
    119 
    120 // Copy I422.
    121 LIBYUV_API
    122 int I422Copy(const uint8* src_y, int src_stride_y,
    123              const uint8* src_u, int src_stride_u,
    124              const uint8* src_v, int src_stride_v,
    125              uint8* dst_y, int dst_stride_y,
    126              uint8* dst_u, int dst_stride_u,
    127              uint8* dst_v, int dst_stride_v,
    128              int width, int height) {
    129   int halfwidth = (width + 1) >> 1;
    130   if (!src_y || !src_u || !src_v ||
    131       !dst_y || !dst_u || !dst_v ||
    132       width <= 0 || height == 0) {
    133     return -1;
    134   }
    135   // Negative height means invert the image.
    136   if (height < 0) {
    137     height = -height;
    138     src_y = src_y + (height - 1) * src_stride_y;
    139     src_u = src_u + (height - 1) * src_stride_u;
    140     src_v = src_v + (height - 1) * src_stride_v;
    141     src_stride_y = -src_stride_y;
    142     src_stride_u = -src_stride_u;
    143     src_stride_v = -src_stride_v;
    144   }
    145   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    146   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
    147   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
    148   return 0;
    149 }
    150 
    151 // Copy I444.
    152 LIBYUV_API
    153 int I444Copy(const uint8* src_y, int src_stride_y,
    154              const uint8* src_u, int src_stride_u,
    155              const uint8* src_v, int src_stride_v,
    156              uint8* dst_y, int dst_stride_y,
    157              uint8* dst_u, int dst_stride_u,
    158              uint8* dst_v, int dst_stride_v,
    159              int width, int height) {
    160   if (!src_y || !src_u || !src_v ||
    161       !dst_y || !dst_u || !dst_v ||
    162       width <= 0 || height == 0) {
    163     return -1;
    164   }
    165   // Negative height means invert the image.
    166   if (height < 0) {
    167     height = -height;
    168     src_y = src_y + (height - 1) * src_stride_y;
    169     src_u = src_u + (height - 1) * src_stride_u;
    170     src_v = src_v + (height - 1) * src_stride_v;
    171     src_stride_y = -src_stride_y;
    172     src_stride_u = -src_stride_u;
    173     src_stride_v = -src_stride_v;
    174   }
    175 
    176   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    177   CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
    178   CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
    179   return 0;
    180 }
    181 
    182 // Copy I400.
    183 LIBYUV_API
    184 int I400ToI400(const uint8* src_y, int src_stride_y,
    185                uint8* dst_y, int dst_stride_y,
    186                int width, int height) {
    187   if (!src_y || !dst_y || width <= 0 || height == 0) {
    188     return -1;
    189   }
    190   // Negative height means invert the image.
    191   if (height < 0) {
    192     height = -height;
    193     src_y = src_y + (height - 1) * src_stride_y;
    194     src_stride_y = -src_stride_y;
    195   }
    196   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    197   return 0;
    198 }
    199 
    200 // Convert I420 to I400.
    201 LIBYUV_API
    202 int I420ToI400(const uint8* src_y, int src_stride_y,
    203                const uint8* src_u, int src_stride_u,
    204                const uint8* src_v, int src_stride_v,
    205                uint8* dst_y, int dst_stride_y,
    206                int width, int height) {
    207   if (!src_y || !dst_y || width <= 0 || height == 0) {
    208     return -1;
    209   }
    210   // Negative height means invert the image.
    211   if (height < 0) {
    212     height = -height;
    213     src_y = src_y + (height - 1) * src_stride_y;
    214     src_stride_y = -src_stride_y;
    215   }
    216   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    217   return 0;
    218 }
    219 
    220 // Mirror a plane of data.
    221 void MirrorPlane(const uint8* src_y, int src_stride_y,
    222                  uint8* dst_y, int dst_stride_y,
    223                  int width, int height) {
    224   int y;
    225   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
    226   // Negative height means invert the image.
    227   if (height < 0) {
    228     height = -height;
    229     src_y = src_y + (height - 1) * src_stride_y;
    230     src_stride_y = -src_stride_y;
    231   }
    232 #if defined(HAS_MIRRORROW_NEON)
    233   if (TestCpuFlag(kCpuHasNEON)) {
    234     MirrorRow = MirrorRow_Any_NEON;
    235     if (IS_ALIGNED(width, 16)) {
    236       MirrorRow = MirrorRow_NEON;
    237     }
    238   }
    239 #endif
    240 #if defined(HAS_MIRRORROW_SSE2)
    241   if (TestCpuFlag(kCpuHasSSE2)) {
    242     MirrorRow = MirrorRow_Any_SSE2;
    243     if (IS_ALIGNED(width, 16)) {
    244       MirrorRow = MirrorRow_SSE2;
    245     }
    246   }
    247 #endif
    248 #if defined(HAS_MIRRORROW_SSSE3)
    249   if (TestCpuFlag(kCpuHasSSSE3)) {
    250     MirrorRow = MirrorRow_Any_SSSE3;
    251     if (IS_ALIGNED(width, 16)) {
    252       MirrorRow = MirrorRow_SSSE3;
    253     }
    254   }
    255 #endif
    256 #if defined(HAS_MIRRORROW_AVX2)
    257   if (TestCpuFlag(kCpuHasAVX2)) {
    258     MirrorRow = MirrorRow_Any_AVX2;
    259     if (IS_ALIGNED(width, 32)) {
    260       MirrorRow = MirrorRow_AVX2;
    261     }
    262   }
    263 #endif
    264 // TODO(fbarchard): Mirror on mips handle unaligned memory.
    265 #if defined(HAS_MIRRORROW_MIPS_DSPR2)
    266   if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
    267       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
    268       IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
    269     MirrorRow = MirrorRow_MIPS_DSPR2;
    270   }
    271 #endif
    272 
    273   // Mirror plane
    274   for (y = 0; y < height; ++y) {
    275     MirrorRow(src_y, dst_y, width);
    276     src_y += src_stride_y;
    277     dst_y += dst_stride_y;
    278   }
    279 }
    280 
    281 // Convert YUY2 to I422.
    282 LIBYUV_API
    283 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
    284                uint8* dst_y, int dst_stride_y,
    285                uint8* dst_u, int dst_stride_u,
    286                uint8* dst_v, int dst_stride_v,
    287                int width, int height) {
    288   int y;
    289   void (*YUY2ToUV422Row)(const uint8* src_yuy2,
    290                          uint8* dst_u, uint8* dst_v, int pix) =
    291       YUY2ToUV422Row_C;
    292   void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
    293       YUY2ToYRow_C;
    294   // Negative height means invert the image.
    295   if (height < 0) {
    296     height = -height;
    297     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    298     src_stride_yuy2 = -src_stride_yuy2;
    299   }
    300   // Coalesce rows.
    301   if (src_stride_yuy2 == width * 2 &&
    302       dst_stride_y == width &&
    303       dst_stride_u * 2 == width &&
    304       dst_stride_v * 2 == width) {
    305     width *= height;
    306     height = 1;
    307     src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    308   }
    309 #if defined(HAS_YUY2TOYROW_SSE2)
    310   if (TestCpuFlag(kCpuHasSSE2)) {
    311     YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    312     YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    313     if (IS_ALIGNED(width, 16)) {
    314       YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
    315       YUY2ToYRow = YUY2ToYRow_SSE2;
    316     }
    317   }
    318 #endif
    319 #if defined(HAS_YUY2TOYROW_AVX2)
    320   if (TestCpuFlag(kCpuHasAVX2)) {
    321     YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    322     YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    323     if (IS_ALIGNED(width, 32)) {
    324       YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
    325       YUY2ToYRow = YUY2ToYRow_AVX2;
    326     }
    327   }
    328 #endif
    329 #if defined(HAS_YUY2TOYROW_NEON)
    330   if (TestCpuFlag(kCpuHasNEON)) {
    331     YUY2ToYRow = YUY2ToYRow_Any_NEON;
    332     if (width >= 16) {
    333       YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    334     }
    335     if (IS_ALIGNED(width, 16)) {
    336       YUY2ToYRow = YUY2ToYRow_NEON;
    337       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    338     }
    339   }
    340 #endif
    341 
    342   for (y = 0; y < height; ++y) {
    343     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    344     YUY2ToYRow(src_yuy2, dst_y, width);
    345     src_yuy2 += src_stride_yuy2;
    346     dst_y += dst_stride_y;
    347     dst_u += dst_stride_u;
    348     dst_v += dst_stride_v;
    349   }
    350   return 0;
    351 }
    352 
    353 // Convert UYVY to I422.
    354 LIBYUV_API
    355 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
    356                uint8* dst_y, int dst_stride_y,
    357                uint8* dst_u, int dst_stride_u,
    358                uint8* dst_v, int dst_stride_v,
    359                int width, int height) {
    360   int y;
    361   void (*UYVYToUV422Row)(const uint8* src_uyvy,
    362                          uint8* dst_u, uint8* dst_v, int pix) =
    363       UYVYToUV422Row_C;
    364   void (*UYVYToYRow)(const uint8* src_uyvy,
    365                      uint8* dst_y, int pix) = UYVYToYRow_C;
    366   // Negative height means invert the image.
    367   if (height < 0) {
    368     height = -height;
    369     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    370     src_stride_uyvy = -src_stride_uyvy;
    371   }
    372   // Coalesce rows.
    373   if (src_stride_uyvy == width * 2 &&
    374       dst_stride_y == width &&
    375       dst_stride_u * 2 == width &&
    376       dst_stride_v * 2 == width) {
    377     width *= height;
    378     height = 1;
    379     src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
    380   }
    381 #if defined(HAS_UYVYTOYROW_SSE2)
    382   if (TestCpuFlag(kCpuHasSSE2)) {
    383     UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    384     UYVYToYRow = UYVYToYRow_Any_SSE2;
    385     if (IS_ALIGNED(width, 16)) {
    386       UYVYToUV422Row = UYVYToUV422Row_SSE2;
    387       UYVYToYRow = UYVYToYRow_SSE2;
    388     }
    389   }
    390 #endif
    391 #if defined(HAS_UYVYTOYROW_AVX2)
    392   if (TestCpuFlag(kCpuHasAVX2)) {
    393     UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    394     UYVYToYRow = UYVYToYRow_Any_AVX2;
    395     if (IS_ALIGNED(width, 32)) {
    396       UYVYToUV422Row = UYVYToUV422Row_AVX2;
    397       UYVYToYRow = UYVYToYRow_AVX2;
    398     }
    399   }
    400 #endif
    401 #if defined(HAS_UYVYTOYROW_NEON)
    402   if (TestCpuFlag(kCpuHasNEON)) {
    403     UYVYToYRow = UYVYToYRow_Any_NEON;
    404     if (width >= 16) {
    405       UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    406     }
    407     if (IS_ALIGNED(width, 16)) {
    408       UYVYToYRow = UYVYToYRow_NEON;
    409       UYVYToUV422Row = UYVYToUV422Row_NEON;
    410     }
    411   }
    412 #endif
    413 
    414   for (y = 0; y < height; ++y) {
    415     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    416     UYVYToYRow(src_uyvy, dst_y, width);
    417     src_uyvy += src_stride_uyvy;
    418     dst_y += dst_stride_y;
    419     dst_u += dst_stride_u;
    420     dst_v += dst_stride_v;
    421   }
    422   return 0;
    423 }
    424 
    425 // Mirror I400 with optional flipping
    426 LIBYUV_API
    427 int I400Mirror(const uint8* src_y, int src_stride_y,
    428                uint8* dst_y, int dst_stride_y,
    429                int width, int height) {
    430   if (!src_y || !dst_y ||
    431       width <= 0 || height == 0) {
    432     return -1;
    433   }
    434   // Negative height means invert the image.
    435   if (height < 0) {
    436     height = -height;
    437     src_y = src_y + (height - 1) * src_stride_y;
    438     src_stride_y = -src_stride_y;
    439   }
    440 
    441   MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    442   return 0;
    443 }
    444 
    445 // Mirror I420 with optional flipping
    446 LIBYUV_API
    447 int I420Mirror(const uint8* src_y, int src_stride_y,
    448                const uint8* src_u, int src_stride_u,
    449                const uint8* src_v, int src_stride_v,
    450                uint8* dst_y, int dst_stride_y,
    451                uint8* dst_u, int dst_stride_u,
    452                uint8* dst_v, int dst_stride_v,
    453                int width, int height) {
    454   int halfwidth = (width + 1) >> 1;
    455   int halfheight = (height + 1) >> 1;
    456   if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
    457       width <= 0 || height == 0) {
    458     return -1;
    459   }
    460   // Negative height means invert the image.
    461   if (height < 0) {
    462     height = -height;
    463     halfheight = (height + 1) >> 1;
    464     src_y = src_y + (height - 1) * src_stride_y;
    465     src_u = src_u + (halfheight - 1) * src_stride_u;
    466     src_v = src_v + (halfheight - 1) * src_stride_v;
    467     src_stride_y = -src_stride_y;
    468     src_stride_u = -src_stride_u;
    469     src_stride_v = -src_stride_v;
    470   }
    471 
    472   if (dst_y) {
    473     MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    474   }
    475   MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
    476   MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
    477   return 0;
    478 }
    479 
    480 // ARGB mirror.
    481 LIBYUV_API
    482 int ARGBMirror(const uint8* src_argb, int src_stride_argb,
    483                uint8* dst_argb, int dst_stride_argb,
    484                int width, int height) {
    485   int y;
    486   void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
    487       ARGBMirrorRow_C;
    488   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    489     return -1;
    490   }
    491   // Negative height means invert the image.
    492   if (height < 0) {
    493     height = -height;
    494     src_argb = src_argb + (height - 1) * src_stride_argb;
    495     src_stride_argb = -src_stride_argb;
    496   }
    497 #if defined(HAS_ARGBMIRRORROW_NEON)
    498   if (TestCpuFlag(kCpuHasNEON)) {
    499     ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
    500     if (IS_ALIGNED(width, 4)) {
    501       ARGBMirrorRow = ARGBMirrorRow_NEON;
    502     }
    503   }
    504 #endif
    505 #if defined(HAS_ARGBMIRRORROW_SSE2)
    506   if (TestCpuFlag(kCpuHasSSE2)) {
    507     ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
    508     if (IS_ALIGNED(width, 4)) {
    509       ARGBMirrorRow = ARGBMirrorRow_SSE2;
    510     }
    511   }
    512 #endif
    513 #if defined(HAS_ARGBMIRRORROW_AVX2)
    514   if (TestCpuFlag(kCpuHasAVX2)) {
    515     ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
    516     if (IS_ALIGNED(width, 8)) {
    517       ARGBMirrorRow = ARGBMirrorRow_AVX2;
    518     }
    519   }
    520 #endif
    521 
    522   // Mirror plane
    523   for (y = 0; y < height; ++y) {
    524     ARGBMirrorRow(src_argb, dst_argb, width);
    525     src_argb += src_stride_argb;
    526     dst_argb += dst_stride_argb;
    527   }
    528   return 0;
    529 }
    530 
    531 // Get a blender that optimized for the CPU and pixel count.
    532 // As there are 6 blenders to choose from, the caller should try to use
    533 // the same blend function for all pixels if possible.
    534 LIBYUV_API
    535 ARGBBlendRow GetARGBBlend() {
    536   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    537                        uint8* dst_argb, int width) = ARGBBlendRow_C;
    538 #if defined(HAS_ARGBBLENDROW_SSSE3)
    539   if (TestCpuFlag(kCpuHasSSSE3)) {
    540     ARGBBlendRow = ARGBBlendRow_SSSE3;
    541     return ARGBBlendRow;
    542   }
    543 #endif
    544 #if defined(HAS_ARGBBLENDROW_SSE2)
    545   if (TestCpuFlag(kCpuHasSSE2)) {
    546     ARGBBlendRow = ARGBBlendRow_SSE2;
    547   }
    548 #endif
    549 #if defined(HAS_ARGBBLENDROW_NEON)
    550   if (TestCpuFlag(kCpuHasNEON)) {
    551     ARGBBlendRow = ARGBBlendRow_NEON;
    552   }
    553 #endif
    554   return ARGBBlendRow;
    555 }
    556 
    557 // Alpha Blend 2 ARGB images and store to destination.
    558 LIBYUV_API
    559 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
    560               const uint8* src_argb1, int src_stride_argb1,
    561               uint8* dst_argb, int dst_stride_argb,
    562               int width, int height) {
    563   int y;
    564   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    565                        uint8* dst_argb, int width) = GetARGBBlend();
    566   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    567     return -1;
    568   }
    569   // Negative height means invert the image.
    570   if (height < 0) {
    571     height = -height;
    572     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    573     dst_stride_argb = -dst_stride_argb;
    574   }
    575   // Coalesce rows.
    576   if (src_stride_argb0 == width * 4 &&
    577       src_stride_argb1 == width * 4 &&
    578       dst_stride_argb == width * 4) {
    579     width *= height;
    580     height = 1;
    581     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    582   }
    583 
    584   for (y = 0; y < height; ++y) {
    585     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    586     src_argb0 += src_stride_argb0;
    587     src_argb1 += src_stride_argb1;
    588     dst_argb += dst_stride_argb;
    589   }
    590   return 0;
    591 }
    592 
    593 // Multiply 2 ARGB images and store to destination.
    594 LIBYUV_API
    595 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
    596                  const uint8* src_argb1, int src_stride_argb1,
    597                  uint8* dst_argb, int dst_stride_argb,
    598                  int width, int height) {
    599   int y;
    600   void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
    601                           int width) = ARGBMultiplyRow_C;
    602   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    603     return -1;
    604   }
    605   // Negative height means invert the image.
    606   if (height < 0) {
    607     height = -height;
    608     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    609     dst_stride_argb = -dst_stride_argb;
    610   }
    611   // Coalesce rows.
    612   if (src_stride_argb0 == width * 4 &&
    613       src_stride_argb1 == width * 4 &&
    614       dst_stride_argb == width * 4) {
    615     width *= height;
    616     height = 1;
    617     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    618   }
    619 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
    620   if (TestCpuFlag(kCpuHasSSE2)) {
    621     ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    622     if (IS_ALIGNED(width, 4)) {
    623       ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    624     }
    625   }
    626 #endif
    627 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
    628   if (TestCpuFlag(kCpuHasAVX2)) {
    629     ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    630     if (IS_ALIGNED(width, 8)) {
    631       ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    632     }
    633   }
    634 #endif
    635 #if defined(HAS_ARGBMULTIPLYROW_NEON)
    636   if (TestCpuFlag(kCpuHasNEON)) {
    637     ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    638     if (IS_ALIGNED(width, 8)) {
    639       ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    640     }
    641   }
    642 #endif
    643 
    644   // Multiply plane
    645   for (y = 0; y < height; ++y) {
    646     ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    647     src_argb0 += src_stride_argb0;
    648     src_argb1 += src_stride_argb1;
    649     dst_argb += dst_stride_argb;
    650   }
    651   return 0;
    652 }
    653 
    654 // Add 2 ARGB images and store to destination.
    655 LIBYUV_API
    656 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
    657             const uint8* src_argb1, int src_stride_argb1,
    658             uint8* dst_argb, int dst_stride_argb,
    659             int width, int height) {
    660   int y;
    661   void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
    662                      int width) = ARGBAddRow_C;
    663   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    664     return -1;
    665   }
    666   // Negative height means invert the image.
    667   if (height < 0) {
    668     height = -height;
    669     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    670     dst_stride_argb = -dst_stride_argb;
    671   }
    672   // Coalesce rows.
    673   if (src_stride_argb0 == width * 4 &&
    674       src_stride_argb1 == width * 4 &&
    675       dst_stride_argb == width * 4) {
    676     width *= height;
    677     height = 1;
    678     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    679   }
    680 #if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
    681   if (TestCpuFlag(kCpuHasSSE2)) {
    682     ARGBAddRow = ARGBAddRow_SSE2;
    683   }
    684 #endif
    685 #if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
    686   if (TestCpuFlag(kCpuHasSSE2)) {
    687     ARGBAddRow = ARGBAddRow_Any_SSE2;
    688     if (IS_ALIGNED(width, 4)) {
    689       ARGBAddRow = ARGBAddRow_SSE2;
    690     }
    691   }
    692 #endif
    693 #if defined(HAS_ARGBADDROW_AVX2)
    694   if (TestCpuFlag(kCpuHasAVX2)) {
    695     ARGBAddRow = ARGBAddRow_Any_AVX2;
    696     if (IS_ALIGNED(width, 8)) {
    697       ARGBAddRow = ARGBAddRow_AVX2;
    698     }
    699   }
    700 #endif
    701 #if defined(HAS_ARGBADDROW_NEON)
    702   if (TestCpuFlag(kCpuHasNEON)) {
    703     ARGBAddRow = ARGBAddRow_Any_NEON;
    704     if (IS_ALIGNED(width, 8)) {
    705       ARGBAddRow = ARGBAddRow_NEON;
    706     }
    707   }
    708 #endif
    709 
    710   // Add plane
    711   for (y = 0; y < height; ++y) {
    712     ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    713     src_argb0 += src_stride_argb0;
    714     src_argb1 += src_stride_argb1;
    715     dst_argb += dst_stride_argb;
    716   }
    717   return 0;
    718 }
    719 
    720 // Subtract 2 ARGB images and store to destination.
    721 LIBYUV_API
    722 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
    723                  const uint8* src_argb1, int src_stride_argb1,
    724                  uint8* dst_argb, int dst_stride_argb,
    725                  int width, int height) {
    726   int y;
    727   void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
    728                           int width) = ARGBSubtractRow_C;
    729   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    730     return -1;
    731   }
    732   // Negative height means invert the image.
    733   if (height < 0) {
    734     height = -height;
    735     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    736     dst_stride_argb = -dst_stride_argb;
    737   }
    738   // Coalesce rows.
    739   if (src_stride_argb0 == width * 4 &&
    740       src_stride_argb1 == width * 4 &&
    741       dst_stride_argb == width * 4) {
    742     width *= height;
    743     height = 1;
    744     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
    745   }
    746 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
    747   if (TestCpuFlag(kCpuHasSSE2)) {
    748     ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    749     if (IS_ALIGNED(width, 4)) {
    750       ARGBSubtractRow = ARGBSubtractRow_SSE2;
    751     }
    752   }
    753 #endif
    754 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
    755   if (TestCpuFlag(kCpuHasAVX2)) {
    756     ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    757     if (IS_ALIGNED(width, 8)) {
    758       ARGBSubtractRow = ARGBSubtractRow_AVX2;
    759     }
    760   }
    761 #endif
    762 #if defined(HAS_ARGBSUBTRACTROW_NEON)
    763   if (TestCpuFlag(kCpuHasNEON)) {
    764     ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    765     if (IS_ALIGNED(width, 8)) {
    766       ARGBSubtractRow = ARGBSubtractRow_NEON;
    767     }
    768   }
    769 #endif
    770 
    771   // Subtract plane
    772   for (y = 0; y < height; ++y) {
    773     ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    774     src_argb0 += src_stride_argb0;
    775     src_argb1 += src_stride_argb1;
    776     dst_argb += dst_stride_argb;
    777   }
    778   return 0;
    779 }
    780 
    781 // Convert I422 to BGRA.
    782 LIBYUV_API
    783 int I422ToBGRA(const uint8* src_y, int src_stride_y,
    784                const uint8* src_u, int src_stride_u,
    785                const uint8* src_v, int src_stride_v,
    786                uint8* dst_bgra, int dst_stride_bgra,
    787                int width, int height) {
    788   int y;
    789   void (*I422ToBGRARow)(const uint8* y_buf,
    790                         const uint8* u_buf,
    791                         const uint8* v_buf,
    792                         uint8* rgb_buf,
    793                         int width) = I422ToBGRARow_C;
    794   if (!src_y || !src_u || !src_v ||
    795       !dst_bgra ||
    796       width <= 0 || height == 0) {
    797     return -1;
    798   }
    799   // Negative height means invert the image.
    800   if (height < 0) {
    801     height = -height;
    802     dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    803     dst_stride_bgra = -dst_stride_bgra;
    804   }
    805   // Coalesce rows.
    806   if (src_stride_y == width &&
    807       src_stride_u * 2 == width &&
    808       src_stride_v * 2 == width &&
    809       dst_stride_bgra == width * 4) {
    810     width *= height;
    811     height = 1;
    812     src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
    813   }
    814 #if defined(HAS_I422TOBGRAROW_SSSE3)
    815   if (TestCpuFlag(kCpuHasSSSE3)) {
    816     I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
    817     if (IS_ALIGNED(width, 8)) {
    818       I422ToBGRARow = I422ToBGRARow_SSSE3;
    819     }
    820   }
    821 #endif
    822 #if defined(HAS_I422TOBGRAROW_AVX2)
    823   if (TestCpuFlag(kCpuHasAVX2)) {
    824     I422ToBGRARow = I422ToBGRARow_Any_AVX2;
    825     if (IS_ALIGNED(width, 16)) {
    826       I422ToBGRARow = I422ToBGRARow_AVX2;
    827     }
    828   }
    829 #endif
    830 #if defined(HAS_I422TOBGRAROW_NEON)
    831   if (TestCpuFlag(kCpuHasNEON)) {
    832     I422ToBGRARow = I422ToBGRARow_Any_NEON;
    833     if (IS_ALIGNED(width, 8)) {
    834       I422ToBGRARow = I422ToBGRARow_NEON;
    835     }
    836   }
    837 #endif
    838 #if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
    839   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
    840       IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
    841       IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
    842       IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
    843       IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
    844     I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
    845   }
    846 #endif
    847 
    848   for (y = 0; y < height; ++y) {
    849     I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    850     dst_bgra += dst_stride_bgra;
    851     src_y += src_stride_y;
    852     src_u += src_stride_u;
    853     src_v += src_stride_v;
    854   }
    855   return 0;
    856 }
    857 
    858 // Convert I422 to ABGR.
    859 LIBYUV_API
    860 int I422ToABGR(const uint8* src_y, int src_stride_y,
    861                const uint8* src_u, int src_stride_u,
    862                const uint8* src_v, int src_stride_v,
    863                uint8* dst_abgr, int dst_stride_abgr,
    864                int width, int height) {
    865   int y;
    866   void (*I422ToABGRRow)(const uint8* y_buf,
    867                         const uint8* u_buf,
    868                         const uint8* v_buf,
    869                         uint8* rgb_buf,
    870                         int width) = I422ToABGRRow_C;
    871   if (!src_y || !src_u || !src_v ||
    872       !dst_abgr ||
    873       width <= 0 || height == 0) {
    874     return -1;
    875   }
    876   // Negative height means invert the image.
    877   if (height < 0) {
    878     height = -height;
    879     dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    880     dst_stride_abgr = -dst_stride_abgr;
    881   }
    882   // Coalesce rows.
    883   if (src_stride_y == width &&
    884       src_stride_u * 2 == width &&
    885       src_stride_v * 2 == width &&
    886       dst_stride_abgr == width * 4) {
    887     width *= height;
    888     height = 1;
    889     src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
    890   }
    891 #if defined(HAS_I422TOABGRROW_NEON)
    892   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    893     I422ToABGRRow = I422ToABGRRow_Any_NEON;
    894     if (IS_ALIGNED(width, 8)) {
    895       I422ToABGRRow = I422ToABGRRow_NEON;
    896     }
    897   }
    898 #endif
    899 #if defined(HAS_I422TOABGRROW_SSSE3)
    900   if (TestCpuFlag(kCpuHasSSSE3)) {
    901     I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    902     if (IS_ALIGNED(width, 8)) {
    903       I422ToABGRRow = I422ToABGRRow_SSSE3;
    904     }
    905   }
    906 #endif
    907 #if defined(HAS_I422TOABGRROW_AVX2)
    908   if (TestCpuFlag(kCpuHasAVX2)) {
    909     I422ToABGRRow = I422ToABGRRow_Any_AVX2;
    910     if (IS_ALIGNED(width, 16)) {
    911       I422ToABGRRow = I422ToABGRRow_AVX2;
    912     }
    913   }
    914 #endif
    915 
    916   for (y = 0; y < height; ++y) {
    917     I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    918     dst_abgr += dst_stride_abgr;
    919     src_y += src_stride_y;
    920     src_u += src_stride_u;
    921     src_v += src_stride_v;
    922   }
    923   return 0;
    924 }
    925 
    926 // Convert I422 to RGBA.
    927 LIBYUV_API
    928 int I422ToRGBA(const uint8* src_y, int src_stride_y,
    929                const uint8* src_u, int src_stride_u,
    930                const uint8* src_v, int src_stride_v,
    931                uint8* dst_rgba, int dst_stride_rgba,
    932                int width, int height) {
    933   int y;
    934   void (*I422ToRGBARow)(const uint8* y_buf,
    935                         const uint8* u_buf,
    936                         const uint8* v_buf,
    937                         uint8* rgb_buf,
    938                         int width) = I422ToRGBARow_C;
    939   if (!src_y || !src_u || !src_v ||
    940       !dst_rgba ||
    941       width <= 0 || height == 0) {
    942     return -1;
    943   }
    944   // Negative height means invert the image.
    945   if (height < 0) {
    946     height = -height;
    947     dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    948     dst_stride_rgba = -dst_stride_rgba;
    949   }
    950   // Coalesce rows.
    951   if (src_stride_y == width &&
    952       src_stride_u * 2 == width &&
    953       src_stride_v * 2 == width &&
    954       dst_stride_rgba == width * 4) {
    955     width *= height;
    956     height = 1;
    957     src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
    958   }
    959 #if defined(HAS_I422TORGBAROW_NEON)
    960   if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    961     I422ToRGBARow = I422ToRGBARow_Any_NEON;
    962     if (IS_ALIGNED(width, 8)) {
    963       I422ToRGBARow = I422ToRGBARow_NEON;
    964     }
    965   }
    966 #endif
    967 #if defined(HAS_I422TORGBAROW_SSSE3)
    968   if (TestCpuFlag(kCpuHasSSSE3)) {
    969     I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    970     if (IS_ALIGNED(width, 8)) {
    971       I422ToRGBARow = I422ToRGBARow_SSSE3;
    972     }
    973   }
    974 #endif
    975 #if defined(HAS_I422TORGBAROW_AVX2)
    976   if (TestCpuFlag(kCpuHasAVX2)) {
    977     I422ToRGBARow = I422ToRGBARow_Any_AVX2;
    978     if (IS_ALIGNED(width, 16)) {
    979       I422ToRGBARow = I422ToRGBARow_AVX2;
    980     }
    981   }
    982 #endif
    983 
    984   for (y = 0; y < height; ++y) {
    985     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    986     dst_rgba += dst_stride_rgba;
    987     src_y += src_stride_y;
    988     src_u += src_stride_u;
    989     src_v += src_stride_v;
    990   }
    991   return 0;
    992 }
    993 
    994 // Convert NV12 to RGB565.
    995 LIBYUV_API
    996 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
    997                  const uint8* src_uv, int src_stride_uv,
    998                  uint8* dst_rgb565, int dst_stride_rgb565,
    999                  int width, int height) {
   1000   int y;
   1001   void (*NV12ToRGB565Row)(const uint8* y_buf,
   1002                           const uint8* uv_buf,
   1003                           uint8* rgb_buf,
   1004                           int width) = NV12ToRGB565Row_C;
   1005   if (!src_y || !src_uv || !dst_rgb565 ||
   1006       width <= 0 || height == 0) {
   1007     return -1;
   1008   }
   1009   // Negative height means invert the image.
   1010   if (height < 0) {
   1011     height = -height;
   1012     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
   1013     dst_stride_rgb565 = -dst_stride_rgb565;
   1014   }
   1015 #if defined(HAS_NV12TORGB565ROW_SSSE3)
   1016   if (TestCpuFlag(kCpuHasSSSE3)) {
   1017     NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
   1018     if (IS_ALIGNED(width, 8)) {
   1019       NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
   1020     }
   1021   }
   1022 #endif
   1023 #if defined(HAS_NV12TORGB565ROW_AVX2)
   1024   if (TestCpuFlag(kCpuHasAVX2)) {
   1025     NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
   1026     if (IS_ALIGNED(width, 16)) {
   1027       NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
   1028     }
   1029   }
   1030 #endif
   1031 #if defined(HAS_NV12TORGB565ROW_NEON)
   1032   if (TestCpuFlag(kCpuHasNEON)) {
   1033     NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
   1034     if (IS_ALIGNED(width, 8)) {
   1035       NV12ToRGB565Row = NV12ToRGB565Row_NEON;
   1036     }
   1037   }
   1038 #endif
   1039 
   1040   for (y = 0; y < height; ++y) {
   1041     NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
   1042     dst_rgb565 += dst_stride_rgb565;
   1043     src_y += src_stride_y;
   1044     if (y & 1) {
   1045       src_uv += src_stride_uv;
   1046     }
   1047   }
   1048   return 0;
   1049 }
   1050 
   1051 // Convert NV21 to RGB565.
   1052 LIBYUV_API
   1053 int NV21ToRGB565(const uint8* src_y, int src_stride_y,
   1054                  const uint8* src_vu, int src_stride_vu,
   1055                  uint8* dst_rgb565, int dst_stride_rgb565,
   1056                  int width, int height) {
   1057   int y;
   1058   void (*NV21ToRGB565Row)(const uint8* y_buf,
   1059                           const uint8* src_vu,
   1060                           uint8* rgb_buf,
   1061                           int width) = NV21ToRGB565Row_C;
   1062   if (!src_y || !src_vu || !dst_rgb565 ||
   1063       width <= 0 || height == 0) {
   1064     return -1;
   1065   }
   1066   // Negative height means invert the image.
   1067   if (height < 0) {
   1068     height = -height;
   1069     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
   1070     dst_stride_rgb565 = -dst_stride_rgb565;
   1071   }
   1072 #if defined(HAS_NV21TORGB565ROW_SSSE3)
   1073   if (TestCpuFlag(kCpuHasSSSE3)) {
   1074     NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
   1075     if (IS_ALIGNED(width, 8)) {
   1076       NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
   1077     }
   1078   }
   1079 #endif
   1080 #if defined(HAS_NV21TORGB565ROW_AVX2)
   1081   if (TestCpuFlag(kCpuHasAVX2)) {
   1082     NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2;
   1083     if (IS_ALIGNED(width, 16)) {
   1084       NV21ToRGB565Row = NV21ToRGB565Row_AVX2;
   1085     }
   1086   }
   1087 #endif
   1088 #if defined(HAS_NV21TORGB565ROW_NEON)
   1089   if (TestCpuFlag(kCpuHasNEON)) {
   1090     NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
   1091     if (IS_ALIGNED(width, 8)) {
   1092       NV21ToRGB565Row = NV21ToRGB565Row_NEON;
   1093     }
   1094   }
   1095 #endif
   1096 
   1097   for (y = 0; y < height; ++y) {
   1098     NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
   1099     dst_rgb565 += dst_stride_rgb565;
   1100     src_y += src_stride_y;
   1101     if (y & 1) {
   1102       src_vu += src_stride_vu;
   1103     }
   1104   }
   1105   return 0;
   1106 }
   1107 
   1108 LIBYUV_API
   1109 void SetPlane(uint8* dst_y, int dst_stride_y,
   1110               int width, int height,
   1111               uint32 value) {
   1112   int y;
   1113   void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C;
   1114   if (height < 0) {
   1115     height = -height;
   1116     dst_y = dst_y + (height - 1) * dst_stride_y;
   1117     dst_stride_y = -dst_stride_y;
   1118   }
   1119   // Coalesce rows.
   1120   if (dst_stride_y == width) {
   1121     width *= height;
   1122     height = 1;
   1123     dst_stride_y = 0;
   1124   }
   1125 #if defined(HAS_SETROW_NEON)
   1126   if (TestCpuFlag(kCpuHasNEON)) {
   1127     SetRow = SetRow_Any_NEON;
   1128     if (IS_ALIGNED(width, 16)) {
   1129       SetRow = SetRow_NEON;
   1130     }
   1131   }
   1132 #endif
   1133 #if defined(HAS_SETROW_X86)
   1134   if (TestCpuFlag(kCpuHasX86)) {
   1135     SetRow = SetRow_Any_X86;
   1136     if (IS_ALIGNED(width, 4)) {
   1137       SetRow = SetRow_X86;
   1138     }
   1139   }
   1140 #endif
   1141 #if defined(HAS_SETROW_ERMS)
   1142   if (TestCpuFlag(kCpuHasERMS)) {
   1143     SetRow = SetRow_ERMS;
   1144   }
   1145 #endif
   1146 
   1147   // Set plane
   1148   for (y = 0; y < height; ++y) {
   1149     SetRow(dst_y, value, width);
   1150     dst_y += dst_stride_y;
   1151   }
   1152 }
   1153 
   1154 // Draw a rectangle into I420
   1155 LIBYUV_API
   1156 int I420Rect(uint8* dst_y, int dst_stride_y,
   1157              uint8* dst_u, int dst_stride_u,
   1158              uint8* dst_v, int dst_stride_v,
   1159              int x, int y,
   1160              int width, int height,
   1161              int value_y, int value_u, int value_v) {
   1162   int halfwidth = (width + 1) >> 1;
   1163   int halfheight = (height + 1) >> 1;
   1164   uint8* start_y = dst_y + y * dst_stride_y + x;
   1165   uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
   1166   uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
   1167   if (!dst_y || !dst_u || !dst_v ||
   1168       width <= 0 || height == 0 ||
   1169       x < 0 || y < 0 ||
   1170       value_y < 0 || value_y > 255 ||
   1171       value_u < 0 || value_u > 255 ||
   1172       value_v < 0 || value_v > 255) {
   1173     return -1;
   1174   }
   1175 
   1176   SetPlane(start_y, dst_stride_y, width, height, value_y);
   1177   SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
   1178   SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
   1179   return 0;
   1180 }
   1181 
   1182 // Draw a rectangle into ARGB
   1183 LIBYUV_API
   1184 int ARGBRect(uint8* dst_argb, int dst_stride_argb,
   1185              int dst_x, int dst_y,
   1186              int width, int height,
   1187              uint32 value) {
   1188   int y;
   1189   void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C;
   1190   if (!dst_argb ||
   1191       width <= 0 || height == 0 ||
   1192       dst_x < 0 || dst_y < 0) {
   1193     return -1;
   1194   }
   1195   if (height < 0) {
   1196     height = -height;
   1197     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1198     dst_stride_argb = -dst_stride_argb;
   1199   }
   1200   dst_argb += dst_y * dst_stride_argb + dst_x * 4;
   1201   // Coalesce rows.
   1202   if (dst_stride_argb == width * 4) {
   1203     width *= height;
   1204     height = 1;
   1205     dst_stride_argb = 0;
   1206   }
   1207 
   1208 #if defined(HAS_ARGBSETROW_NEON)
   1209   if (TestCpuFlag(kCpuHasNEON)) {
   1210     ARGBSetRow = ARGBSetRow_Any_NEON;
   1211     if (IS_ALIGNED(width, 4)) {
   1212       ARGBSetRow = ARGBSetRow_NEON;
   1213     }
   1214   }
   1215 #endif
   1216 #if defined(HAS_ARGBSETROW_X86)
   1217   if (TestCpuFlag(kCpuHasX86)) {
   1218     ARGBSetRow = ARGBSetRow_X86;
   1219   }
   1220 #endif
   1221 
   1222   // Set plane
   1223   for (y = 0; y < height; ++y) {
   1224     ARGBSetRow(dst_argb, value, width);
   1225     dst_argb += dst_stride_argb;
   1226   }
   1227   return 0;
   1228 }
   1229 
   1230 // Convert unattentuated ARGB to preattenuated ARGB.
   1231 // An unattenutated ARGB alpha blend uses the formula
   1232 // p = a * f + (1 - a) * b
   1233 // where
   1234 //   p is output pixel
   1235 //   f is foreground pixel
   1236 //   b is background pixel
   1237 //   a is alpha value from foreground pixel
   1238 // An preattenutated ARGB alpha blend uses the formula
   1239 // p = f + (1 - a) * b
   1240 // where
   1241 //   f is foreground pixel premultiplied by alpha
   1242 
   1243 LIBYUV_API
   1244 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
   1245                   uint8* dst_argb, int dst_stride_argb,
   1246                   int width, int height) {
   1247   int y;
   1248   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1249                            int width) = ARGBAttenuateRow_C;
   1250   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1251     return -1;
   1252   }
   1253   if (height < 0) {
   1254     height = -height;
   1255     src_argb = src_argb + (height - 1) * src_stride_argb;
   1256     src_stride_argb = -src_stride_argb;
   1257   }
   1258   // Coalesce rows.
   1259   if (src_stride_argb == width * 4 &&
   1260       dst_stride_argb == width * 4) {
   1261     width *= height;
   1262     height = 1;
   1263     src_stride_argb = dst_stride_argb = 0;
   1264   }
   1265 #if defined(HAS_ARGBATTENUATEROW_SSE2)
   1266   if (TestCpuFlag(kCpuHasSSE2)) {
   1267     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
   1268     if (IS_ALIGNED(width, 4)) {
   1269       ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
   1270     }
   1271   }
   1272 #endif
   1273 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
   1274   if (TestCpuFlag(kCpuHasSSSE3)) {
   1275     ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
   1276     if (IS_ALIGNED(width, 4)) {
   1277       ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
   1278     }
   1279   }
   1280 #endif
   1281 #if defined(HAS_ARGBATTENUATEROW_AVX2)
   1282   if (TestCpuFlag(kCpuHasAVX2)) {
   1283     ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
   1284     if (IS_ALIGNED(width, 8)) {
   1285       ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
   1286     }
   1287   }
   1288 #endif
   1289 #if defined(HAS_ARGBATTENUATEROW_NEON)
   1290   if (TestCpuFlag(kCpuHasNEON)) {
   1291     ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
   1292     if (IS_ALIGNED(width, 8)) {
   1293       ARGBAttenuateRow = ARGBAttenuateRow_NEON;
   1294     }
   1295   }
   1296 #endif
   1297 
   1298   for (y = 0; y < height; ++y) {
   1299     ARGBAttenuateRow(src_argb, dst_argb, width);
   1300     src_argb += src_stride_argb;
   1301     dst_argb += dst_stride_argb;
   1302   }
   1303   return 0;
   1304 }
   1305 
   1306 // Convert preattentuated ARGB to unattenuated ARGB.
   1307 LIBYUV_API
   1308 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
   1309                     uint8* dst_argb, int dst_stride_argb,
   1310                     int width, int height) {
   1311   int y;
   1312   void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1313                              int width) = ARGBUnattenuateRow_C;
   1314   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1315     return -1;
   1316   }
   1317   if (height < 0) {
   1318     height = -height;
   1319     src_argb = src_argb + (height - 1) * src_stride_argb;
   1320     src_stride_argb = -src_stride_argb;
   1321   }
   1322   // Coalesce rows.
   1323   if (src_stride_argb == width * 4 &&
   1324       dst_stride_argb == width * 4) {
   1325     width *= height;
   1326     height = 1;
   1327     src_stride_argb = dst_stride_argb = 0;
   1328   }
   1329 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
   1330   if (TestCpuFlag(kCpuHasSSE2)) {
   1331     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
   1332     if (IS_ALIGNED(width, 4)) {
   1333       ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
   1334     }
   1335   }
   1336 #endif
   1337 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
   1338   if (TestCpuFlag(kCpuHasAVX2)) {
   1339     ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
   1340     if (IS_ALIGNED(width, 8)) {
   1341       ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
   1342     }
   1343   }
   1344 #endif
   1345 // TODO(fbarchard): Neon version.
   1346 
   1347   for (y = 0; y < height; ++y) {
   1348     ARGBUnattenuateRow(src_argb, dst_argb, width);
   1349     src_argb += src_stride_argb;
   1350     dst_argb += dst_stride_argb;
   1351   }
   1352   return 0;
   1353 }
   1354 
   1355 // Convert ARGB to Grayed ARGB.
   1356 LIBYUV_API
   1357 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
   1358                uint8* dst_argb, int dst_stride_argb,
   1359                int width, int height) {
   1360   int y;
   1361   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1362                       int width) = ARGBGrayRow_C;
   1363   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1364     return -1;
   1365   }
   1366   if (height < 0) {
   1367     height = -height;
   1368     src_argb = src_argb + (height - 1) * src_stride_argb;
   1369     src_stride_argb = -src_stride_argb;
   1370   }
   1371   // Coalesce rows.
   1372   if (src_stride_argb == width * 4 &&
   1373       dst_stride_argb == width * 4) {
   1374     width *= height;
   1375     height = 1;
   1376     src_stride_argb = dst_stride_argb = 0;
   1377   }
   1378 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1379   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1380     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1381   }
   1382 #endif
   1383 #if defined(HAS_ARGBGRAYROW_NEON)
   1384   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1385     ARGBGrayRow = ARGBGrayRow_NEON;
   1386   }
   1387 #endif
   1388 
   1389   for (y = 0; y < height; ++y) {
   1390     ARGBGrayRow(src_argb, dst_argb, width);
   1391     src_argb += src_stride_argb;
   1392     dst_argb += dst_stride_argb;
   1393   }
   1394   return 0;
   1395 }
   1396 
   1397 // Make a rectangle of ARGB gray scale.
   1398 LIBYUV_API
   1399 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
   1400              int dst_x, int dst_y,
   1401              int width, int height) {
   1402   int y;
   1403   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1404                       int width) = ARGBGrayRow_C;
   1405   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1406   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1407     return -1;
   1408   }
   1409   // Coalesce rows.
   1410   if (dst_stride_argb == width * 4) {
   1411     width *= height;
   1412     height = 1;
   1413     dst_stride_argb = 0;
   1414   }
   1415 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1416   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1417     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1418   }
   1419 #endif
   1420 #if defined(HAS_ARGBGRAYROW_NEON)
   1421   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1422     ARGBGrayRow = ARGBGrayRow_NEON;
   1423   }
   1424 #endif
   1425   for (y = 0; y < height; ++y) {
   1426     ARGBGrayRow(dst, dst, width);
   1427     dst += dst_stride_argb;
   1428   }
   1429   return 0;
   1430 }
   1431 
   1432 // Make a rectangle of ARGB Sepia tone.
   1433 LIBYUV_API
   1434 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
   1435               int dst_x, int dst_y, int width, int height) {
   1436   int y;
   1437   void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
   1438   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1439   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1440     return -1;
   1441   }
   1442   // Coalesce rows.
   1443   if (dst_stride_argb == width * 4) {
   1444     width *= height;
   1445     height = 1;
   1446     dst_stride_argb = 0;
   1447   }
   1448 #if defined(HAS_ARGBSEPIAROW_SSSE3)
   1449   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1450     ARGBSepiaRow = ARGBSepiaRow_SSSE3;
   1451   }
   1452 #endif
   1453 #if defined(HAS_ARGBSEPIAROW_NEON)
   1454   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1455     ARGBSepiaRow = ARGBSepiaRow_NEON;
   1456   }
   1457 #endif
   1458   for (y = 0; y < height; ++y) {
   1459     ARGBSepiaRow(dst, width);
   1460     dst += dst_stride_argb;
   1461   }
   1462   return 0;
   1463 }
   1464 
   1465 // Apply a 4x4 matrix to each ARGB pixel.
   1466 // Note: Normally for shading, but can be used to swizzle or invert.
   1467 LIBYUV_API
   1468 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
   1469                     uint8* dst_argb, int dst_stride_argb,
   1470                     const int8* matrix_argb,
   1471                     int width, int height) {
   1472   int y;
   1473   void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
   1474       const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
   1475   if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
   1476     return -1;
   1477   }
   1478   if (height < 0) {
   1479     height = -height;
   1480     src_argb = src_argb + (height - 1) * src_stride_argb;
   1481     src_stride_argb = -src_stride_argb;
   1482   }
   1483   // Coalesce rows.
   1484   if (src_stride_argb == width * 4 &&
   1485       dst_stride_argb == width * 4) {
   1486     width *= height;
   1487     height = 1;
   1488     src_stride_argb = dst_stride_argb = 0;
   1489   }
   1490 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
   1491   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
   1492     ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
   1493   }
   1494 #endif
   1495 #if defined(HAS_ARGBCOLORMATRIXROW_NEON)
   1496   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1497     ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
   1498   }
   1499 #endif
   1500   for (y = 0; y < height; ++y) {
   1501     ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
   1502     src_argb += src_stride_argb;
   1503     dst_argb += dst_stride_argb;
   1504   }
   1505   return 0;
   1506 }
   1507 
   1508 // Apply a 4x3 matrix to each ARGB pixel.
   1509 // Deprecated.
   1510 LIBYUV_API
   1511 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
   1512                    const int8* matrix_rgb,
   1513                    int dst_x, int dst_y, int width, int height) {
   1514   SIMD_ALIGNED(int8 matrix_argb[16]);
   1515   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1516   if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
   1517       dst_x < 0 || dst_y < 0) {
   1518     return -1;
   1519   }
   1520 
   1521   // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
   1522   matrix_argb[0] = matrix_rgb[0] / 2;
   1523   matrix_argb[1] = matrix_rgb[1] / 2;
   1524   matrix_argb[2] = matrix_rgb[2] / 2;
   1525   matrix_argb[3] = matrix_rgb[3] / 2;
   1526   matrix_argb[4] = matrix_rgb[4] / 2;
   1527   matrix_argb[5] = matrix_rgb[5] / 2;
   1528   matrix_argb[6] = matrix_rgb[6] / 2;
   1529   matrix_argb[7] = matrix_rgb[7] / 2;
   1530   matrix_argb[8] = matrix_rgb[8] / 2;
   1531   matrix_argb[9] = matrix_rgb[9] / 2;
   1532   matrix_argb[10] = matrix_rgb[10] / 2;
   1533   matrix_argb[11] = matrix_rgb[11] / 2;
   1534   matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
   1535   matrix_argb[15] = 64;  // 1.0
   1536 
   1537   return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
   1538                          dst, dst_stride_argb,
   1539                          &matrix_argb[0], width, height);
   1540 }
   1541 
   1542 // Apply a color table each ARGB pixel.
   1543 // Table contains 256 ARGB values.
   1544 LIBYUV_API
   1545 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
   1546                    const uint8* table_argb,
   1547                    int dst_x, int dst_y, int width, int height) {
   1548   int y;
   1549   void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
   1550                             int width) = ARGBColorTableRow_C;
   1551   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1552   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
   1553       dst_x < 0 || dst_y < 0) {
   1554     return -1;
   1555   }
   1556   // Coalesce rows.
   1557   if (dst_stride_argb == width * 4) {
   1558     width *= height;
   1559     height = 1;
   1560     dst_stride_argb = 0;
   1561   }
   1562 #if defined(HAS_ARGBCOLORTABLEROW_X86)
   1563   if (TestCpuFlag(kCpuHasX86)) {
   1564     ARGBColorTableRow = ARGBColorTableRow_X86;
   1565   }
   1566 #endif
   1567   for (y = 0; y < height; ++y) {
   1568     ARGBColorTableRow(dst, table_argb, width);
   1569     dst += dst_stride_argb;
   1570   }
   1571   return 0;
   1572 }
   1573 
   1574 // Apply a color table each ARGB pixel but preserve destination alpha.
   1575 // Table contains 256 ARGB values.
   1576 LIBYUV_API
   1577 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
   1578                   const uint8* table_argb,
   1579                   int dst_x, int dst_y, int width, int height) {
   1580   int y;
   1581   void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
   1582                            int width) = RGBColorTableRow_C;
   1583   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1584   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
   1585       dst_x < 0 || dst_y < 0) {
   1586     return -1;
   1587   }
   1588   // Coalesce rows.
   1589   if (dst_stride_argb == width * 4) {
   1590     width *= height;
   1591     height = 1;
   1592     dst_stride_argb = 0;
   1593   }
   1594 #if defined(HAS_RGBCOLORTABLEROW_X86)
   1595   if (TestCpuFlag(kCpuHasX86)) {
   1596     RGBColorTableRow = RGBColorTableRow_X86;
   1597   }
   1598 #endif
   1599   for (y = 0; y < height; ++y) {
   1600     RGBColorTableRow(dst, table_argb, width);
   1601     dst += dst_stride_argb;
   1602   }
   1603   return 0;
   1604 }
   1605 
   1606 // ARGBQuantize is used to posterize art.
   1607 // e.g. rgb / qvalue * qvalue + qvalue / 2
   1608 // But the low levels implement efficiently with 3 parameters, and could be
   1609 // used for other high level operations.
   1610 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
   1611 // where scale is 1 / interval_size as a fixed point value.
   1612 // The divide is replaces with a multiply by reciprocal fixed point multiply.
   1613 // Caveat - although SSE2 saturates, the C function does not and should be used
   1614 // with care if doing anything but quantization.
   1615 LIBYUV_API
   1616 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
   1617                  int scale, int interval_size, int interval_offset,
   1618                  int dst_x, int dst_y, int width, int height) {
   1619   int y;
   1620   void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
   1621                           int interval_offset, int width) = ARGBQuantizeRow_C;
   1622   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1623   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
   1624       interval_size < 1 || interval_size > 255) {
   1625     return -1;
   1626   }
   1627   // Coalesce rows.
   1628   if (dst_stride_argb == width * 4) {
   1629     width *= height;
   1630     height = 1;
   1631     dst_stride_argb = 0;
   1632   }
   1633 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
   1634   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
   1635     ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
   1636   }
   1637 #endif
   1638 #if defined(HAS_ARGBQUANTIZEROW_NEON)
   1639   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1640     ARGBQuantizeRow = ARGBQuantizeRow_NEON;
   1641   }
   1642 #endif
   1643   for (y = 0; y < height; ++y) {
   1644     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
   1645     dst += dst_stride_argb;
   1646   }
   1647   return 0;
   1648 }
   1649 
   1650 // Computes table of cumulative sum for image where the value is the sum
   1651 // of all values above and to the left of the entry. Used by ARGBBlur.
   1652 LIBYUV_API
   1653 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
   1654                              int32* dst_cumsum, int dst_stride32_cumsum,
   1655                              int width, int height) {
   1656   int y;
   1657   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
   1658       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1659   int32* previous_cumsum = dst_cumsum;
   1660   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
   1661     return -1;
   1662   }
   1663 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   1664   if (TestCpuFlag(kCpuHasSSE2)) {
   1665     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1666   }
   1667 #endif
   1668   memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
   1669   for (y = 0; y < height; ++y) {
   1670     ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
   1671     previous_cumsum = dst_cumsum;
   1672     dst_cumsum += dst_stride32_cumsum;
   1673     src_argb += src_stride_argb;
   1674   }
   1675   return 0;
   1676 }
   1677 
   1678 // Blur ARGB image.
   1679 // Caller should allocate CumulativeSum table of width * height * 16 bytes
   1680 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
   1681 // as the buffer is treated as circular.
   1682 LIBYUV_API
   1683 int ARGBBlur(const uint8* src_argb, int src_stride_argb,
   1684              uint8* dst_argb, int dst_stride_argb,
   1685              int32* dst_cumsum, int dst_stride32_cumsum,
   1686              int width, int height, int radius) {
   1687   int y;
   1688   void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
   1689       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1690   void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
   1691       int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
   1692   int32* cumsum_bot_row;
   1693   int32* max_cumsum_bot_row;
   1694   int32* cumsum_top_row;
   1695 
   1696   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1697     return -1;
   1698   }
   1699   if (height < 0) {
   1700     height = -height;
   1701     src_argb = src_argb + (height - 1) * src_stride_argb;
   1702     src_stride_argb = -src_stride_argb;
   1703   }
   1704   if (radius > height) {
   1705     radius = height;
   1706   }
   1707   if (radius > (width / 2 - 1)) {
   1708     radius = width / 2 - 1;
   1709   }
   1710   if (radius <= 0) {
   1711     return -1;
   1712   }
   1713 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   1714   if (TestCpuFlag(kCpuHasSSE2)) {
   1715     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1716     CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
   1717   }
   1718 #endif
   1719   // Compute enough CumulativeSum for first row to be blurred. After this
   1720   // one row of CumulativeSum is updated at a time.
   1721   ARGBComputeCumulativeSum(src_argb, src_stride_argb,
   1722                            dst_cumsum, dst_stride32_cumsum,
   1723                            width, radius);
   1724 
   1725   src_argb = src_argb + radius * src_stride_argb;
   1726   cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
   1727 
   1728   max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
   1729   cumsum_top_row = &dst_cumsum[0];
   1730 
   1731   for (y = 0; y < height; ++y) {
   1732     int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
   1733     int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
   1734     int area = radius * (bot_y - top_y);
   1735     int boxwidth = radius * 4;
   1736     int x;
   1737     int n;
   1738 
   1739     // Increment cumsum_top_row pointer with circular buffer wrap around.
   1740     if (top_y) {
   1741       cumsum_top_row += dst_stride32_cumsum;
   1742       if (cumsum_top_row >= max_cumsum_bot_row) {
   1743         cumsum_top_row = dst_cumsum;
   1744       }
   1745     }
   1746     // Increment cumsum_bot_row pointer with circular buffer wrap around and
   1747     // then fill in a row of CumulativeSum.
   1748     if ((y + radius) < height) {
   1749       const int32* prev_cumsum_bot_row = cumsum_bot_row;
   1750       cumsum_bot_row += dst_stride32_cumsum;
   1751       if (cumsum_bot_row >= max_cumsum_bot_row) {
   1752         cumsum_bot_row = dst_cumsum;
   1753       }
   1754       ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
   1755                               width);
   1756       src_argb += src_stride_argb;
   1757     }
   1758 
   1759     // Left clipped.
   1760     for (x = 0; x < radius + 1; ++x) {
   1761       CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
   1762                                 boxwidth, area, &dst_argb[x * 4], 1);
   1763       area += (bot_y - top_y);
   1764       boxwidth += 4;
   1765     }
   1766 
   1767     // Middle unclipped.
   1768     n = (width - 1) - radius - x + 1;
   1769     CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
   1770                               boxwidth, area, &dst_argb[x * 4], n);
   1771 
   1772     // Right clipped.
   1773     for (x += n; x <= width - 1; ++x) {
   1774       area -= (bot_y - top_y);
   1775       boxwidth -= 4;
   1776       CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
   1777                                 cumsum_bot_row + (x - radius - 1) * 4,
   1778                                 boxwidth, area, &dst_argb[x * 4], 1);
   1779     }
   1780     dst_argb += dst_stride_argb;
   1781   }
   1782   return 0;
   1783 }
   1784 
   1785 // Multiply ARGB image by a specified ARGB value.
   1786 LIBYUV_API
   1787 int ARGBShade(const uint8* src_argb, int src_stride_argb,
   1788               uint8* dst_argb, int dst_stride_argb,
   1789               int width, int height, uint32 value) {
   1790   int y;
   1791   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
   1792                        int width, uint32 value) = ARGBShadeRow_C;
   1793   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
   1794     return -1;
   1795   }
   1796   if (height < 0) {
   1797     height = -height;
   1798     src_argb = src_argb + (height - 1) * src_stride_argb;
   1799     src_stride_argb = -src_stride_argb;
   1800   }
   1801   // Coalesce rows.
   1802   if (src_stride_argb == width * 4 &&
   1803       dst_stride_argb == width * 4) {
   1804     width *= height;
   1805     height = 1;
   1806     src_stride_argb = dst_stride_argb = 0;
   1807   }
   1808 #if defined(HAS_ARGBSHADEROW_SSE2)
   1809   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
   1810     ARGBShadeRow = ARGBShadeRow_SSE2;
   1811   }
   1812 #endif
   1813 #if defined(HAS_ARGBSHADEROW_NEON)
   1814   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
   1815     ARGBShadeRow = ARGBShadeRow_NEON;
   1816   }
   1817 #endif
   1818 
   1819   for (y = 0; y < height; ++y) {
   1820     ARGBShadeRow(src_argb, dst_argb, width, value);
   1821     src_argb += src_stride_argb;
   1822     dst_argb += dst_stride_argb;
   1823   }
   1824   return 0;
   1825 }
   1826 
   1827 // Interpolate 2 ARGB images by specified amount (0 to 255).
   1828 LIBYUV_API
   1829 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
   1830                     const uint8* src_argb1, int src_stride_argb1,
   1831                     uint8* dst_argb, int dst_stride_argb,
   1832                     int width, int height, int interpolation) {
   1833   int y;
   1834   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   1835                          ptrdiff_t src_stride, int dst_width,
   1836                          int source_y_fraction) = InterpolateRow_C;
   1837   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1838     return -1;
   1839   }
   1840   // Negative height means invert the image.
   1841   if (height < 0) {
   1842     height = -height;
   1843     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1844     dst_stride_argb = -dst_stride_argb;
   1845   }
   1846   // Coalesce rows.
   1847   if (src_stride_argb0 == width * 4 &&
   1848       src_stride_argb1 == width * 4 &&
   1849       dst_stride_argb == width * 4) {
   1850     width *= height;
   1851     height = 1;
   1852     src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1853   }
   1854 #if defined(HAS_INTERPOLATEROW_SSE2)
   1855   if (TestCpuFlag(kCpuHasSSE2)) {
   1856     InterpolateRow = InterpolateRow_Any_SSE2;
   1857     if (IS_ALIGNED(width, 4)) {
   1858       InterpolateRow = InterpolateRow_SSE2;
   1859     }
   1860   }
   1861 #endif
   1862 #if defined(HAS_INTERPOLATEROW_SSSE3)
   1863   if (TestCpuFlag(kCpuHasSSSE3)) {
   1864     InterpolateRow = InterpolateRow_Any_SSSE3;
   1865     if (IS_ALIGNED(width, 4)) {
   1866       InterpolateRow = InterpolateRow_SSSE3;
   1867     }
   1868   }
   1869 #endif
   1870 #if defined(HAS_INTERPOLATEROW_AVX2)
   1871   if (TestCpuFlag(kCpuHasAVX2)) {
   1872     InterpolateRow = InterpolateRow_Any_AVX2;
   1873     if (IS_ALIGNED(width, 8)) {
   1874       InterpolateRow = InterpolateRow_AVX2;
   1875     }
   1876   }
   1877 #endif
   1878 #if defined(HAS_INTERPOLATEROW_NEON)
   1879   if (TestCpuFlag(kCpuHasNEON)) {
   1880     InterpolateRow = InterpolateRow_Any_NEON;
   1881     if (IS_ALIGNED(width, 4)) {
   1882       InterpolateRow = InterpolateRow_NEON;
   1883     }
   1884   }
   1885 #endif
   1886 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
   1887   if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
   1888       IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
   1889       IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
   1890       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
   1891     InterpolateRow = InterpolateRow_MIPS_DSPR2;
   1892   }
   1893 #endif
   1894 
   1895   for (y = 0; y < height; ++y) {
   1896     InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
   1897                    width * 4, interpolation);
   1898     src_argb0 += src_stride_argb0;
   1899     src_argb1 += src_stride_argb1;
   1900     dst_argb += dst_stride_argb;
   1901   }
   1902   return 0;
   1903 }
   1904 
   1905 // Shuffle ARGB channel order.  e.g. BGRA to ARGB.
   1906 LIBYUV_API
   1907 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
   1908                 uint8* dst_argb, int dst_stride_argb,
   1909                 const uint8* shuffler, int width, int height) {
   1910   int y;
   1911   void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
   1912                          const uint8* shuffler, int pix) = ARGBShuffleRow_C;
   1913   if (!src_bgra || !dst_argb ||
   1914       width <= 0 || height == 0) {
   1915     return -1;
   1916   }
   1917   // Negative height means invert the image.
   1918   if (height < 0) {
   1919     height = -height;
   1920     src_bgra = src_bgra + (height - 1) * src_stride_bgra;
   1921     src_stride_bgra = -src_stride_bgra;
   1922   }
   1923   // Coalesce rows.
   1924   if (src_stride_bgra == width * 4 &&
   1925       dst_stride_argb == width * 4) {
   1926     width *= height;
   1927     height = 1;
   1928     src_stride_bgra = dst_stride_argb = 0;
   1929   }
   1930 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
   1931   if (TestCpuFlag(kCpuHasSSE2)) {
   1932     ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
   1933     if (IS_ALIGNED(width, 4)) {
   1934       ARGBShuffleRow = ARGBShuffleRow_SSE2;
   1935     }
   1936   }
   1937 #endif
   1938 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
   1939   if (TestCpuFlag(kCpuHasSSSE3)) {
   1940     ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
   1941     if (IS_ALIGNED(width, 8)) {
   1942       ARGBShuffleRow = ARGBShuffleRow_SSSE3;
   1943     }
   1944   }
   1945 #endif
   1946 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
   1947   if (TestCpuFlag(kCpuHasAVX2)) {
   1948     ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
   1949     if (IS_ALIGNED(width, 16)) {
   1950       ARGBShuffleRow = ARGBShuffleRow_AVX2;
   1951     }
   1952   }
   1953 #endif
   1954 #if defined(HAS_ARGBSHUFFLEROW_NEON)
   1955   if (TestCpuFlag(kCpuHasNEON)) {
   1956     ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
   1957     if (IS_ALIGNED(width, 4)) {
   1958       ARGBShuffleRow = ARGBShuffleRow_NEON;
   1959     }
   1960   }
   1961 #endif
   1962 
   1963   for (y = 0; y < height; ++y) {
   1964     ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
   1965     src_bgra += src_stride_bgra;
   1966     dst_argb += dst_stride_argb;
   1967   }
   1968   return 0;
   1969 }
   1970 
   1971 // Sobel ARGB effect.
   1972 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
   1973                         uint8* dst_argb, int dst_stride_argb,
   1974                         int width, int height,
   1975                         void (*SobelRow)(const uint8* src_sobelx,
   1976                                          const uint8* src_sobely,
   1977                                          uint8* dst, int width)) {
   1978   int y;
   1979   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) =
   1980       ARGBToYJRow_C;
   1981   void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
   1982                     uint8* dst_sobely, int width) = SobelYRow_C;
   1983   void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
   1984                     const uint8* src_y2, uint8* dst_sobely, int width) =
   1985       SobelXRow_C;
   1986   const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
   1987   if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
   1988     return -1;
   1989   }
   1990   // Negative height means invert the image.
   1991   if (height < 0) {
   1992     height = -height;
   1993     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   1994     src_stride_argb = -src_stride_argb;
   1995   }
   1996 
   1997 #if defined(HAS_ARGBTOYJROW_SSSE3)
   1998   if (TestCpuFlag(kCpuHasSSSE3)) {
   1999     ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
   2000     if (IS_ALIGNED(width, 16)) {
   2001       ARGBToYJRow = ARGBToYJRow_SSSE3;
   2002     }
   2003   }
   2004 #endif
   2005 #if defined(HAS_ARGBTOYJROW_AVX2)
   2006   if (TestCpuFlag(kCpuHasAVX2)) {
   2007     ARGBToYJRow = ARGBToYJRow_Any_AVX2;
   2008     if (IS_ALIGNED(width, 32)) {
   2009       ARGBToYJRow = ARGBToYJRow_AVX2;
   2010     }
   2011   }
   2012 #endif
   2013 #if defined(HAS_ARGBTOYJROW_NEON)
   2014   if (TestCpuFlag(kCpuHasNEON)) {
   2015     ARGBToYJRow = ARGBToYJRow_Any_NEON;
   2016     if (IS_ALIGNED(width, 8)) {
   2017       ARGBToYJRow = ARGBToYJRow_NEON;
   2018     }
   2019   }
   2020 #endif
   2021 
   2022 #if defined(HAS_SOBELYROW_SSE2)
   2023   if (TestCpuFlag(kCpuHasSSE2)) {
   2024     SobelYRow = SobelYRow_SSE2;
   2025   }
   2026 #endif
   2027 #if defined(HAS_SOBELYROW_NEON)
   2028   if (TestCpuFlag(kCpuHasNEON)) {
   2029     SobelYRow = SobelYRow_NEON;
   2030   }
   2031 #endif
   2032 #if defined(HAS_SOBELXROW_SSE2)
   2033   if (TestCpuFlag(kCpuHasSSE2)) {
   2034     SobelXRow = SobelXRow_SSE2;
   2035   }
   2036 #endif
   2037 #if defined(HAS_SOBELXROW_NEON)
   2038   if (TestCpuFlag(kCpuHasNEON)) {
   2039     SobelXRow = SobelXRow_NEON;
   2040   }
   2041 #endif
   2042   {
   2043     // 3 rows with edges before/after.
   2044     const int kRowSize = (width + kEdge + 31) & ~31;
   2045     align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
   2046     uint8* row_sobelx = rows;
   2047     uint8* row_sobely = rows + kRowSize;
   2048     uint8* row_y = rows + kRowSize * 2;
   2049 
   2050     // Convert first row.
   2051     uint8* row_y0 = row_y + kEdge;
   2052     uint8* row_y1 = row_y0 + kRowSize;
   2053     uint8* row_y2 = row_y1 + kRowSize;
   2054     ARGBToYJRow(src_argb, row_y0, width);
   2055     row_y0[-1] = row_y0[0];
   2056     memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
   2057     ARGBToYJRow(src_argb, row_y1, width);
   2058     row_y1[-1] = row_y1[0];
   2059     memset(row_y1 + width, row_y1[width - 1], 16);
   2060     memset(row_y2 + width, 0, 16);
   2061 
   2062     for (y = 0; y < height; ++y) {
   2063       // Convert next row of ARGB to G.
   2064       if (y < (height - 1)) {
   2065         src_argb += src_stride_argb;
   2066       }
   2067       ARGBToYJRow(src_argb, row_y2, width);
   2068       row_y2[-1] = row_y2[0];
   2069       row_y2[width] = row_y2[width - 1];
   2070 
   2071       SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
   2072       SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
   2073       SobelRow(row_sobelx, row_sobely, dst_argb, width);
   2074 
   2075       // Cycle thru circular queue of 3 row_y buffers.
   2076       {
   2077         uint8* row_yt = row_y0;
   2078         row_y0 = row_y1;
   2079         row_y1 = row_y2;
   2080         row_y2 = row_yt;
   2081       }
   2082 
   2083       dst_argb += dst_stride_argb;
   2084     }
   2085     free_aligned_buffer_64(rows);
   2086   }
   2087   return 0;
   2088 }
   2089 
   2090 // Sobel ARGB effect.
   2091 LIBYUV_API
   2092 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
   2093               uint8* dst_argb, int dst_stride_argb,
   2094               int width, int height) {
   2095   void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2096                    uint8* dst_argb, int width) = SobelRow_C;
   2097 #if defined(HAS_SOBELROW_SSE2)
   2098   if (TestCpuFlag(kCpuHasSSE2)) {
   2099     SobelRow = SobelRow_Any_SSE2;
   2100     if (IS_ALIGNED(width, 16)) {
   2101       SobelRow = SobelRow_SSE2;
   2102     }
   2103   }
   2104 #endif
   2105 #if defined(HAS_SOBELROW_NEON)
   2106   if (TestCpuFlag(kCpuHasNEON)) {
   2107     SobelRow = SobelRow_Any_NEON;
   2108     if (IS_ALIGNED(width, 8)) {
   2109       SobelRow = SobelRow_NEON;
   2110     }
   2111   }
   2112 #endif
   2113   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2114                       width, height, SobelRow);
   2115 }
   2116 
   2117 // Sobel ARGB effect with planar output.
   2118 LIBYUV_API
   2119 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
   2120                      uint8* dst_y, int dst_stride_y,
   2121                      int width, int height) {
   2122   void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2123                           uint8* dst_, int width) = SobelToPlaneRow_C;
   2124 #if defined(HAS_SOBELTOPLANEROW_SSE2)
   2125   if (TestCpuFlag(kCpuHasSSE2)) {
   2126     SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
   2127     if (IS_ALIGNED(width, 16)) {
   2128       SobelToPlaneRow = SobelToPlaneRow_SSE2;
   2129     }
   2130   }
   2131 #endif
   2132 #if defined(HAS_SOBELTOPLANEROW_NEON)
   2133   if (TestCpuFlag(kCpuHasNEON)) {
   2134     SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
   2135     if (IS_ALIGNED(width, 16)) {
   2136       SobelToPlaneRow = SobelToPlaneRow_NEON;
   2137     }
   2138   }
   2139 #endif
   2140   return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
   2141                       width, height, SobelToPlaneRow);
   2142 }
   2143 
   2144 // SobelXY ARGB effect.
   2145 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
   2146 LIBYUV_API
   2147 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
   2148                 uint8* dst_argb, int dst_stride_argb,
   2149                 int width, int height) {
   2150   void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
   2151                      uint8* dst_argb, int width) = SobelXYRow_C;
   2152 #if defined(HAS_SOBELXYROW_SSE2)
   2153   if (TestCpuFlag(kCpuHasSSE2)) {
   2154     SobelXYRow = SobelXYRow_Any_SSE2;
   2155     if (IS_ALIGNED(width, 16)) {
   2156       SobelXYRow = SobelXYRow_SSE2;
   2157     }
   2158   }
   2159 #endif
   2160 #if defined(HAS_SOBELXYROW_NEON)
   2161   if (TestCpuFlag(kCpuHasNEON)) {
   2162     SobelXYRow = SobelXYRow_Any_NEON;
   2163     if (IS_ALIGNED(width, 8)) {
   2164       SobelXYRow = SobelXYRow_NEON;
   2165     }
   2166   }
   2167 #endif
   2168   return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
   2169                       width, height, SobelXYRow);
   2170 }
   2171 
   2172 // Apply a 4x4 polynomial to each ARGB pixel.
   2173 LIBYUV_API
   2174 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
   2175                    uint8* dst_argb, int dst_stride_argb,
   2176                    const float* poly,
   2177                    int width, int height) {
   2178   int y;
   2179   void (*ARGBPolynomialRow)(const uint8* src_argb,
   2180                             uint8* dst_argb, const float* poly,
   2181                             int width) = ARGBPolynomialRow_C;
   2182   if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
   2183     return -1;
   2184   }
   2185   // Negative height means invert the image.
   2186   if (height < 0) {
   2187     height = -height;
   2188     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   2189     src_stride_argb = -src_stride_argb;
   2190   }
   2191   // Coalesce rows.
   2192   if (src_stride_argb == width * 4 &&
   2193       dst_stride_argb == width * 4) {
   2194     width *= height;
   2195     height = 1;
   2196     src_stride_argb = dst_stride_argb = 0;
   2197   }
   2198 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
   2199   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
   2200     ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
   2201   }
   2202 #endif
   2203 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
   2204   if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
   2205       IS_ALIGNED(width, 2)) {
   2206     ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
   2207   }
   2208 #endif
   2209 
   2210   for (y = 0; y < height; ++y) {
   2211     ARGBPolynomialRow(src_argb, dst_argb, poly, width);
   2212     src_argb += src_stride_argb;
   2213     dst_argb += dst_stride_argb;
   2214   }
   2215   return 0;
   2216 }
   2217 
   2218 // Apply a lumacolortable to each ARGB pixel.
   2219 LIBYUV_API
   2220 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
   2221                        uint8* dst_argb, int dst_stride_argb,
   2222                        const uint8* luma,
   2223                        int width, int height) {
   2224   int y;
   2225   void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
   2226       int width, const uint8* luma, const uint32 lumacoeff) =
   2227       ARGBLumaColorTableRow_C;
   2228   if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
   2229     return -1;
   2230   }
   2231   // Negative height means invert the image.
   2232   if (height < 0) {
   2233     height = -height;
   2234     src_argb  = src_argb  + (height - 1) * src_stride_argb;
   2235     src_stride_argb = -src_stride_argb;
   2236   }
   2237   // Coalesce rows.
   2238   if (src_stride_argb == width * 4 &&
   2239       dst_stride_argb == width * 4) {
   2240     width *= height;
   2241     height = 1;
   2242     src_stride_argb = dst_stride_argb = 0;
   2243   }
   2244 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
   2245   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
   2246     ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
   2247   }
   2248 #endif
   2249 
   2250   for (y = 0; y < height; ++y) {
   2251     ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
   2252     src_argb += src_stride_argb;
   2253     dst_argb += dst_stride_argb;
   2254   }
   2255   return 0;
   2256 }
   2257 
   2258 // Copy Alpha from one ARGB image to another.
   2259 LIBYUV_API
   2260 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
   2261                   uint8* dst_argb, int dst_stride_argb,
   2262                   int width, int height) {
   2263   int y;
   2264   void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
   2265       ARGBCopyAlphaRow_C;
   2266   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   2267     return -1;
   2268   }
   2269   // Negative height means invert the image.
   2270   if (height < 0) {
   2271     height = -height;
   2272     src_argb = src_argb + (height - 1) * src_stride_argb;
   2273     src_stride_argb = -src_stride_argb;
   2274   }
   2275   // Coalesce rows.
   2276   if (src_stride_argb == width * 4 &&
   2277       dst_stride_argb == width * 4) {
   2278     width *= height;
   2279     height = 1;
   2280     src_stride_argb = dst_stride_argb = 0;
   2281   }
   2282 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
   2283   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
   2284     ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
   2285   }
   2286 #endif
   2287 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
   2288   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
   2289     ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
   2290   }
   2291 #endif
   2292 
   2293   for (y = 0; y < height; ++y) {
   2294     ARGBCopyAlphaRow(src_argb, dst_argb, width);
   2295     src_argb += src_stride_argb;
   2296     dst_argb += dst_stride_argb;
   2297   }
   2298   return 0;
   2299 }
   2300 
   2301 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
   2302 LIBYUV_API
   2303 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
   2304                      uint8* dst_argb, int dst_stride_argb,
   2305                      int width, int height) {
   2306   int y;
   2307   void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
   2308       ARGBCopyYToAlphaRow_C;
   2309   if (!src_y || !dst_argb || width <= 0 || height == 0) {
   2310     return -1;
   2311   }
   2312   // Negative height means invert the image.
   2313   if (height < 0) {
   2314     height = -height;
   2315     src_y = src_y + (height - 1) * src_stride_y;
   2316     src_stride_y = -src_stride_y;
   2317   }
   2318   // Coalesce rows.
   2319   if (src_stride_y == width &&
   2320       dst_stride_argb == width * 4) {
   2321     width *= height;
   2322     height = 1;
   2323     src_stride_y = dst_stride_argb = 0;
   2324   }
   2325 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
   2326   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
   2327     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
   2328   }
   2329 #endif
   2330 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
   2331   if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
   2332     ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
   2333   }
   2334 #endif
   2335 
   2336   for (y = 0; y < height; ++y) {
   2337     ARGBCopyYToAlphaRow(src_y, dst_argb, width);
   2338     src_y += src_stride_y;
   2339     dst_argb += dst_stride_argb;
   2340   }
   2341   return 0;
   2342 }
   2343 
   2344 LIBYUV_API
   2345 int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
   2346                uint8* dst_y, int dst_stride_y,
   2347                uint8* dst_uv, int dst_stride_uv,
   2348                int width, int height) {
   2349   int y;
   2350   int halfwidth = (width + 1) >> 1;
   2351   void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
   2352       SplitUVRow_C;
   2353   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   2354                          ptrdiff_t src_stride, int dst_width,
   2355                          int source_y_fraction) = InterpolateRow_C;
   2356   if (!src_yuy2 ||
   2357       !dst_y || !dst_uv ||
   2358       width <= 0 || height == 0) {
   2359     return -1;
   2360   }
   2361   // Negative height means invert the image.
   2362   if (height < 0) {
   2363     height = -height;
   2364     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
   2365     src_stride_yuy2 = -src_stride_yuy2;
   2366   }
   2367 #if defined(HAS_SPLITUVROW_SSE2)
   2368   if (TestCpuFlag(kCpuHasSSE2)) {
   2369     SplitUVRow = SplitUVRow_Any_SSE2;
   2370     if (IS_ALIGNED(width, 16)) {
   2371       SplitUVRow = SplitUVRow_SSE2;
   2372     }
   2373   }
   2374 #endif
   2375 #if defined(HAS_SPLITUVROW_AVX2)
   2376   if (TestCpuFlag(kCpuHasAVX2)) {
   2377     SplitUVRow = SplitUVRow_Any_AVX2;
   2378     if (IS_ALIGNED(width, 32)) {
   2379       SplitUVRow = SplitUVRow_AVX2;
   2380     }
   2381   }
   2382 #endif
   2383 #if defined(HAS_SPLITUVROW_NEON)
   2384   if (TestCpuFlag(kCpuHasNEON)) {
   2385     SplitUVRow = SplitUVRow_Any_NEON;
   2386     if (IS_ALIGNED(width, 16)) {
   2387       SplitUVRow = SplitUVRow_NEON;
   2388     }
   2389   }
   2390 #endif
   2391 #if defined(HAS_INTERPOLATEROW_SSE2)
   2392   if (TestCpuFlag(kCpuHasSSE2)) {
   2393     InterpolateRow = InterpolateRow_Any_SSE2;
   2394     if (IS_ALIGNED(width, 16)) {
   2395       InterpolateRow = InterpolateRow_SSE2;
   2396     }
   2397   }
   2398 #endif
   2399 #if defined(HAS_INTERPOLATEROW_SSSE3)
   2400   if (TestCpuFlag(kCpuHasSSSE3)) {
   2401     InterpolateRow = InterpolateRow_Any_SSSE3;
   2402     if (IS_ALIGNED(width, 16)) {
   2403       InterpolateRow = InterpolateRow_SSSE3;
   2404     }
   2405   }
   2406 #endif
   2407 #if defined(HAS_INTERPOLATEROW_AVX2)
   2408   if (TestCpuFlag(kCpuHasAVX2)) {
   2409     InterpolateRow = InterpolateRow_Any_AVX2;
   2410     if (IS_ALIGNED(width, 32)) {
   2411       InterpolateRow = InterpolateRow_AVX2;
   2412     }
   2413   }
   2414 #endif
   2415 #if defined(HAS_INTERPOLATEROW_NEON)
   2416   if (TestCpuFlag(kCpuHasNEON)) {
   2417     InterpolateRow = InterpolateRow_Any_NEON;
   2418     if (IS_ALIGNED(width, 16)) {
   2419       InterpolateRow = InterpolateRow_NEON;
   2420     }
   2421   }
   2422 #endif
   2423 
   2424   {
   2425     int awidth = halfwidth * 2;
   2426     // 2 rows of uv
   2427     align_buffer_64(rows, awidth * 2);
   2428 
   2429     for (y = 0; y < height - 1; y += 2) {
   2430       // Split Y from UV.
   2431       SplitUVRow(src_yuy2, dst_y, rows, awidth);
   2432       SplitUVRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y,
   2433                  rows + awidth, awidth);
   2434       InterpolateRow(dst_uv, rows, awidth, awidth, 128);
   2435       src_yuy2 += src_stride_yuy2 * 2;
   2436       dst_y += dst_stride_y * 2;
   2437       dst_uv += dst_stride_uv;
   2438     }
   2439     if (height & 1) {
   2440       // Split Y from UV.
   2441       SplitUVRow(src_yuy2, dst_y, dst_uv, width);
   2442     }
   2443     free_aligned_buffer_64(rows);
   2444   }
   2445   return 0;
   2446 }
   2447 
   2448 LIBYUV_API
   2449 int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
   2450                uint8* dst_y, int dst_stride_y,
   2451                uint8* dst_uv, int dst_stride_uv,
   2452                int width, int height) {
   2453   int y;
   2454   int halfwidth = (width + 1) >> 1;
   2455   void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
   2456       SplitUVRow_C;
   2457   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   2458                          ptrdiff_t src_stride, int dst_width,
   2459                          int source_y_fraction) = InterpolateRow_C;
   2460   if (!src_uyvy ||
   2461       !dst_y || !dst_uv ||
   2462       width <= 0 || height == 0) {
   2463     return -1;
   2464   }
   2465   // Negative height means invert the image.
   2466   if (height < 0) {
   2467     height = -height;
   2468     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
   2469     src_stride_uyvy = -src_stride_uyvy;
   2470   }
   2471 #if defined(HAS_SPLITUVROW_SSE2)
   2472   if (TestCpuFlag(kCpuHasSSE2)) {
   2473     SplitUVRow = SplitUVRow_Any_SSE2;
   2474     if (IS_ALIGNED(width, 16)) {
   2475       SplitUVRow = SplitUVRow_SSE2;
   2476     }
   2477   }
   2478 #endif
   2479 #if defined(HAS_SPLITUVROW_AVX2)
   2480   if (TestCpuFlag(kCpuHasAVX2)) {
   2481     SplitUVRow = SplitUVRow_Any_AVX2;
   2482     if (IS_ALIGNED(width, 32)) {
   2483       SplitUVRow = SplitUVRow_AVX2;
   2484     }
   2485   }
   2486 #endif
   2487 #if defined(HAS_SPLITUVROW_NEON)
   2488   if (TestCpuFlag(kCpuHasNEON)) {
   2489     SplitUVRow = SplitUVRow_Any_NEON;
   2490     if (IS_ALIGNED(width, 16)) {
   2491       SplitUVRow = SplitUVRow_NEON;
   2492     }
   2493   }
   2494 #endif
   2495 #if defined(HAS_INTERPOLATEROW_SSE2)
   2496   if (TestCpuFlag(kCpuHasSSE2)) {
   2497     InterpolateRow = InterpolateRow_Any_SSE2;
   2498     if (IS_ALIGNED(width, 16)) {
   2499       InterpolateRow = InterpolateRow_SSE2;
   2500     }
   2501   }
   2502 #endif
   2503 #if defined(HAS_INTERPOLATEROW_SSSE3)
   2504   if (TestCpuFlag(kCpuHasSSSE3)) {
   2505     InterpolateRow = InterpolateRow_Any_SSSE3;
   2506     if (IS_ALIGNED(width, 16)) {
   2507       InterpolateRow = InterpolateRow_SSSE3;
   2508     }
   2509   }
   2510 #endif
   2511 #if defined(HAS_INTERPOLATEROW_AVX2)
   2512   if (TestCpuFlag(kCpuHasAVX2)) {
   2513     InterpolateRow = InterpolateRow_Any_AVX2;
   2514     if (IS_ALIGNED(width, 32)) {
   2515       InterpolateRow = InterpolateRow_AVX2;
   2516     }
   2517   }
   2518 #endif
   2519 #if defined(HAS_INTERPOLATEROW_NEON)
   2520   if (TestCpuFlag(kCpuHasNEON)) {
   2521     InterpolateRow = InterpolateRow_Any_NEON;
   2522     if (IS_ALIGNED(width, 16)) {
   2523       InterpolateRow = InterpolateRow_NEON;
   2524     }
   2525   }
   2526 #endif
   2527 
   2528   {
   2529     int awidth = halfwidth * 2;
   2530     // 2 rows of uv
   2531     align_buffer_64(rows, awidth * 2);
   2532 
   2533     for (y = 0; y < height - 1; y += 2) {
   2534       // Split Y from UV.
   2535       SplitUVRow(src_uyvy, rows, dst_y, awidth);
   2536       SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth,
   2537                  dst_y + dst_stride_y, awidth);
   2538       InterpolateRow(dst_uv, rows, awidth, awidth, 128);
   2539       src_uyvy += src_stride_uyvy * 2;
   2540       dst_y += dst_stride_y * 2;
   2541       dst_uv += dst_stride_uv;
   2542     }
   2543     if (height & 1) {
   2544       // Split Y from UV.
   2545       SplitUVRow(src_uyvy, dst_y, dst_uv, width);
   2546     }
   2547     free_aligned_buffer_64(rows);
   2548   }
   2549   return 0;
   2550 }
   2551 
   2552 #ifdef __cplusplus
   2553 }  // extern "C"
   2554 }  // namespace libyuv
   2555 #endif
   2556