Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/planar_functions.h"
     12 
     13 #include <string.h>  // for memset()
     14 
     15 #include "libyuv/cpu_id.h"
     16 #ifdef HAVE_JPEG
     17 #include "libyuv/mjpeg_decoder.h"
     18 #endif
     19 #include "libyuv/row.h"
     20 
     21 #ifdef __cplusplus
     22 namespace libyuv {
     23 extern "C" {
     24 #endif
     25 
     26 // Copy a plane of data
     27 LIBYUV_API
     28 void CopyPlane(const uint8* src_y, int src_stride_y,
     29                uint8* dst_y, int dst_stride_y,
     30                int width, int height) {
     31   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
     32 #if defined(HAS_COPYROW_NEON)
     33   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) {
     34     CopyRow = CopyRow_NEON;
     35   }
     36 #endif
     37 #if defined(HAS_COPYROW_X86)
     38   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
     39     CopyRow = CopyRow_X86;
     40   }
     41 #endif
     42 #if defined(HAS_COPYROW_SSE2)
     43   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
     44       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
     45       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
     46     CopyRow = CopyRow_SSE2;
     47   }
     48 #endif
     49 
     50   // Copy plane
     51   for (int y = 0; y < height; ++y) {
     52     CopyRow(src_y, dst_y, width);
     53     src_y += src_stride_y;
     54     dst_y += dst_stride_y;
     55   }
     56 }
     57 
     58 // Convert I420 to I400.
     59 LIBYUV_API
     60 int I420ToI400(const uint8* src_y, int src_stride_y,
     61                uint8*, int,  // src_u
     62                uint8*, int,  // src_v
     63                uint8* dst_y, int dst_stride_y,
     64                int width, int height) {
     65   if (!src_y || !dst_y || width <= 0 || height == 0) {
     66     return -1;
     67   }
     68   // Negative height means invert the image.
     69   if (height < 0) {
     70     height = -height;
     71     src_y = src_y + (height - 1) * src_stride_y;
     72     src_stride_y = -src_stride_y;
     73   }
     74   CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
     75   return 0;
     76 }
     77 
     78 // Mirror a plane of data
     79 void MirrorPlane(const uint8* src_y, int src_stride_y,
     80                  uint8* dst_y, int dst_stride_y,
     81                  int width, int height) {
     82   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
     83 #if defined(HAS_MIRRORROW_NEON)
     84   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
     85     MirrorRow = MirrorRow_NEON;
     86   }
     87 #endif
     88 #if defined(HAS_MIRRORROW_SSE2)
     89   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
     90     MirrorRow = MirrorRow_SSE2;
     91 #if defined(HAS_MIRRORROW_SSSE3)
     92     if (TestCpuFlag(kCpuHasSSSE3) &&
     93         IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
     94       MirrorRow = MirrorRow_SSSE3;
     95     }
     96 #endif
     97   }
     98 #endif
     99 
    100   // Mirror plane
    101   for (int y = 0; y < height; ++y) {
    102     MirrorRow(src_y, dst_y, width);
    103     src_y += src_stride_y;
    104     dst_y += dst_stride_y;
    105   }
    106 }
    107 
    108 // Convert YUY2 to I422.
    109 LIBYUV_API
    110 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
    111                uint8* dst_y, int dst_stride_y,
    112                uint8* dst_u, int dst_stride_u,
    113                uint8* dst_v, int dst_stride_v,
    114                int width, int height) {
    115   // Negative height means invert the image.
    116   if (height < 0) {
    117     height = -height;
    118     src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    119     src_stride_yuy2 = -src_stride_yuy2;
    120   }
    121   void (*YUY2ToUV422Row)(const uint8* src_yuy2,
    122                       uint8* dst_u, uint8* dst_v, int pix);
    123   void (*YUY2ToYRow)(const uint8* src_yuy2,
    124                      uint8* dst_y, int pix);
    125   YUY2ToYRow = YUY2ToYRow_C;
    126   YUY2ToUV422Row = YUY2ToUV422Row_C;
    127 #if defined(HAS_YUY2TOYROW_SSE2)
    128   if (TestCpuFlag(kCpuHasSSE2)) {
    129     if (width > 16) {
    130       YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    131       YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    132     }
    133     if (IS_ALIGNED(width, 16)) {
    134       YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
    135       YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
    136       if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
    137         YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
    138         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    139           YUY2ToYRow = YUY2ToYRow_SSE2;
    140         }
    141       }
    142     }
    143   }
    144 #elif defined(HAS_YUY2TOYROW_NEON)
    145   if (TestCpuFlag(kCpuHasNEON)) {
    146     if (width > 8) {
    147       YUY2ToYRow = YUY2ToYRow_Any_NEON;
    148       if (width > 16) {
    149         YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    150       }
    151     }
    152     if (IS_ALIGNED(width, 16)) {
    153       YUY2ToYRow = YUY2ToYRow_NEON;
    154       YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    155     }
    156   }
    157 #endif
    158 
    159   for (int y = 0; y < height; ++y) {
    160     YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    161     YUY2ToYRow(src_yuy2, dst_y, width);
    162     src_yuy2 += src_stride_yuy2;
    163     dst_y += dst_stride_y;
    164     dst_u += dst_stride_u;
    165     dst_v += dst_stride_v;
    166   }
    167   return 0;
    168 }
    169 
    170 // Convert UYVY to I422.
    171 LIBYUV_API
    172 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
    173                uint8* dst_y, int dst_stride_y,
    174                uint8* dst_u, int dst_stride_u,
    175                uint8* dst_v, int dst_stride_v,
    176                int width, int height) {
    177   // Negative height means invert the image.
    178   if (height < 0) {
    179     height = -height;
    180     src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    181     src_stride_uyvy = -src_stride_uyvy;
    182   }
    183   void (*UYVYToUV422Row)(const uint8* src_uyvy,
    184                       uint8* dst_u, uint8* dst_v, int pix);
    185   void (*UYVYToYRow)(const uint8* src_uyvy,
    186                      uint8* dst_y, int pix);
    187   UYVYToYRow = UYVYToYRow_C;
    188   UYVYToUV422Row = UYVYToUV422Row_C;
    189 #if defined(HAS_UYVYTOYROW_SSE2)
    190   if (TestCpuFlag(kCpuHasSSE2)) {
    191     if (width > 16) {
    192       UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    193       UYVYToYRow = UYVYToYRow_Any_SSE2;
    194     }
    195     if (IS_ALIGNED(width, 16)) {
    196       UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
    197       UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
    198       if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
    199         UYVYToUV422Row = UYVYToUV422Row_SSE2;
    200         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    201           UYVYToYRow = UYVYToYRow_SSE2;
    202         }
    203       }
    204     }
    205   }
    206 #elif defined(HAS_UYVYTOYROW_NEON)
    207   if (TestCpuFlag(kCpuHasNEON)) {
    208     if (width > 8) {
    209       UYVYToYRow = UYVYToYRow_Any_NEON;
    210       if (width > 16) {
    211         UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    212       }
    213     }
    214     if (IS_ALIGNED(width, 16)) {
    215       UYVYToYRow = UYVYToYRow_NEON;
    216       UYVYToUV422Row = UYVYToUV422Row_NEON;
    217     }
    218   }
    219 #endif
    220 
    221   for (int y = 0; y < height; ++y) {
    222     UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    223     UYVYToYRow(src_uyvy, dst_y, width);
    224     src_uyvy += src_stride_uyvy;
    225     dst_y += dst_stride_y;
    226     dst_u += dst_stride_u;
    227     dst_v += dst_stride_v;
    228   }
    229   return 0;
    230 }
    231 
    232 // Mirror I420 with optional flipping
    233 LIBYUV_API
    234 int I420Mirror(const uint8* src_y, int src_stride_y,
    235                const uint8* src_u, int src_stride_u,
    236                const uint8* src_v, int src_stride_v,
    237                uint8* dst_y, int dst_stride_y,
    238                uint8* dst_u, int dst_stride_u,
    239                uint8* dst_v, int dst_stride_v,
    240                int width, int height) {
    241   if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
    242       width <= 0 || height == 0) {
    243     return -1;
    244   }
    245   // Negative height means invert the image.
    246   if (height < 0) {
    247     height = -height;
    248     int halfheight = (height + 1) >> 1;
    249     src_y = src_y + (height - 1) * src_stride_y;
    250     src_u = src_u + (halfheight - 1) * src_stride_u;
    251     src_v = src_v + (halfheight - 1) * src_stride_v;
    252     src_stride_y = -src_stride_y;
    253     src_stride_u = -src_stride_u;
    254     src_stride_v = -src_stride_v;
    255   }
    256 
    257   int halfwidth = (width + 1) >> 1;
    258   int halfheight = (height + 1) >> 1;
    259   if (dst_y) {
    260     MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
    261   }
    262   MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
    263   MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
    264   return 0;
    265 }
    266 
    267 // ARGB mirror.
    268 LIBYUV_API
    269 int ARGBMirror(const uint8* src_argb, int src_stride_argb,
    270                uint8* dst_argb, int dst_stride_argb,
    271                int width, int height) {
    272   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    273     return -1;
    274   }
    275   // Negative height means invert the image.
    276   if (height < 0) {
    277     height = -height;
    278     src_argb = src_argb + (height - 1) * src_stride_argb;
    279     src_stride_argb = -src_stride_argb;
    280   }
    281 
    282   void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
    283       ARGBMirrorRow_C;
    284 #if defined(HAS_ARGBMIRRORROW_SSSE3)
    285   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
    286       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
    287       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    288     ARGBMirrorRow = ARGBMirrorRow_SSSE3;
    289   }
    290 #endif
    291 
    292   // Mirror plane
    293   for (int y = 0; y < height; ++y) {
    294     ARGBMirrorRow(src_argb, dst_argb, width);
    295     src_argb += src_stride_argb;
    296     dst_argb += dst_stride_argb;
    297   }
    298   return 0;
    299 }
    300 
    301 // Get a blender that optimized for the CPU, alignment and pixel count.
    302 // As there are 6 blenders to choose from, the caller should try to use
    303 // the same blend function for all pixels if possible.
    304 LIBYUV_API
    305 ARGBBlendRow GetARGBBlend() {
    306   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    307                        uint8* dst_argb, int width) = ARGBBlendRow_C;
    308 #if defined(HAS_ARGBBLENDROW_SSSE3)
    309   if (TestCpuFlag(kCpuHasSSSE3)) {
    310     ARGBBlendRow = ARGBBlendRow_SSSE3;
    311     return ARGBBlendRow;
    312   }
    313 #endif
    314 #if defined(HAS_ARGBBLENDROW_SSE2)
    315   if (TestCpuFlag(kCpuHasSSE2)) {
    316     ARGBBlendRow = ARGBBlendRow_SSE2;
    317   }
    318 #endif
    319   return ARGBBlendRow;
    320 }
    321 
    322 // Alpha Blend 2 ARGB images and store to destination.
    323 LIBYUV_API
    324 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
    325               const uint8* src_argb1, int src_stride_argb1,
    326               uint8* dst_argb, int dst_stride_argb,
    327               int width, int height) {
    328   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    329     return -1;
    330   }
    331   // Negative height means invert the image.
    332   if (height < 0) {
    333     height = -height;
    334     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    335     dst_stride_argb = -dst_stride_argb;
    336   }
    337   void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
    338                        uint8* dst_argb, int width) = GetARGBBlend();
    339 
    340   for (int y = 0; y < height; ++y) {
    341     ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    342     src_argb0 += src_stride_argb0;
    343     src_argb1 += src_stride_argb1;
    344     dst_argb += dst_stride_argb;
    345   }
    346   return 0;
    347 }
    348 
    349 // Convert ARGB to I400.
    350 LIBYUV_API
    351 int ARGBToI400(const uint8* src_argb, int src_stride_argb,
    352                uint8* dst_y, int dst_stride_y,
    353                int width, int height) {
    354   if (!src_argb || !dst_y || width <= 0 || height == 0) {
    355     return -1;
    356   }
    357   if (height < 0) {
    358     height = -height;
    359     src_argb = src_argb + (height - 1) * src_stride_argb;
    360     src_stride_argb = -src_stride_argb;
    361   }
    362   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
    363       ARGBToYRow_C;
    364 #if defined(HAS_ARGBTOYROW_SSSE3)
    365   if (TestCpuFlag(kCpuHasSSSE3) &&
    366       IS_ALIGNED(width, 4) &&
    367       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
    368       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    369     ARGBToYRow = ARGBToYRow_SSSE3;
    370   }
    371 #endif
    372 
    373   for (int y = 0; y < height; ++y) {
    374     ARGBToYRow(src_argb, dst_y, width);
    375     src_argb += src_stride_argb;
    376     dst_y += dst_stride_y;
    377   }
    378   return 0;
    379 }
    380 
    381 // ARGB little endian (bgra in memory) to I422
    382 // same as I420 except UV plane is full height
    383 LIBYUV_API
    384 int ARGBToI422(const uint8* src_argb, int src_stride_argb,
    385                uint8* dst_y, int dst_stride_y,
    386                uint8* dst_u, int dst_stride_u,
    387                uint8* dst_v, int dst_stride_v,
    388                int width, int height) {
    389   if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    390     return -1;
    391   }
    392   if (height < 0) {
    393     height = -height;
    394     src_argb = src_argb + (height - 1) * src_stride_argb;
    395     src_stride_argb = -src_stride_argb;
    396   }
    397   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
    398       ARGBToYRow_C;
    399   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
    400                       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
    401 #if defined(HAS_ARGBTOYROW_SSSE3)
    402   if (TestCpuFlag(kCpuHasSSSE3)) {
    403     if (width > 16) {
    404       ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    405       ARGBToYRow = ARGBToYRow_Any_SSSE3;
    406     }
    407     if (IS_ALIGNED(width, 16)) {
    408       ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
    409       ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
    410       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    411         ARGBToUVRow = ARGBToUVRow_SSSE3;
    412         if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    413           ARGBToYRow = ARGBToYRow_SSSE3;
    414         }
    415       }
    416     }
    417   }
    418 #endif
    419 
    420   for (int y = 0; y < height; ++y) {
    421     ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
    422     ARGBToYRow(src_argb, dst_y, width);
    423     src_argb += src_stride_argb;
    424     dst_y += dst_stride_y;
    425     dst_u += dst_stride_u;
    426     dst_v += dst_stride_v;
    427   }
    428   return 0;
    429 }
    430 
    431 // Convert I422 to BGRA.
    432 LIBYUV_API
    433 int I422ToBGRA(const uint8* src_y, int src_stride_y,
    434                const uint8* src_u, int src_stride_u,
    435                const uint8* src_v, int src_stride_v,
    436                uint8* dst_bgra, int dst_stride_bgra,
    437                int width, int height) {
    438   if (!src_y || !src_u || !src_v ||
    439       !dst_bgra ||
    440       width <= 0 || height == 0) {
    441     return -1;
    442   }
    443   // Negative height means invert the image.
    444   if (height < 0) {
    445     height = -height;
    446     dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    447     dst_stride_bgra = -dst_stride_bgra;
    448   }
    449   void (*I422ToBGRARow)(const uint8* y_buf,
    450                         const uint8* u_buf,
    451                         const uint8* v_buf,
    452                         uint8* rgb_buf,
    453                         int width) = I422ToBGRARow_C;
    454 #if defined(HAS_I422TOBGRAROW_NEON)
    455   if (TestCpuFlag(kCpuHasNEON)) {
    456     I422ToBGRARow = I422ToBGRARow_Any_NEON;
    457     if (IS_ALIGNED(width, 16)) {
    458       I422ToBGRARow = I422ToBGRARow_NEON;
    459     }
    460   }
    461 #elif defined(HAS_I422TOBGRAROW_SSSE3)
    462   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    463     I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
    464     if (IS_ALIGNED(width, 8)) {
    465       I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
    466       if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
    467         I422ToBGRARow = I422ToBGRARow_SSSE3;
    468       }
    469     }
    470   }
    471 #endif
    472 
    473   for (int y = 0; y < height; ++y) {
    474     I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    475     dst_bgra += dst_stride_bgra;
    476     src_y += src_stride_y;
    477     src_u += src_stride_u;
    478     src_v += src_stride_v;
    479   }
    480   return 0;
    481 }
    482 
    483 // Convert I422 to ABGR.
    484 LIBYUV_API
    485 int I422ToABGR(const uint8* src_y, int src_stride_y,
    486                const uint8* src_u, int src_stride_u,
    487                const uint8* src_v, int src_stride_v,
    488                uint8* dst_abgr, int dst_stride_abgr,
    489                int width, int height) {
    490   if (!src_y || !src_u || !src_v ||
    491       !dst_abgr ||
    492       width <= 0 || height == 0) {
    493     return -1;
    494   }
    495   // Negative height means invert the image.
    496   if (height < 0) {
    497     height = -height;
    498     dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    499     dst_stride_abgr = -dst_stride_abgr;
    500   }
    501   void (*I422ToABGRRow)(const uint8* y_buf,
    502                         const uint8* u_buf,
    503                         const uint8* v_buf,
    504                         uint8* rgb_buf,
    505                         int width) = I422ToABGRRow_C;
    506 #if defined(HAS_I422TOABGRROW_NEON)
    507   if (TestCpuFlag(kCpuHasNEON)) {
    508     I422ToABGRRow = I422ToABGRRow_Any_NEON;
    509     if (IS_ALIGNED(width, 16)) {
    510       I422ToABGRRow = I422ToABGRRow_NEON;
    511     }
    512   }
    513 #elif defined(HAS_I422TOABGRROW_SSSE3)
    514   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    515     I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    516     if (IS_ALIGNED(width, 8)) {
    517       I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
    518       if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
    519         I422ToABGRRow = I422ToABGRRow_SSSE3;
    520       }
    521     }
    522   }
    523 #endif
    524 
    525   for (int y = 0; y < height; ++y) {
    526     I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    527     dst_abgr += dst_stride_abgr;
    528     src_y += src_stride_y;
    529     src_u += src_stride_u;
    530     src_v += src_stride_v;
    531   }
    532   return 0;
    533 }
    534 
    535 // Convert I422 to RGBA.
    536 LIBYUV_API
    537 int I422ToRGBA(const uint8* src_y, int src_stride_y,
    538                const uint8* src_u, int src_stride_u,
    539                const uint8* src_v, int src_stride_v,
    540                uint8* dst_rgba, int dst_stride_rgba,
    541                int width, int height) {
    542   if (!src_y || !src_u || !src_v ||
    543       !dst_rgba ||
    544       width <= 0 || height == 0) {
    545     return -1;
    546   }
    547   // Negative height means invert the image.
    548   if (height < 0) {
    549     height = -height;
    550     dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    551     dst_stride_rgba = -dst_stride_rgba;
    552   }
    553   void (*I422ToRGBARow)(const uint8* y_buf,
    554                         const uint8* u_buf,
    555                         const uint8* v_buf,
    556                         uint8* rgb_buf,
    557                         int width) = I422ToRGBARow_C;
    558 #if defined(HAS_I422TORGBAROW_NEON)
    559   if (TestCpuFlag(kCpuHasNEON)) {
    560     I422ToRGBARow = I422ToRGBARow_Any_NEON;
    561     if (IS_ALIGNED(width, 16)) {
    562       I422ToRGBARow = I422ToRGBARow_NEON;
    563     }
    564   }
    565 #elif defined(HAS_I422TORGBAROW_SSSE3)
    566   if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    567     I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    568     if (IS_ALIGNED(width, 8)) {
    569       I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
    570       if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
    571         I422ToRGBARow = I422ToRGBARow_SSSE3;
    572       }
    573     }
    574   }
    575 #endif
    576 
    577   for (int y = 0; y < height; ++y) {
    578     I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    579     dst_rgba += dst_stride_rgba;
    580     src_y += src_stride_y;
    581     src_u += src_stride_u;
    582     src_v += src_stride_v;
    583   }
    584   return 0;
    585 }
    586 
    587 // Convert ARGB to RGBA.
    588 LIBYUV_API
    589 int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
    590                uint8* dst_rgba, int dst_stride_rgba,
    591                int width, int height) {
    592   if (!src_argb || !dst_rgba ||
    593       width <= 0 || height == 0) {
    594     return -1;
    595   }
    596   // Negative height means invert the image.
    597   if (height < 0) {
    598     height = -height;
    599     src_argb = src_argb + (height - 1) * src_stride_argb;
    600     src_stride_argb = -src_stride_argb;
    601   }
    602   void (*ARGBToRGBARow)(const uint8* src_argb, uint8* dst_rgba, int pix) =
    603       ARGBToRGBARow_C;
    604 #if defined(HAS_ARGBTORGBAROW_SSSE3)
    605   if (TestCpuFlag(kCpuHasSSSE3) &&
    606       IS_ALIGNED(width, 4) &&
    607       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
    608       IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
    609     ARGBToRGBARow = ARGBToRGBARow_SSSE3;
    610   }
    611 #endif
    612 #if defined(HAS_ARGBTORGBAROW_NEON)
    613   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    614     ARGBToRGBARow = ARGBToRGBARow_NEON;
    615   }
    616 #endif
    617 
    618   for (int y = 0; y < height; ++y) {
    619     ARGBToRGBARow(src_argb, dst_rgba, width);
    620     src_argb += src_stride_argb;
    621     dst_rgba += dst_stride_rgba;
    622   }
    623   return 0;
    624 }
    625 
    626 // Convert ARGB To RGB24.
    627 LIBYUV_API
    628 int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
    629                 uint8* dst_rgb24, int dst_stride_rgb24,
    630                 int width, int height) {
    631   if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
    632     return -1;
    633   }
    634   if (height < 0) {
    635     height = -height;
    636     src_argb = src_argb + (height - 1) * src_stride_argb;
    637     src_stride_argb = -src_stride_argb;
    638   }
    639   void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
    640       ARGBToRGB24Row_C;
    641 #if defined(HAS_ARGBTORGB24ROW_SSSE3)
    642   if (TestCpuFlag(kCpuHasSSSE3) &&
    643       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    644     if (width * 3 <= kMaxStride) {
    645       ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
    646     }
    647     if (IS_ALIGNED(width, 16) &&
    648         IS_ALIGNED(dst_rgb24, 16) && IS_ALIGNED(dst_stride_rgb24, 16)) {
    649       ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
    650     }
    651   }
    652 #endif
    653 #if defined(HAS_ARGBTORGB24ROW_NEON)
    654   if (TestCpuFlag(kCpuHasNEON)) {
    655     if (width * 3 <= kMaxStride) {
    656       ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
    657     }
    658     if (IS_ALIGNED(width, 8)) {
    659       ARGBToRGB24Row = ARGBToRGB24Row_NEON;
    660     }
    661   }
    662 #endif
    663 
    664   for (int y = 0; y < height; ++y) {
    665     ARGBToRGB24Row(src_argb, dst_rgb24, width);
    666     src_argb += src_stride_argb;
    667     dst_rgb24 += dst_stride_rgb24;
    668   }
    669   return 0;
    670 }
    671 
    672 // Convert ARGB To RAW.
    673 LIBYUV_API
    674 int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
    675               uint8* dst_raw, int dst_stride_raw,
    676               int width, int height) {
    677   if (!src_argb || !dst_raw || width <= 0 || height == 0) {
    678     return -1;
    679   }
    680   if (height < 0) {
    681     height = -height;
    682     src_argb = src_argb + (height - 1) * src_stride_argb;
    683     src_stride_argb = -src_stride_argb;
    684   }
    685   void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
    686       ARGBToRAWRow_C;
    687 #if defined(HAS_ARGBTORAWROW_SSSE3)
    688   if (TestCpuFlag(kCpuHasSSSE3) &&
    689       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    690     if (width * 3 <= kMaxStride) {
    691       ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
    692     }
    693     if (IS_ALIGNED(width, 16) &&
    694         IS_ALIGNED(dst_raw, 16) && IS_ALIGNED(dst_stride_raw, 16)) {
    695       ARGBToRAWRow = ARGBToRAWRow_SSSE3;
    696     }
    697   }
    698 #endif
    699 #if defined(HAS_ARGBTORAWROW_NEON)
    700   if (TestCpuFlag(kCpuHasNEON)) {
    701     if (width * 3 <= kMaxStride) {
    702       ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
    703     }
    704     if (IS_ALIGNED(width, 8)) {
    705       ARGBToRAWRow = ARGBToRAWRow_NEON;
    706     }
    707   }
    708 #endif
    709 
    710   for (int y = 0; y < height; ++y) {
    711     ARGBToRAWRow(src_argb, dst_raw, width);
    712     src_argb += src_stride_argb;
    713     dst_raw += dst_stride_raw;
    714   }
    715   return 0;
    716 }
    717 
    718 // Convert ARGB To RGB565.
    719 LIBYUV_API
    720 int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
    721                  uint8* dst_rgb565, int dst_stride_rgb565,
    722                  int width, int height) {
    723   if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
    724     return -1;
    725   }
    726   if (height < 0) {
    727     height = -height;
    728     src_argb = src_argb + (height - 1) * src_stride_argb;
    729     src_stride_argb = -src_stride_argb;
    730   }
    731   void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
    732       ARGBToRGB565Row_C;
    733 #if defined(HAS_ARGBTORGB565ROW_SSE2)
    734   if (TestCpuFlag(kCpuHasSSE2) &&
    735       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    736     if (width * 2 <= kMaxStride) {
    737       ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
    738     }
    739     if (IS_ALIGNED(width, 4)) {
    740       ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
    741     }
    742   }
    743 #endif
    744 
    745   for (int y = 0; y < height; ++y) {
    746     ARGBToRGB565Row(src_argb, dst_rgb565, width);
    747     src_argb += src_stride_argb;
    748     dst_rgb565 += dst_stride_rgb565;
    749   }
    750   return 0;
    751 }
    752 
    753 // Convert ARGB To ARGB1555.
    754 LIBYUV_API
    755 int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
    756                    uint8* dst_argb1555, int dst_stride_argb1555,
    757                    int width, int height) {
    758   if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
    759     return -1;
    760   }
    761   if (height < 0) {
    762     height = -height;
    763     src_argb = src_argb + (height - 1) * src_stride_argb;
    764     src_stride_argb = -src_stride_argb;
    765   }
    766   void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
    767       ARGBToARGB1555Row_C;
    768 #if defined(HAS_ARGBTOARGB1555ROW_SSE2)
    769   if (TestCpuFlag(kCpuHasSSE2) &&
    770       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    771     if (width * 2 <= kMaxStride) {
    772       ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
    773     }
    774     if (IS_ALIGNED(width, 4)) {
    775       ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
    776     }
    777   }
    778 #endif
    779 
    780   for (int y = 0; y < height; ++y) {
    781     ARGBToARGB1555Row(src_argb, dst_argb1555, width);
    782     src_argb += src_stride_argb;
    783     dst_argb1555 += dst_stride_argb1555;
    784   }
    785   return 0;
    786 }
    787 
    788 // Convert ARGB To ARGB4444.
    789 LIBYUV_API
    790 int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
    791                    uint8* dst_argb4444, int dst_stride_argb4444,
    792                    int width, int height) {
    793   if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
    794     return -1;
    795   }
    796   if (height < 0) {
    797     height = -height;
    798     src_argb = src_argb + (height - 1) * src_stride_argb;
    799     src_stride_argb = -src_stride_argb;
    800   }
    801   void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
    802       ARGBToARGB4444Row_C;
    803 #if defined(HAS_ARGBTOARGB4444ROW_SSE2)
    804   if (TestCpuFlag(kCpuHasSSE2) &&
    805       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    806     if (width * 2 <= kMaxStride) {
    807       ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
    808     }
    809     if (IS_ALIGNED(width, 4)) {
    810       ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
    811     }
    812   }
    813 #endif
    814 
    815   for (int y = 0; y < height; ++y) {
    816     ARGBToARGB4444Row(src_argb, dst_argb4444, width);
    817     src_argb += src_stride_argb;
    818     dst_argb4444 += dst_stride_argb4444;
    819   }
    820   return 0;
    821 }
    822 
    823 // Convert NV12 to RGB565.
    824 // TODO(fbarchard): (Re) Optimize for Neon.
    825 LIBYUV_API
    826 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
    827                  const uint8* src_uv, int src_stride_uv,
    828                  uint8* dst_rgb565, int dst_stride_rgb565,
    829                  int width, int height) {
    830   if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
    831     return -1;
    832   }
    833   // Negative height means invert the image.
    834   if (height < 0) {
    835     height = -height;
    836     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    837     dst_stride_rgb565 = -dst_stride_rgb565;
    838   }
    839   void (*NV12ToARGBRow)(const uint8* y_buf,
    840                         const uint8* uv_buf,
    841                         uint8* rgb_buf,
    842                         int width) = NV12ToARGBRow_C;
    843 #if defined(HAS_NV12TOARGBROW_SSSE3)
    844   if (TestCpuFlag(kCpuHasSSSE3) && width * 4 <= kMaxStride) {
    845     NV12ToARGBRow = NV12ToARGBRow_SSSE3;
    846   }
    847 #endif
    848 #if defined(HAS_NV12TOARGBROW_NEON)
    849   if (TestCpuFlag(kCpuHasNEON) && width * 4 <= kMaxStride) {
    850     NV12ToARGBRow = NV12ToARGBRow_NEON;
    851   }
    852 #endif
    853 
    854   SIMD_ALIGNED(uint8 row[kMaxStride]);
    855   void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
    856       ARGBToRGB565Row_C;
    857 #if defined(HAS_ARGBTORGB565ROW_SSE2)
    858   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
    859     ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
    860   }
    861 #endif
    862 
    863   for (int y = 0; y < height; ++y) {
    864     NV12ToARGBRow(src_y, src_uv, row, width);
    865     ARGBToRGB565Row(row, dst_rgb565, width);
    866     dst_rgb565 += dst_stride_rgb565;
    867     src_y += src_stride_y;
    868     if (y & 1) {
    869       src_uv += src_stride_uv;
    870     }
    871   }
    872   return 0;
    873 }
    874 
    875 // Convert NV21 to RGB565.
    876 LIBYUV_API
    877 int NV21ToRGB565(const uint8* src_y, int src_stride_y,
    878                  const uint8* src_vu, int src_stride_vu,
    879                  uint8* dst_rgb565, int dst_stride_rgb565,
    880                  int width, int height) {
    881   if (!src_y || !src_vu || !dst_rgb565 || width <= 0 || height == 0) {
    882     return -1;
    883   }
    884   // Negative height means invert the image.
    885   if (height < 0) {
    886     height = -height;
    887     dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    888     dst_stride_rgb565 = -dst_stride_rgb565;
    889   }
    890   void (*NV21ToARGBRow)(const uint8* y_buf,
    891                         const uint8* uv_buf,
    892                         uint8* rgb_buf,
    893                         int width) = NV21ToARGBRow_C;
    894 #if defined(HAS_NV21TOARGBROW_SSSE3)
    895   if (TestCpuFlag(kCpuHasSSSE3) && width * 4 <= kMaxStride) {
    896     NV21ToARGBRow = NV21ToARGBRow_SSSE3;
    897   }
    898 #endif
    899 
    900   SIMD_ALIGNED(uint8 row[kMaxStride]);
    901   void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
    902       ARGBToRGB565Row_C;
    903 #if defined(HAS_ARGBTORGB565ROW_SSE2)
    904   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
    905     ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
    906   }
    907 #endif
    908 
    909   for (int y = 0; y < height; ++y) {
    910     NV21ToARGBRow(src_y, src_vu, row, width);
    911     ARGBToRGB565Row(row, dst_rgb565, width);
    912     dst_rgb565 += dst_stride_rgb565;
    913     src_y += src_stride_y;
    914     if (y & 1) {
    915       src_vu += src_stride_vu;
    916     }
    917   }
    918   return 0;
    919 }
    920 
    921 LIBYUV_API
    922 void SetPlane(uint8* dst_y, int dst_stride_y,
    923               int width, int height,
    924               uint32 value) {
    925   void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C;
    926 #if defined(HAS_SETROW_NEON)
    927   if (TestCpuFlag(kCpuHasNEON) &&
    928       IS_ALIGNED(width, 16) &&
    929       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    930     SetRow = SetRow8_NEON;
    931   }
    932 #endif
    933 #if defined(HAS_SETROW_X86)
    934   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    935     SetRow = SetRow8_X86;
    936   }
    937 #endif
    938 
    939   uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
    940   // Set plane
    941   for (int y = 0; y < height; ++y) {
    942     SetRow(dst_y, v32, width);
    943     dst_y += dst_stride_y;
    944   }
    945 }
    946 
    947 // Draw a rectangle into I420
    948 LIBYUV_API
    949 int I420Rect(uint8* dst_y, int dst_stride_y,
    950              uint8* dst_u, int dst_stride_u,
    951              uint8* dst_v, int dst_stride_v,
    952              int x, int y,
    953              int width, int height,
    954              int value_y, int value_u, int value_v) {
    955   if (!dst_y || !dst_u || !dst_v ||
    956       width <= 0 || height <= 0 ||
    957       x < 0 || y < 0 ||
    958       value_y < 0 || value_y > 255 ||
    959       value_u < 0 || value_u > 255 ||
    960       value_v < 0 || value_v > 255) {
    961     return -1;
    962   }
    963   int halfwidth = (width + 1) >> 1;
    964   int halfheight = (height + 1) >> 1;
    965   uint8* start_y = dst_y + y * dst_stride_y + x;
    966   uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
    967   uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
    968 
    969   SetPlane(start_y, dst_stride_y, width, height, value_y);
    970   SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
    971   SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
    972   return 0;
    973 }
    974 
    975 // Draw a rectangle into ARGB
    976 LIBYUV_API
    977 int ARGBRect(uint8* dst_argb, int dst_stride_argb,
    978              int dst_x, int dst_y,
    979              int width, int height,
    980              uint32 value) {
    981   if (!dst_argb ||
    982       width <= 0 || height <= 0 ||
    983       dst_x < 0 || dst_y < 0) {
    984     return -1;
    985   }
    986   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
    987 #if defined(HAS_SETROW_NEON)
    988   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
    989       IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    990     SetRows32_NEON(dst, value, width, dst_stride_argb, height);
    991     return 0;
    992   }
    993 #endif
    994 #if defined(HAS_SETROW_X86)
    995   if (TestCpuFlag(kCpuHasX86)) {
    996     SetRows32_X86(dst, value, width, dst_stride_argb, height);
    997     return 0;
    998   }
    999 #endif
   1000   SetRows32_C(dst, value, width, dst_stride_argb, height);
   1001   return 0;
   1002 }
   1003 
   1004 // Convert unattentuated ARGB to preattenuated ARGB.
   1005 // An unattenutated ARGB alpha blend uses the formula
   1006 // p = a * f + (1 - a) * b
   1007 // where
   1008 //   p is output pixel
   1009 //   f is foreground pixel
   1010 //   b is background pixel
   1011 //   a is alpha value from foreground pixel
   1012 // An preattenutated ARGB alpha blend uses the formula
   1013 // p = f + (1 - a) * b
   1014 // where
   1015 //   f is foreground pixel premultiplied by alpha
   1016 
   1017 LIBYUV_API
   1018 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
   1019                   uint8* dst_argb, int dst_stride_argb,
   1020                   int width, int height) {
   1021   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1022     return -1;
   1023   }
   1024   if (height < 0) {
   1025     height = -height;
   1026     src_argb = src_argb + (height - 1) * src_stride_argb;
   1027     src_stride_argb = -src_stride_argb;
   1028   }
   1029   void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1030                            int width) = ARGBAttenuateRow_C;
   1031 #if defined(HAS_ARGBATTENUATE_SSE2)
   1032   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1033       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1034       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1035     ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
   1036   }
   1037 #endif
   1038 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
   1039   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
   1040       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1041       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1042     ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
   1043   }
   1044 #endif
   1045 
   1046   for (int y = 0; y < height; ++y) {
   1047     ARGBAttenuateRow(src_argb, dst_argb, width);
   1048     src_argb += src_stride_argb;
   1049     dst_argb += dst_stride_argb;
   1050   }
   1051   return 0;
   1052 }
   1053 
   1054 // Convert preattentuated ARGB to unattenuated ARGB.
   1055 LIBYUV_API
   1056 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
   1057                     uint8* dst_argb, int dst_stride_argb,
   1058                     int width, int height) {
   1059   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1060     return -1;
   1061   }
   1062   if (height < 0) {
   1063     height = -height;
   1064     src_argb = src_argb + (height - 1) * src_stride_argb;
   1065     src_stride_argb = -src_stride_argb;
   1066   }
   1067   void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
   1068                              int width) = ARGBUnattenuateRow_C;
   1069 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
   1070   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1071       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1072       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1073     ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
   1074   }
   1075 #endif
   1076 
   1077   for (int y = 0; y < height; ++y) {
   1078     ARGBUnattenuateRow(src_argb, dst_argb, width);
   1079     src_argb += src_stride_argb;
   1080     dst_argb += dst_stride_argb;
   1081   }
   1082   return 0;
   1083 }
   1084 
   1085 // Convert ARGB to Grayed ARGB.
   1086 LIBYUV_API
   1087 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
   1088                uint8* dst_argb, int dst_stride_argb,
   1089                int width, int height) {
   1090   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1091     return -1;
   1092   }
   1093   if (height < 0) {
   1094     height = -height;
   1095     src_argb = src_argb + (height - 1) * src_stride_argb;
   1096     src_stride_argb = -src_stride_argb;
   1097   }
   1098   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1099                       int width) = ARGBGrayRow_C;
   1100 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1101   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1102       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1103       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1104     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1105   }
   1106 #endif
   1107 
   1108   for (int y = 0; y < height; ++y) {
   1109     ARGBGrayRow(src_argb, dst_argb, width);
   1110     src_argb += src_stride_argb;
   1111     dst_argb += dst_stride_argb;
   1112   }
   1113   return 0;
   1114 }
   1115 
   1116 // Make a rectangle of ARGB gray scale.
   1117 LIBYUV_API
   1118 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
   1119              int dst_x, int dst_y,
   1120              int width, int height) {
   1121   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1122     return -1;
   1123   }
   1124   void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
   1125                       int width) = ARGBGrayRow_C;
   1126 #if defined(HAS_ARGBGRAYROW_SSSE3)
   1127   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1128       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1129     ARGBGrayRow = ARGBGrayRow_SSSE3;
   1130   }
   1131 #endif
   1132   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1133   for (int y = 0; y < height; ++y) {
   1134     ARGBGrayRow(dst, dst, width);
   1135     dst += dst_stride_argb;
   1136   }
   1137   return 0;
   1138 }
   1139 
   1140 // Make a rectangle of ARGB Sepia tone.
   1141 LIBYUV_API
   1142 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
   1143               int dst_x, int dst_y, int width, int height) {
   1144   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
   1145     return -1;
   1146   }
   1147   void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
   1148 #if defined(HAS_ARGBSEPIAROW_SSSE3)
   1149   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1150       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1151     ARGBSepiaRow = ARGBSepiaRow_SSSE3;
   1152   }
   1153 #endif
   1154   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1155   for (int y = 0; y < height; ++y) {
   1156     ARGBSepiaRow(dst, width);
   1157     dst += dst_stride_argb;
   1158   }
   1159   return 0;
   1160 }
   1161 
   1162 // Apply a 4x3 matrix rotation to each ARGB pixel.
   1163 LIBYUV_API
   1164 int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
   1165                     const int8* matrix_argb,
   1166                     int dst_x, int dst_y, int width, int height) {
   1167   if (!dst_argb || !matrix_argb || width <= 0 || height <= 0 ||
   1168       dst_x < 0 || dst_y < 0) {
   1169     return -1;
   1170   }
   1171   void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb,
   1172                              int width) = ARGBColorMatrixRow_C;
   1173 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
   1174   if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
   1175       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1176     ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
   1177   }
   1178 #endif
   1179   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1180   for (int y = 0; y < height; ++y) {
   1181     ARGBColorMatrixRow(dst, matrix_argb, width);
   1182     dst += dst_stride_argb;
   1183   }
   1184   return 0;
   1185 }
   1186 
   1187 // Apply a color table each ARGB pixel.
   1188 // Table contains 256 ARGB values.
   1189 LIBYUV_API
   1190 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
   1191                    const uint8* table_argb,
   1192                    int dst_x, int dst_y, int width, int height) {
   1193   if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
   1194       dst_x < 0 || dst_y < 0) {
   1195     return -1;
   1196   }
   1197   void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
   1198                             int width) = ARGBColorTableRow_C;
   1199 #if defined(HAS_ARGBCOLORTABLEROW_X86)
   1200   if (TestCpuFlag(kCpuHasX86)) {
   1201     ARGBColorTableRow = ARGBColorTableRow_X86;
   1202   }
   1203 #endif
   1204   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1205   for (int y = 0; y < height; ++y) {
   1206     ARGBColorTableRow(dst, table_argb, width);
   1207     dst += dst_stride_argb;
   1208   }
   1209   return 0;
   1210 }
   1211 
   1212 // ARGBQuantize is used to posterize art.
   1213 // e.g. rgb / qvalue * qvalue + qvalue / 2
   1214 // But the low levels implement efficiently with 3 parameters, and could be
   1215 // used for other high level operations.
   1216 // The divide is replaces with a multiply by reciprocal fixed point multiply.
   1217 // Caveat - although SSE2 saturates, the C function does not and should be used
   1218 // with care if doing anything but quantization.
   1219 LIBYUV_API
   1220 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
   1221                  int scale, int interval_size, int interval_offset,
   1222                  int dst_x, int dst_y, int width, int height) {
   1223   if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
   1224       interval_size < 1 || interval_size > 255) {
   1225     return -1;
   1226   }
   1227   void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
   1228                           int interval_offset, int width) = ARGBQuantizeRow_C;
   1229 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
   1230   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1231       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1232     ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
   1233   }
   1234 #endif
   1235   uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
   1236   for (int y = 0; y < height; ++y) {
   1237     ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
   1238     dst += dst_stride_argb;
   1239   }
   1240   return 0;
   1241 }
   1242 
   1243 // Computes table of cumulative sum for image where the value is the sum
   1244 // of all values above and to the left of the entry. Used by ARGBBlur.
   1245 LIBYUV_API
   1246 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
   1247                              int32* dst_cumsum, int dst_stride32_cumsum,
   1248                              int width, int height) {
   1249   if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
   1250     return -1;
   1251   }
   1252   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
   1253       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1254 #if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
   1255   if (TestCpuFlag(kCpuHasSSE2)) {
   1256     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1257   }
   1258 #endif
   1259   memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
   1260   int32* previous_cumsum = dst_cumsum;
   1261   for (int y = 0; y < height; ++y) {
   1262     ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
   1263     previous_cumsum = dst_cumsum;
   1264     dst_cumsum += dst_stride32_cumsum;
   1265     src_argb += src_stride_argb;
   1266   }
   1267   return 0;
   1268 }
   1269 
   1270 // Blur ARGB image.
   1271 // Caller should allocate CumulativeSum table of width * height * 16 bytes
   1272 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
   1273 // as the buffer is treated as circular.
   1274 LIBYUV_API
   1275 int ARGBBlur(const uint8* src_argb, int src_stride_argb,
   1276              uint8* dst_argb, int dst_stride_argb,
   1277              int32* dst_cumsum, int dst_stride32_cumsum,
   1278              int width, int height, int radius) {
   1279   if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1280     return -1;
   1281   }
   1282   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
   1283       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
   1284   void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
   1285       int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C;
   1286 #if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
   1287   if (TestCpuFlag(kCpuHasSSE2)) {
   1288     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   1289     CumulativeSumToAverage = CumulativeSumToAverage_SSE2;
   1290   }
   1291 #endif
   1292   // Compute enough CumulativeSum for first row to be blurred. After this
   1293   // one row of CumulativeSum is updated at a time.
   1294   ARGBComputeCumulativeSum(src_argb, src_stride_argb,
   1295                            dst_cumsum, dst_stride32_cumsum,
   1296                            width, radius);
   1297 
   1298   src_argb = src_argb + radius * src_stride_argb;
   1299   int32* cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
   1300 
   1301   const int32* max_cumsum_bot_row =
   1302       &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
   1303   const int32* cumsum_top_row = &dst_cumsum[0];
   1304 
   1305   for (int y = 0; y < height; ++y) {
   1306     int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
   1307     int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
   1308     int area = radius * (bot_y - top_y);
   1309 
   1310     // Increment cumsum_top_row pointer with circular buffer wrap around.
   1311     if (top_y) {
   1312       cumsum_top_row += dst_stride32_cumsum;
   1313       if (cumsum_top_row >= max_cumsum_bot_row) {
   1314         cumsum_top_row = dst_cumsum;
   1315       }
   1316     }
   1317     // Increment cumsum_bot_row pointer with circular buffer wrap around and
   1318     // then fill in a row of CumulativeSum.
   1319     if ((y + radius) < height) {
   1320       const int32* prev_cumsum_bot_row = cumsum_bot_row;
   1321       cumsum_bot_row += dst_stride32_cumsum;
   1322       if (cumsum_bot_row >= max_cumsum_bot_row) {
   1323         cumsum_bot_row = dst_cumsum;
   1324       }
   1325       ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
   1326                               width);
   1327       src_argb += src_stride_argb;
   1328     }
   1329 
   1330     // Left clipped.
   1331     int boxwidth = radius * 4;
   1332     int x;
   1333     for (x = 0; x < radius + 1; ++x) {
   1334       CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
   1335                               boxwidth, area, &dst_argb[x * 4], 1);
   1336       area += (bot_y - top_y);
   1337       boxwidth += 4;
   1338     }
   1339 
   1340     // Middle unclipped.
   1341     int n = (width - 1) - radius - x + 1;
   1342     CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
   1343                            boxwidth, area, &dst_argb[x * 4], n);
   1344 
   1345     // Right clipped.
   1346     for (x += n; x <= width - 1; ++x) {
   1347       area -= (bot_y - top_y);
   1348       boxwidth -= 4;
   1349       CumulativeSumToAverage(cumsum_top_row + (x - radius - 1) * 4,
   1350                              cumsum_bot_row + (x - radius - 1) * 4,
   1351                              boxwidth, area, &dst_argb[x * 4], 1);
   1352     }
   1353     dst_argb += dst_stride_argb;
   1354   }
   1355   return 0;
   1356 }
   1357 
   1358 // Multiply ARGB image by a specified ARGB value.
   1359 LIBYUV_API
   1360 int ARGBShade(const uint8* src_argb, int src_stride_argb,
   1361               uint8* dst_argb, int dst_stride_argb,
   1362               int width, int height, uint32 value) {
   1363   if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
   1364     return -1;
   1365   }
   1366   if (height < 0) {
   1367     height = -height;
   1368     src_argb = src_argb + (height - 1) * src_stride_argb;
   1369     src_stride_argb = -src_stride_argb;
   1370   }
   1371   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
   1372                        int width, uint32 value) = ARGBShadeRow_C;
   1373 #if defined(HAS_ARGBSHADE_SSE2)
   1374   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
   1375       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1376       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1377     ARGBShadeRow = ARGBShadeRow_SSE2;
   1378   }
   1379 #endif
   1380 
   1381   for (int y = 0; y < height; ++y) {
   1382     ARGBShadeRow(src_argb, dst_argb, width, value);
   1383     src_argb += src_stride_argb;
   1384     dst_argb += dst_stride_argb;
   1385   }
   1386   return 0;
   1387 }
   1388 
   1389 // Interpolate 2 ARGB images by specified amount (0 to 255).
   1390 LIBYUV_API
   1391 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
   1392                     const uint8* src_argb1, int src_stride_argb1,
   1393                     uint8* dst_argb, int dst_stride_argb,
   1394                     int width, int height, int interpolation) {
   1395   if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1396     return -1;
   1397   }
   1398   // Negative height means invert the image.
   1399   if (height < 0) {
   1400     height = -height;
   1401     dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1402     dst_stride_argb = -dst_stride_argb;
   1403   }
   1404   void (*ARGBInterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   1405                               ptrdiff_t src_stride, int dst_width,
   1406                               int source_y_fraction) = ARGBInterpolateRow_C;
   1407 #if defined(HAS_ARGBINTERPOLATEROW_SSSE3)
   1408   if (TestCpuFlag(kCpuHasSSSE3) &&
   1409       IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
   1410       IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
   1411       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1412     ARGBInterpolateRow = ARGBInterpolateRow_SSSE3;
   1413   }
   1414 #endif
   1415   for (int y = 0; y < height; ++y) {
   1416     ARGBInterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
   1417                        width, interpolation);
   1418     src_argb0 += src_stride_argb0;
   1419     src_argb1 += src_stride_argb1;
   1420     dst_argb += dst_stride_argb;
   1421   }
   1422   return 0;
   1423 }
   1424 
   1425 #ifdef __cplusplus
   1426 }  // extern "C"
   1427 }  // namespace libyuv
   1428 #endif
   1429