Home | History | Annotate | Download | only in libyuv
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
     12 #define INCLUDE_LIBYUV_ROW_H_
     13 
     14 #include <stdlib.h>  // For malloc.
     15 
     16 #include "libyuv/basic_types.h"
     17 
     18 #ifdef __cplusplus
     19 namespace libyuv {
     20 extern "C" {
     21 #endif
     22 
     // Non-zero when |p| (a pointer or integer) is a multiple of |a|.
     // The low bits of |p| are masked with (a - 1), so the answer is only
     // meaningful when |a| is a power of two.
     #define IS_ALIGNED(p, a) ((((uintptr_t)(p)) & ((a) - 1)) == 0)
     24 
     // align_buffer_64(var, size) declares two pointers in the current scope:
     //   var##_mem - the raw malloc() result; this is what must be freed.
     //   var       - var##_mem rounded up to the next multiple of 64.
     // 63 extra bytes are requested so the rounded-up pointer still has |size|
     // usable bytes. The malloc() result is not checked here.
     #if !defined(__cplusplus)
     #define align_buffer_64(var, size)                                        \
       uint8* var##_mem = (uint8*)(malloc((size) + 63));          /* NOLINT */ \
       uint8* var =                                                            \
           (uint8*)(((intptr_t)(var##_mem) + 63) & ~(intptr_t)63) /* NOLINT */
     #else
     #define align_buffer_64(var, size)                                        \
       uint8* var##_mem = static_cast<uint8*>(malloc((size) + 63));            \
       uint8* var = reinterpret_cast<uint8*>(                                  \
           (reinterpret_cast<intptr_t>(var##_mem) + 63) &                      \
           ~static_cast<intptr_t>(63))
     #endif
     35 
     // Releases a buffer declared with align_buffer_64(var, ...) and clears |var|.
     // free() receives var##_mem (the pointer malloc returned), never the aligned
     // |var| itself.
     // The body is wrapped in do/while(0) so the two statements act as a single
     // statement, e.g. as the unbraced body of an if/else. Callers still write
     // the trailing ';' exactly as before.
     #define free_aligned_buffer_64(var) \
       do {                              \
         free(var##_mem);                \
         var = 0;                        \
       } while (0)
     39 
     40 #if defined(__pnacl__) || defined(__CLR_VER) || \
     41     (defined(__i386__) && !defined(__SSE2__))
        // PNaCl, managed (CLR) builds and 32 bit x86 without SSE2 get no x86 row
        // functions: each x86 HAS_* group visible below tests this macro.
     42 #define LIBYUV_DISABLE_X86
     43 #endif
     44 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
        // __has_feature is probed in a nested #if so that compilers which do not
        // provide it never evaluate the function-like form.
     45 #if defined(__has_feature)
     46 #if __has_feature(memory_sanitizer)
     47 #define LIBYUV_DISABLE_X86
     48 #endif
     49 #endif
     50 // True if compiling for SSSE3 as a requirement.
        // Set when the compiler defines __SSSE3__ or when _M_IX86_FP is at least 3.
     51 #if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
     52 #define LIBYUV_SSSE3_ONLY
     53 #endif
     54 
     55 #if defined(__native_client__)
        // Native Client builds opt out of the NEON HAS_* group further below.
     56 #define LIBYUV_DISABLE_NEON
     57 #endif
     58 // clang >= 3.5.0 required for Arm64.
        // An older clang targeting aarch64 also disables NEON. The check is skipped
        // when LIBYUV_DISABLE_NEON is already defined, avoiding a redefinition.
     59 #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
     60 #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
     61 #define LIBYUV_DISABLE_NEON
     62 #endif  // clang >= 3.5
     63 #endif  // __clang__
     64 
     65 // GCC >= 4.7.0 required for AVX2.
        // Evaluated only for x86 / x86-64 targets. Defines GCC_HAS_AVX2, one of
        // the three compiler capability flags consumed by the AVX2 group below.
     66 #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
     67 #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
     68 #define GCC_HAS_AVX2 1
     69 #endif  // GNUC >= 4.7
     70 #endif  // __GNUC__
     71 
     72 // clang >= 3.4.0 required for AVX2.
        // Same idea for clang on x86 / x86-64; defines CLANG_HAS_AVX2.
     73 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
     74 #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
     75 #define CLANG_HAS_AVX2 1
     76 #endif  // clang >= 3.4
     77 #endif  // __clang__
     78 
     79 // Visual C 2012 required for AVX2.
        // Only set for 32 bit (_M_IX86) builds with a non-clang compiler whose
        // _MSC_VER is at least 1700.
     80 #if defined(_M_IX86) && !defined(__clang__) && \
     81     defined(_MSC_VER) && _MSC_VER >= 1700
     82 #define VISUALC_HAS_AVX2 1
     83 #endif  // VisualStudio >= 2012
     84 
     85 // The following are available on all x86 platforms:
        // Feature switches named HAS_<function>_<instruction set>. The whole
        // group is skipped when LIBYUV_DISABLE_X86 is defined or when the target
        // is not 32/64 bit x86 as reported by _M_IX86, __x86_64__ or __i386__.
     86 #if !defined(LIBYUV_DISABLE_X86) && \
     87     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
     88 // Conversions:
     89 #define HAS_ABGRTOUVROW_SSSE3
     90 #define HAS_ABGRTOYROW_SSSE3
     91 #define HAS_ARGB1555TOARGBROW_SSE2
     92 #define HAS_ARGB4444TOARGBROW_SSE2
     93 #define HAS_ARGBSETROW_X86
     94 #define HAS_ARGBSHUFFLEROW_SSE2
     95 #define HAS_ARGBSHUFFLEROW_SSSE3
     96 #define HAS_ARGBTOARGB1555ROW_SSE2
     97 #define HAS_ARGBTOARGB4444ROW_SSE2
     98 #define HAS_ARGBTORAWROW_SSSE3
     99 #define HAS_ARGBTORGB24ROW_SSSE3
    100 #define HAS_ARGBTORGB565DITHERROW_SSE2
    101 #define HAS_ARGBTORGB565ROW_SSE2
    102 #define HAS_ARGBTOUV444ROW_SSSE3
    103 #define HAS_ARGBTOUVJROW_SSSE3
    104 #define HAS_ARGBTOUVROW_SSSE3
    105 #define HAS_ARGBTOYJROW_SSSE3
    106 #define HAS_ARGBTOYROW_SSSE3
    107 #define HAS_ARGBEXTRACTALPHAROW_SSE2
    108 #define HAS_BGRATOUVROW_SSSE3
    109 #define HAS_BGRATOYROW_SSSE3
    110 #define HAS_COPYROW_ERMS
    111 #define HAS_COPYROW_SSE2
    112 #define HAS_H422TOARGBROW_SSSE3
    113 #define HAS_I400TOARGBROW_SSE2
    114 #define HAS_I422TOARGB1555ROW_SSSE3
    115 #define HAS_I422TOARGB4444ROW_SSSE3
    116 #define HAS_I422TOARGBROW_SSSE3
    117 #define HAS_I422TORGB24ROW_SSSE3
    118 #define HAS_I422TORGB565ROW_SSSE3
    119 #define HAS_I422TORGBAROW_SSSE3
    120 #define HAS_I422TOUYVYROW_SSE2
    121 #define HAS_I422TOYUY2ROW_SSE2
    122 #define HAS_I444TOARGBROW_SSSE3
    123 #define HAS_J400TOARGBROW_SSE2
    124 #define HAS_J422TOARGBROW_SSSE3
    125 #define HAS_MERGEUVROW_SSE2
    126 #define HAS_MIRRORROW_SSSE3
    127 #define HAS_MIRRORUVROW_SSSE3
    128 #define HAS_NV12TOARGBROW_SSSE3
    129 #define HAS_NV12TORGB565ROW_SSSE3
    130 #define HAS_NV21TOARGBROW_SSSE3
    131 #define HAS_RAWTOARGBROW_SSSE3
    132 #define HAS_RAWTORGB24ROW_SSSE3
    133 #define HAS_RAWTOYROW_SSSE3
    134 #define HAS_RGB24TOARGBROW_SSSE3
    135 #define HAS_RGB24TOYROW_SSSE3
    136 #define HAS_RGB565TOARGBROW_SSE2
    137 #define HAS_RGBATOUVROW_SSSE3
    138 #define HAS_RGBATOYROW_SSSE3
    139 #define HAS_SETROW_ERMS
    140 #define HAS_SETROW_X86
    141 #define HAS_SPLITUVROW_SSE2
    142 #define HAS_UYVYTOARGBROW_SSSE3
    143 #define HAS_UYVYTOUV422ROW_SSE2
    144 #define HAS_UYVYTOUVROW_SSE2
    145 #define HAS_UYVYTOYROW_SSE2
    146 #define HAS_YUY2TOARGBROW_SSSE3
    147 #define HAS_YUY2TOUV422ROW_SSE2
    148 #define HAS_YUY2TOUVROW_SSE2
    149 #define HAS_YUY2TOYROW_SSE2
    150 
    151 // Effects:
    152 #define HAS_ARGBADDROW_SSE2
    153 #define HAS_ARGBAFFINEROW_SSE2
    154 #define HAS_ARGBATTENUATEROW_SSSE3
    155 #define HAS_ARGBBLENDROW_SSSE3
    156 #define HAS_ARGBCOLORMATRIXROW_SSSE3
    157 #define HAS_ARGBCOLORTABLEROW_X86
    158 #define HAS_ARGBCOPYALPHAROW_SSE2
    159 #define HAS_ARGBCOPYYTOALPHAROW_SSE2
    160 #define HAS_ARGBGRAYROW_SSSE3
    161 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
    162 #define HAS_ARGBMIRRORROW_SSE2
    163 #define HAS_ARGBMULTIPLYROW_SSE2
    164 #define HAS_ARGBPOLYNOMIALROW_SSE2
    165 #define HAS_ARGBQUANTIZEROW_SSE2
    166 #define HAS_ARGBSEPIAROW_SSSE3
    167 #define HAS_ARGBSHADEROW_SSE2
    168 #define HAS_ARGBSUBTRACTROW_SSE2
    169 #define HAS_ARGBUNATTENUATEROW_SSE2
    170 #define HAS_BLENDPLANEROW_SSSE3
    171 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
    172 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
    173 #define HAS_INTERPOLATEROW_SSSE3
    174 #define HAS_RGBCOLORTABLEROW_X86
    175 #define HAS_SOBELROW_SSE2
    176 #define HAS_SOBELTOPLANEROW_SSE2
    177 #define HAS_SOBELXROW_SSE2
    178 #define HAS_SOBELXYROW_SSE2
    179 #define HAS_SOBELYROW_SSE2
    180 
    181 // The following functions fail on gcc/clang 32 bit with fpic and framepointer.
    182 // caveat: clangcl uses row_win.cc which works.
        // NOTE(review): the `!defined(__i386__)` term is already implied by
        // `!(defined(_DEBUG) && defined(__i386__))`, so it is redundant. The
        // condition is only false for a 32 bit, non-MSVC build that defines
        // _DEBUG and does not define NDEBUG.
    183 #if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \
    184     !defined(__i386__) || defined(_MSC_VER)
    185 // TODO(fbarchard): fix build error on x86 debug
    186 // https://code.google.com/p/libyuv/issues/detail?id=524
    187 #define HAS_I411TOARGBROW_SSSE3
    188 // TODO(fbarchard): fix build error on android_full_debug=1
    189 // https://code.google.com/p/libyuv/issues/detail?id=517
    190 #define HAS_I422ALPHATOARGBROW_SSSE3
    191 #endif  // fpic / framepointer workaround
    192 #endif  // x86 SSE2 / SSSE3 group
    193 
    194 // The following are available on all x86 platforms, but
    195 // require VS2012, clang 3.4 or gcc 4.7.
    196 // The code supports NaCL but requires a new compiler and validator.
        // Enabled when LIBYUV_DISABLE_X86 is not defined and at least one of the
        // compiler capability flags (VISUALC_HAS_AVX2, CLANG_HAS_AVX2,
        // GCC_HAS_AVX2) was set above.
    197 #if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
    198     defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
    199 #define HAS_ARGBCOPYALPHAROW_AVX2
    200 #define HAS_ARGBCOPYYTOALPHAROW_AVX2
    201 #define HAS_ARGBMIRRORROW_AVX2
    202 #define HAS_ARGBPOLYNOMIALROW_AVX2
    203 #define HAS_ARGBSHUFFLEROW_AVX2
    204 #define HAS_ARGBTORGB565DITHERROW_AVX2
    205 #define HAS_ARGBTOUVJROW_AVX2
    206 #define HAS_ARGBTOUVROW_AVX2
    207 #define HAS_ARGBTOYJROW_AVX2
    208 #define HAS_ARGBTOYROW_AVX2
    209 #define HAS_COPYROW_AVX
    210 #define HAS_H422TOARGBROW_AVX2
    211 #define HAS_I400TOARGBROW_AVX2
        // Left out only for 32 bit (__i386__) builds that define _DEBUG.
    212 #if !(defined(_DEBUG) && defined(__i386__))
    213 // TODO(fbarchard): fix build error on android_full_debug=1
    214 // https://code.google.com/p/libyuv/issues/detail?id=517
    215 #define HAS_I422ALPHATOARGBROW_AVX2
    216 #endif
    217 #define HAS_I411TOARGBROW_AVX2
    218 #define HAS_I422TOARGB1555ROW_AVX2
    219 #define HAS_I422TOARGB4444ROW_AVX2
    220 #define HAS_I422TOARGBROW_AVX2
    221 #define HAS_I422TORGB24ROW_AVX2
    222 #define HAS_I422TORGB565ROW_AVX2
    223 #define HAS_I422TORGBAROW_AVX2
    224 #define HAS_I444TOARGBROW_AVX2
    225 #define HAS_INTERPOLATEROW_AVX2
    226 #define HAS_J422TOARGBROW_AVX2
    227 #define HAS_MERGEUVROW_AVX2
    228 #define HAS_MIRRORROW_AVX2
    229 #define HAS_NV12TOARGBROW_AVX2
    230 #define HAS_NV12TORGB565ROW_AVX2
    231 #define HAS_NV21TOARGBROW_AVX2
    232 #define HAS_SPLITUVROW_AVX2
    233 #define HAS_UYVYTOARGBROW_AVX2
    234 #define HAS_UYVYTOUV422ROW_AVX2
    235 #define HAS_UYVYTOUVROW_AVX2
    236 #define HAS_UYVYTOYROW_AVX2
    237 #define HAS_YUY2TOARGBROW_AVX2
    238 #define HAS_YUY2TOUV422ROW_AVX2
    239 #define HAS_YUY2TOUVROW_AVX2
    240 #define HAS_YUY2TOYROW_AVX2
    241 
    242 // Effects:
    243 #define HAS_ARGBADDROW_AVX2
    244 #define HAS_ARGBATTENUATEROW_AVX2
    245 #define HAS_ARGBMULTIPLYROW_AVX2
    246 #define HAS_ARGBSUBTRACTROW_AVX2
    247 #define HAS_ARGBUNATTENUATEROW_AVX2
    248 #define HAS_BLENDPLANEROW_AVX2
    249 #endif  // AVX2 group
    250 
    251 // The following are available for AVX2 Visual C and clangcl 32 bit:
    252 // TODO(fbarchard): Port to gcc.
        // Requires a 32 bit MSVC-style target (_M_IX86) plus either
        // VISUALC_HAS_AVX2 or CLANG_HAS_AVX2; GCC_HAS_AVX2 alone is not enough.
    253 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
    254     (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
    255 #define HAS_ARGB1555TOARGBROW_AVX2
    256 #define HAS_ARGB4444TOARGBROW_AVX2
    257 #define HAS_ARGBTOARGB1555ROW_AVX2
    258 #define HAS_ARGBTOARGB4444ROW_AVX2
    259 #define HAS_ARGBTORGB565ROW_AVX2
    260 #define HAS_J400TOARGBROW_AVX2
    261 #define HAS_RGB565TOARGBROW_AVX2
    262 #endif
    263 
    264 // The following are also available on x64 Visual C.
        // For clang in MSVC mode the group is enabled only when __SSSE3__ is
        // defined. Both macros are also defined in the general x86 group above;
        // repeating an empty object-like macro definition is harmless.
    265 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \
    266     (!defined(__clang__) || defined(__SSSE3__))
    267 #define HAS_I422ALPHATOARGBROW_SSSE3
    268 #define HAS_I422TOARGBROW_SSSE3
    269 #endif
    270 
    271 // The following are available on Neon platforms:
        // Enabled unless LIBYUV_DISABLE_NEON is defined, for aarch64 targets, for
        // compilers defining __ARM_NEON__, or when the build defines LIBYUV_NEON.
    272 #if !defined(LIBYUV_DISABLE_NEON) && \
    273     (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    274 #define HAS_ABGRTOUVROW_NEON
    275 #define HAS_ABGRTOYROW_NEON
    276 #define HAS_ARGB1555TOARGBROW_NEON
    277 #define HAS_ARGB1555TOUVROW_NEON
    278 #define HAS_ARGB1555TOYROW_NEON
    279 #define HAS_ARGB4444TOARGBROW_NEON
    280 #define HAS_ARGB4444TOUVROW_NEON
    281 #define HAS_ARGB4444TOYROW_NEON
    282 #define HAS_ARGBSETROW_NEON
    283 #define HAS_ARGBTOARGB1555ROW_NEON
    284 #define HAS_ARGBTOARGB4444ROW_NEON
    285 #define HAS_ARGBTORAWROW_NEON
    286 #define HAS_ARGBTORGB24ROW_NEON
    287 #define HAS_ARGBTORGB565DITHERROW_NEON
    288 #define HAS_ARGBTORGB565ROW_NEON
    289 #define HAS_ARGBTOUV411ROW_NEON
    290 #define HAS_ARGBTOUV444ROW_NEON
    291 #define HAS_ARGBTOUVJROW_NEON
    292 #define HAS_ARGBTOUVROW_NEON
    293 #define HAS_ARGBTOYJROW_NEON
    294 #define HAS_ARGBTOYROW_NEON
    295 #define HAS_ARGBEXTRACTALPHAROW_NEON
    296 #define HAS_BGRATOUVROW_NEON
    297 #define HAS_BGRATOYROW_NEON
    298 #define HAS_COPYROW_NEON
    299 #define HAS_I400TOARGBROW_NEON
    300 #define HAS_I411TOARGBROW_NEON
    301 #define HAS_I422ALPHATOARGBROW_NEON
    302 #define HAS_I422TOARGB1555ROW_NEON
    303 #define HAS_I422TOARGB4444ROW_NEON
    304 #define HAS_I422TOARGBROW_NEON
    305 #define HAS_I422TORGB24ROW_NEON
    306 #define HAS_I422TORGB565ROW_NEON
    307 #define HAS_I422TORGBAROW_NEON
    308 #define HAS_I422TOUYVYROW_NEON
    309 #define HAS_I422TOYUY2ROW_NEON
    310 #define HAS_I444TOARGBROW_NEON
    311 #define HAS_J400TOARGBROW_NEON
    312 #define HAS_MERGEUVROW_NEON
    313 #define HAS_MIRRORROW_NEON
    314 #define HAS_MIRRORUVROW_NEON
    315 #define HAS_NV12TOARGBROW_NEON
    316 #define HAS_NV12TORGB565ROW_NEON
    317 #define HAS_NV21TOARGBROW_NEON
    318 #define HAS_RAWTOARGBROW_NEON
    319 #define HAS_RAWTORGB24ROW_NEON
    320 #define HAS_RAWTOUVROW_NEON
    321 #define HAS_RAWTOYROW_NEON
    322 #define HAS_RGB24TOARGBROW_NEON
    323 #define HAS_RGB24TOUVROW_NEON
    324 #define HAS_RGB24TOYROW_NEON
    325 #define HAS_RGB565TOARGBROW_NEON
    326 #define HAS_RGB565TOUVROW_NEON
    327 #define HAS_RGB565TOYROW_NEON
    328 #define HAS_RGBATOUVROW_NEON
    329 #define HAS_RGBATOYROW_NEON
    330 #define HAS_SETROW_NEON
    331 #define HAS_SPLITUVROW_NEON
    332 #define HAS_UYVYTOARGBROW_NEON
    333 #define HAS_UYVYTOUV422ROW_NEON
    334 #define HAS_UYVYTOUVROW_NEON
    335 #define HAS_UYVYTOYROW_NEON
    336 #define HAS_YUY2TOARGBROW_NEON
    337 #define HAS_YUY2TOUV422ROW_NEON
    338 #define HAS_YUY2TOUVROW_NEON
    339 #define HAS_YUY2TOYROW_NEON
    340 
    341 // Effects:
    342 #define HAS_ARGBADDROW_NEON
    343 #define HAS_ARGBATTENUATEROW_NEON
    344 #define HAS_ARGBBLENDROW_NEON
    345 #define HAS_ARGBCOLORMATRIXROW_NEON
    346 #define HAS_ARGBGRAYROW_NEON
    347 #define HAS_ARGBMIRRORROW_NEON
    348 #define HAS_ARGBMULTIPLYROW_NEON
    349 #define HAS_ARGBQUANTIZEROW_NEON
    350 #define HAS_ARGBSEPIAROW_NEON
    351 #define HAS_ARGBSHADEROW_NEON
    352 #define HAS_ARGBSHUFFLEROW_NEON
    353 #define HAS_ARGBSUBTRACTROW_NEON
    354 #define HAS_INTERPOLATEROW_NEON
    355 #define HAS_SOBELROW_NEON
    356 #define HAS_SOBELTOPLANEROW_NEON
    357 #define HAS_SOBELXROW_NEON
    358 #define HAS_SOBELXYROW_NEON
    359 #define HAS_SOBELYROW_NEON
    360 #endif  // NEON group
    361 
    362 // The following are available on Mips platforms:
        // Limited to the 32 bit ABI (_MIPS_SIM_ABI32) and ISA revisions below 6,
        // and skipped entirely when LIBYUV_DISABLE_MIPS is defined.
    363 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
    364     (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
    365 #define HAS_COPYROW_MIPS
        // The DSPR2 variants additionally need DSP ASE revision 2 or newer.
    366 #if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
    367 #define HAS_I422TOARGBROW_DSPR2
    368 #define HAS_INTERPOLATEROW_DSPR2
    369 #define HAS_MIRRORROW_DSPR2
    370 #define HAS_MIRRORUVROW_DSPR2
    371 #define HAS_SPLITUVROW_DSPR2
    372 #endif
    373 #endif
    374 
    375 #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
        // Visual C (native, non-clang): alignment via __declspec(align(n)); the
        // vector types are plain arrays with an alignment attribute.
        // vec*/uvec* are 16 byte types, lvec*/ulvec* are 32 byte types.
    376 #define SIMD_ALIGNED(var) __declspec(align(16)) var
        // NOTE(review): despite the "32" in its name this requests 64 byte
        // alignment (same in the GCC/clang branch below).
    377 #define SIMD_ALIGNED32(var) __declspec(align(64)) var
    378 typedef __declspec(align(16)) int16 vec16[8];
    379 typedef __declspec(align(16)) int32 vec32[4];
    380 typedef __declspec(align(16)) int8 vec8[16];
    381 typedef __declspec(align(16)) uint16 uvec16[8];
    382 typedef __declspec(align(16)) uint32 uvec32[4];
    383 typedef __declspec(align(16)) uint8 uvec8[16];
    384 typedef __declspec(align(32)) int16 lvec16[16];
    385 typedef __declspec(align(32)) int32 lvec32[8];
    386 typedef __declspec(align(32)) int8 lvec8[32];
    387 typedef __declspec(align(32)) uint16 ulvec16[16];
    388 typedef __declspec(align(32)) uint32 ulvec32[8];
    389 typedef __declspec(align(32)) uint8 ulvec8[32];
    390 #elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
    391 // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
        // GCC / clang (except PNaCl): the same names are compiler vector types
        // declared with vector_size(16) or vector_size(32) instead of arrays.
    392 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
    393 #define SIMD_ALIGNED32(var) var __attribute__((aligned(64)))
    394 typedef int16 __attribute__((vector_size(16))) vec16;
    395 typedef int32 __attribute__((vector_size(16))) vec32;
    396 typedef int8 __attribute__((vector_size(16))) vec8;
    397 typedef uint16 __attribute__((vector_size(16))) uvec16;
    398 typedef uint32 __attribute__((vector_size(16))) uvec32;
    399 typedef uint8 __attribute__((vector_size(16))) uvec8;
    400 typedef int16 __attribute__((vector_size(32))) lvec16;
    401 typedef int32 __attribute__((vector_size(32))) lvec32;
    402 typedef int8 __attribute__((vector_size(32))) lvec8;
    403 typedef uint16 __attribute__((vector_size(32))) ulvec16;
    404 typedef uint32 __attribute__((vector_size(32))) ulvec32;
    405 typedef uint8 __attribute__((vector_size(32))) ulvec8;
    406 #else
        // Fallback for every other compiler: plain arrays with the same element
        // counts and no alignment request; SIMD_ALIGNED* expand to just |var|.
    407 #define SIMD_ALIGNED(var) var
    408 #define SIMD_ALIGNED32(var) var
    409 typedef int16 vec16[8];
    410 typedef int32 vec32[4];
    411 typedef int8 vec8[16];
    412 typedef uint16 uvec16[8];
    413 typedef uint32 uvec32[4];
    414 typedef uint8 uvec8[16];
    415 typedef int16 lvec16[16];
    416 typedef int32 lvec32[8];
    417 typedef int8 lvec8[32];
    418 typedef uint16 ulvec16[16];
    419 typedef uint32 ulvec32[8];
    420 typedef uint8 ulvec8[32];
    421 #endif
    422 
    423 #if defined(__aarch64__)
        // Exactly one of the three YuvConstants layouts below is compiled,
        // selected by target architecture. The member lists differ per layout.
    424 // This struct is for Arm64 color conversion.
    425 struct YuvConstants {
    426   uvec16 kUVToRB;
    427   uvec16 kUVToRB2;
    428   uvec16 kUVToG;
    429   uvec16 kUVToG2;
    430   vec16 kUVBiasBGR;
    431   vec32 kYToRgb;
    432 };
    433 #elif defined(__arm__)
    434 // This struct is for ArmV7 color conversion.
    435 struct YuvConstants {
    436   uvec8 kUVToRB;
    437   uvec8 kUVToG;
    438   vec16 kUVBiasBGR;
    439   vec32 kYToRgb;
    440 };
    441 #else
    442 // This struct is for Intel color conversion.
        // Used for every target that is neither __aarch64__ nor __arm__.
    443 struct YuvConstants {
    444   lvec8 kUVToB;
    445   lvec8 kUVToG;
    446   lvec8 kUVToR;
    447   lvec16 kUVBiasB;
    448   lvec16 kUVBiasG;
    449   lvec16 kUVBiasR;
    450   lvec16 kYToRgb;
    451 };
    452 
    453 // Offsets into YuvConstants structure
        // One offset per member, in declaration order, stepping by 32.
        // NOTE(review): this matches the struct only if each lvec member is
        // 32 bytes with no padding (int8 = 1 byte, int16 = 2 bytes) - keep the
        // two in sync when changing the struct.
    454 #define KUVTOB   0
    455 #define KUVTOG   32
    456 #define KUVTOR   64
    457 #define KUVBIASB 96
    458 #define KUVBIASG 128
    459 #define KUVBIASR 160
    460 #define KYTORGB  192
    461 #endif
    462 
    463 // Conversion matrix for YUV to RGB
        // Declarations only; the tables are defined in another translation unit.
        // They use whichever YuvConstants layout was selected above.
    464 extern const struct YuvConstants kYuvI601Constants;  // BT.601
    465 extern const struct YuvConstants kYuvJPEGConstants;  // JPeg color space
    466 extern const struct YuvConstants kYuvH709Constants;  // BT.709
    467 
    468 // Conversion matrix for YVU to BGR
        // Same three color spaces as above, for the YVU to BGR direction.
    469 extern const struct YuvConstants kYvuI601Constants;  // BT.601
    470 extern const struct YuvConstants kYvuJPEGConstants;  // JPeg color space
    471 extern const struct YuvConstants kYvuH709Constants;  // BT.709
    472 
    473 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
        // OMITFP is a function attribute asking for the frame pointer to be
        // omitted. It expands to nothing on Apple, x86-64 and LLVM based builds.
    474 #define OMITFP
    475 #else
        // NOTE(review): this branch uses GCC attribute syntax; it is also what a
        // 32 bit non-LLVM Visual C build would see - confirm OMITFP is never
        // expanded there.
    476 #define OMITFP __attribute__((optimize("omit-frame-pointer")))
    477 #endif
    478 
    479 // NaCL macros for GCC x86 and x64.
        // The macros below build pieces of inline assembly text by stringizing
        // their arguments (#arg) and relying on adjacent string literals being
        // concatenated.
    480 #if defined(__native_client__)
    481 #define LABELALIGN ".p2align 5\n"
    482 #else
    483 #define LABELALIGN
    484 #endif
    485 #if defined(__native_client__) && defined(__x86_64__)
    486 // r14 is used for MEMOP macros.
        // NACL_R14 expands to an extra "r14", entry (trailing comma included);
        // presumably spliced into asm clobber lists by users of the MEMOP macros.
    487 #define NACL_R14 "r14",
    488 #define BUNDLELOCK ".bundle_lock\n"
    489 #define BUNDLEUNLOCK ".bundle_unlock\n"
        // Every memory operand in this branch is expressed relative to r15.
    490 #define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
    491 #define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
    492 #define MEMLEA(offset, base) #offset "(%q" #base ")"
    493 #define MEMLEA3(offset, index, scale) \
    494     #offset "(,%q" #index "," #scale ")"
    495 #define MEMLEA4(offset, base, index, scale) \
    496     #offset "(%q" #base ",%q" #index "," #scale ")"
    497 #define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
    498 #define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
        // The MEMOP* / VMEMOPREG / VEXTOPMEM forms first compute the effective
        // address into r14d with lea, then issue the opcode on (r15,r14). The
        // two instructions are wrapped in .bundle_lock / .bundle_unlock.
    499 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    500     BUNDLELOCK \
    501     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    502     #opcode " (%%r15,%%r14),%%" #reg "\n" \
    503     BUNDLEUNLOCK
    504 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    505     BUNDLELOCK \
    506     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    507     #opcode " %%" #reg ",(%%r15,%%r14)\n" \
    508     BUNDLEUNLOCK
    509 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    510     BUNDLELOCK \
    511     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    512     #opcode " (%%r15,%%r14),%" #arg "\n" \
    513     BUNDLEUNLOCK
    514 #define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    515     BUNDLELOCK \
    516     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    517     #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \
    518     BUNDLEUNLOCK
    519 #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    520     BUNDLELOCK \
    521     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    522     #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \
    523     BUNDLEUNLOCK
    524 #else  // defined(__native_client__) && defined(__x86_64__)
        // Generic branch: the same macro names expand to ordinary AT&T style
        // operands, and each MEMOP* form is a single instruction.
    525 #define NACL_R14
        // NOTE(review): this branch defines BUNDLEALIGN, while the NaCl branch
        // defines BUNDLELOCK / BUNDLEUNLOCK instead; neither of those two is
        // defined here.
    526 #define BUNDLEALIGN
    527 #define MEMACCESS(base) "(%" #base ")"
    528 #define MEMACCESS2(offset, base) #offset "(%" #base ")"
    529 #define MEMLEA(offset, base) #offset "(%" #base ")"
    530 #define MEMLEA3(offset, index, scale) \
    531     #offset "(,%" #index "," #scale ")"
    532 #define MEMLEA4(offset, base, index, scale) \
    533     #offset "(%" #base ",%" #index "," #scale ")"
        // The string-instruction operand helpers expand to nothing here.
    534 #define MEMMOVESTRING(s, d)
    535 #define MEMSTORESTRING(reg, d)
    536 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    537     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
    538 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    539     #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
    540 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    541     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
    542 #define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    543     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \
    544     #reg2 "\n"
    545 #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    546     #op " $" #sel ",%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
    547 #endif  // defined(__native_client__) && defined(__x86_64__)
    548 
    549 #if defined(__arm__) || defined(__aarch64__)
        // On ARM targets the x86 flavoured MEMACCESS defined above is replaced.
    550 #undef MEMACCESS
    551 #if defined(__native_client__)
        // Native Client: emit a bic that clears the top two bits (0xc0000000)
        // of the base register.
    552 #define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
    553 #else
        // Everywhere else MEMACCESS expands to nothing.
    554 #define MEMACCESS(base)
    555 #endif
    556 #endif
    557 
    558 void I444ToARGBRow_NEON(const uint8* src_y,
    559                         const uint8* src_u,
    560                         const uint8* src_v,
    561                         uint8* dst_argb,
    562                         const struct YuvConstants* yuvconstants,
    563                         int width);
    564 void I422ToARGBRow_NEON(const uint8* src_y,
    565                         const uint8* src_u,
    566                         const uint8* src_v,
    567                         uint8* dst_argb,
    568                         const struct YuvConstants* yuvconstants,
    569                         int width);
    570 void I422AlphaToARGBRow_NEON(const uint8* y_buf,
    571                              const uint8* u_buf,
    572                              const uint8* v_buf,
    573                              const uint8* a_buf,
    574                              uint8* dst_argb,
    575                              const struct YuvConstants* yuvconstants,
    576                              int width);
    577 void I422ToARGBRow_NEON(const uint8* src_y,
    578                         const uint8* src_u,
    579                         const uint8* src_v,
    580                         uint8* dst_argb,
    581                         const struct YuvConstants* yuvconstants,
    582                         int width);
    583 void I411ToARGBRow_NEON(const uint8* src_y,
    584                         const uint8* src_u,
    585                         const uint8* src_v,
    586                         uint8* dst_argb,
    587                         const struct YuvConstants* yuvconstants,
    588                         int width);
    589 void I422ToRGBARow_NEON(const uint8* src_y,
    590                         const uint8* src_u,
    591                         const uint8* src_v,
    592                         uint8* dst_rgba,
    593                         const struct YuvConstants* yuvconstants,
    594                         int width);
    595 void I422ToRGB24Row_NEON(const uint8* src_y,
    596                          const uint8* src_u,
    597                          const uint8* src_v,
    598                          uint8* dst_rgb24,
    599                          const struct YuvConstants* yuvconstants,
    600                          int width);
    601 void I422ToRGB565Row_NEON(const uint8* src_y,
    602                           const uint8* src_u,
    603                           const uint8* src_v,
    604                           uint8* dst_rgb565,
    605                           const struct YuvConstants* yuvconstants,
    606                           int width);
    607 void I422ToARGB1555Row_NEON(const uint8* src_y,
    608                             const uint8* src_u,
    609                             const uint8* src_v,
    610                             uint8* dst_argb1555,
    611                             const struct YuvConstants* yuvconstants,
    612                             int width);
    613 void I422ToARGB4444Row_NEON(const uint8* src_y,
    614                             const uint8* src_u,
    615                             const uint8* src_v,
    616                             uint8* dst_argb4444,
    617                             const struct YuvConstants* yuvconstants,
    618                             int width);
    619 void NV12ToARGBRow_NEON(const uint8* src_y,
    620                         const uint8* src_uv,
    621                         uint8* dst_argb,
    622                         const struct YuvConstants* yuvconstants,
    623                         int width);
    624 void NV12ToRGB565Row_NEON(const uint8* src_y,
    625                           const uint8* src_uv,
    626                           uint8* dst_rgb565,
    627                           const struct YuvConstants* yuvconstants,
    628                           int width);
    629 void NV21ToARGBRow_NEON(const uint8* src_y,
    630                         const uint8* src_vu,
    631                         uint8* dst_argb,
    632                         const struct YuvConstants* yuvconstants,
    633                         int width);
    634 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
    635                         uint8* dst_argb,
    636                         const struct YuvConstants* yuvconstants,
    637                         int width);
    638 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
    639                         uint8* dst_argb,
    640                         const struct YuvConstants* yuvconstants,
    641                         int width);
    642 
    643 void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    644 void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    645 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    646 void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    647 void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    648 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    649 void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
    650 void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
    651 void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
    652 void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
    653 void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
    654 void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
    655 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
    656 void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    657                          int width);
    658 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    659                          int width);
    660 void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
    661                       uint8* dst_u, uint8* dst_v, int width);
    662 void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
    663                        uint8* dst_u, uint8* dst_v, int width);
    664 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
    665                       uint8* dst_u, uint8* dst_v, int width);
    666 void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
    667                       uint8* dst_u, uint8* dst_v, int width);
    668 void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
    669                       uint8* dst_u, uint8* dst_v, int width);
    670 void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    671                        uint8* dst_u, uint8* dst_v, int width);
    672 void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
    673                      uint8* dst_u, uint8* dst_v, int width);
    674 void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    675                         uint8* dst_u, uint8* dst_v, int width);
    676 void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
    677                           uint8* dst_u, uint8* dst_v, int width);
    678 void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
    679                           uint8* dst_u, uint8* dst_v, int width);
    680 void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width);
    681 void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width);
    682 void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width);
    683 void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
    684 void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
    685 void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
    686 void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
    687 void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
    // _C-suffixed *ToYRow declarations with the same (src, dst_y, width) shape
    // as the SIMD-suffixed variants.
    // NOTE(review): presumably the portable reference implementations; the "YJ"
    // variant likely targets full-range (JPEG) luma - confirm.
    688 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
    689 void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
    690 void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
    691 void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width);
    692 void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width);
    693 void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width);
    694 void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width);
    695 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width);
    696 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width);
    697 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width);
    // _Any_SSSE3 *ToYRow declarations; signatures match the non-Any variants.
    // NOTE(review): "_Any_" presumably means the wrapper accepts widths that are
    // not a multiple of the SIMD step - confirm against the definitions.
    698 void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    699 void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    700 void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
    701 void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
    702 void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
    703 void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
    704 void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
    // _Any_NEON *ToYRow declarations; same (src, dst_y, width) signature as the
    // plain _NEON variants.
    // NOTE(review): presumably remainder-handling wrappers - confirm.
    705 void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
    706 void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
    707 void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width);
    708 void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width);
    709 void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width);
    710 void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
    711 void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width);
    712 void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
    713 void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y,
    714                              int width);
    715 void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y,
    716                              int width);
    717 
    // AVX2- and SSSE3-suffixed *ToUVRow declarations for 32-bit packed sources.
    // Each takes a source row pointer plus stride and writes dst_u and dst_v.
    // NOTE(review): the "UVJ" variants presumably use full-range (JPEG)
    // coefficients - confirm.
    718 void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
    719                       uint8* dst_u, uint8* dst_v, int width);
    720 void ARGBToUVJRow_AVX2(const uint8* src_argb, int src_stride_argb,
    721                        uint8* dst_u, uint8* dst_v, int width);
    722 void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    723                        uint8* dst_u, uint8* dst_v, int width);
    724 void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    725                         uint8* dst_u, uint8* dst_v, int width);
    726 void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    727                        uint8* dst_u, uint8* dst_v, int width);
    728 void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    729                        uint8* dst_u, uint8* dst_v, int width);
    730 void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    731                        uint8* dst_u, uint8* dst_v, int width);
    // _Any_AVX2 / _Any_SSSE3 counterparts of the *ToUVRow declarations, with
    // identical parameter lists.
    // NOTE(review): presumably remainder-handling wrappers - confirm.
    732 void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
    733                           uint8* dst_u, uint8* dst_v, int width);
    734 void ARGBToUVJRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
    735                            uint8* dst_u, uint8* dst_v, int width);
    736 void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    737                            uint8* dst_u, uint8* dst_v, int width);
    738 void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    739                             uint8* dst_u, uint8* dst_v, int width);
    740 void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    741                            uint8* dst_u, uint8* dst_v, int width);
    742 void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    743                            uint8* dst_u, uint8* dst_v, int width);
    744 void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    745                            uint8* dst_u, uint8* dst_v, int width);
    // _Any_NEON chroma row declarations. The UV444 and UV411 variants take no
    // source stride (single source row); every other declaration here takes a
    // source pointer plus stride. All write separate dst_u and dst_v outputs.
    746 void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    747                              int width);
    748 void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    749                              int width);
    750 void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    751                           uint8* dst_u, uint8* dst_v, int width);
    752 void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    753                            uint8* dst_u, uint8* dst_v, int width);
    754 void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
    755                           uint8* dst_u, uint8* dst_v, int width);
    756 void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
    757                           uint8* dst_u, uint8* dst_v, int width);
    758 void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
    759                           uint8* dst_u, uint8* dst_v, int width);
    760 void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    761                            uint8* dst_u, uint8* dst_v, int width);
    762 void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
    763                          uint8* dst_u, uint8* dst_v, int width);
    764 void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    765                             uint8* dst_u, uint8* dst_v, int width);
    766 void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
    767                               int src_stride_argb1555,
    768                               uint8* dst_u, uint8* dst_v, int width);
    769 void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
    770                               int src_stride_argb4444,
    771                               uint8* dst_u, uint8* dst_v, int width);
    // _C-suffixed *ToUVRow declarations covering every packed source format
    // listed for the SIMD variants; parameter lists match those variants.
    // NOTE(review): presumably the portable fallbacks - confirm.
    772 void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
    773                    uint8* dst_u, uint8* dst_v, int width);
    774 void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
    775                     uint8* dst_u, uint8* dst_v, int width);
    776 void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
    777                    uint8* dst_u, uint8* dst_v, int width);
    778 void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
    779                    uint8* dst_u, uint8* dst_v, int width);
    780 void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
    781                    uint8* dst_u, uint8* dst_v, int width);
    782 void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
    783                     uint8* dst_u, uint8* dst_v, int width);
    784 void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
    785                   uint8* dst_u, uint8* dst_v, int width);
    786 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
    787                      uint8* dst_u, uint8* dst_v, int width);
    788 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
    789                        uint8* dst_u, uint8* dst_v, int width);
    790 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
    791                        uint8* dst_u, uint8* dst_v, int width);
    792 
    // SSSE3 and _Any_SSSE3 declarations of ARGBToUV444Row. No source stride is
    // taken, so only a single source row is passed in.
    793 void ARGBToUV444Row_SSSE3(const uint8* src_argb,
    794                           uint8* dst_u, uint8* dst_v, int width);
    795 void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
    796                               uint8* dst_u, uint8* dst_v, int width);
    797 
    // _C declarations of the single-row ARGB chroma extractors (no stride).
    // NOTE(review): 444 / 411 presumably name the chroma subsampling of the
    // output - confirm.
    798 void ARGBToUV444Row_C(const uint8* src_argb,
    799                       uint8* dst_u, uint8* dst_v, int width);
    800 void ARGBToUV411Row_C(const uint8* src_argb,
    801                       uint8* dst_u, uint8* dst_v, int width);
    802 
    // MirrorRow declarations: (src, dst, width) for AVX2, SSSE3, NEON, DSPR2 and
    // C, followed by _Any_ variants.
    // NOTE(review): MirrorRow_Any_SSE2 is declared here without a plain
    // MirrorRow_SSE2 in this group - confirm it still has a definition.
    803 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    804 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
    805 void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
    806 void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
    807 void MirrorRow_C(const uint8* src, uint8* dst, int width);
    808 void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
    809 void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
    810 void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
    811 void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
    812 
    // MirrorUVRow declarations: one src_uv input and separate dst_u / dst_v
    // outputs.
    // NOTE(review): src_uv is presumably interleaved U,V bytes - confirm.
    813 void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    814                        int width);
    815 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    816                       int width);
    817 void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    818                        int width);
    819 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
    820 
    // ARGBMirrorRow declarations: (src, dst, width) for AVX2, SSE2, NEON and C,
    // plus the matching _Any_ variants.
    // NOTE(review): presumably mirrors whole 4-byte pixels rather than bytes -
    // confirm.
    821 void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    822 void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
    823 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
    824 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
    825 void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
    826 void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
    827 void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
    828 
    // SplitUVRow declarations: one src_uv input, separate dst_u and dst_v
    // outputs, for C, SSE2, AVX2, NEON and DSPR2, plus _Any_ variants of each
    // SIMD flavour.
    // NOTE(review): presumably de-interleaves a UV plane - confirm.
    829 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
    830 void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    831                      int width);
    832 void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    833                      int width);
    834 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    835                      int width);
    836 void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    837                       int width);
    838 void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    839                          int width);
    840 void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    841                          int width);
    842 void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    843                          int width);
    844 void SplitUVRow_Any_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    845                           int width);
    846 
    // MergeUVRow declarations: separate src_u and src_v inputs and a single
    // dst_uv output, for C, SSE2, AVX2 and NEON, plus _Any_ variants.
    // NOTE(review): presumably the inverse of SplitUVRow (interleaves U and V) -
    // confirm.
    847 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    848                   int width);
    849 void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    850                      int width);
    851 void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    852                      int width);
    853 void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    854                      int width);
    855 void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    856                          int width);
    857 void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    858                          int width);
    859 void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    860                          int width);
    861 
    // CopyRow declarations: (src, dst, count) for SSE2, AVX, ERMS, NEON, MIPS
    // and C, plus _Any_ variants for SSE2, AVX and NEON.
    // NOTE(review): count is presumably a byte count since the buffers are
    // uint8 - confirm.
    862 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
    863 void CopyRow_AVX(const uint8* src, uint8* dst, int count);
    864 void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
    865 void CopyRow_NEON(const uint8* src, uint8* dst, int count);
    866 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
    867 void CopyRow_C(const uint8* src, uint8* dst, int count);
    868 void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
    869 void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
    870 void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
    871 
    // 16-bit-sample counterpart of CopyRow_C (uint16 buffers); only a C variant
    // is declared here.
    872 void CopyRow_16_C(const uint16* src, uint16* dst, int count);
    873 
    // ARGBCopyAlphaRow declarations: ARGB source row and ARGB destination row,
    // for C, SSE2 and AVX2, plus _Any_ variants.
    // NOTE(review): presumably copies only the alpha channel into dst_argb,
    // leaving its colour channels untouched - confirm.
    874 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    875 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
    876 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
    877 void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
    878                                int width);
    879 void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
    880                                int width);
    881 
    // ARGBExtractAlphaRow declarations: ARGB source row in, dst_a row out, for
    // C, SSE2 and NEON, plus _Any_ variants.
    // NOTE(review): dst_a is presumably a one-byte-per-pixel alpha plane -
    // confirm.
    882 void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
    883 void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
    884 void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
    885 void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
    886                                   int width);
    887 void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
    888                                   int width);
    889 
    // ARGBCopyYToAlphaRow declarations: src_y row in, ARGB destination row, for
    // C, SSE2 and AVX2, plus _Any_ variants.
    // NOTE(review): presumably writes each Y sample into the alpha channel of
    // dst_argb - confirm.
    890 void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
    891 void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
    892 void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
    893 void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb,
    894                                   int width);
    895 void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb,
    896                                   int width);
    897 
    // SetRow declarations: destination pointer, an 8-bit value v8 and a count,
    // for C, X86, ERMS and NEON, plus _Any_ variants for X86 and NEON.
    // NOTE(review): presumably a memset-style fill of count bytes - confirm.
    898 void SetRow_C(uint8* dst, uint8 v8, int count);
    899 void SetRow_X86(uint8* dst, uint8 v8, int count);
    900 void SetRow_ERMS(uint8* dst, uint8 v8, int count);
    901 void SetRow_NEON(uint8* dst, uint8 v8, int count);
    902 void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
    903 void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
    904 
    // ARGBSetRow declarations: destination ARGB pointer, a 32-bit value v32 and
    // a count, for C, X86 and NEON, plus an _Any_NEON variant.
    // NOTE(review): count is presumably in pixels (one v32 each) - confirm.
    905 void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
    906 void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
    907 void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
    908 void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
    909 
    910 // ARGBShufflers for BGRAToARGB etc.
    // Every variant takes an ARGB source row, an ARGB destination row, a
    // `shuffler` byte table and a width.
    // NOTE(review): the shuffler presumably holds per-byte source indices that
    // reorder the channels of each pixel - confirm its length and layout.
    911 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
    912                       const uint8* shuffler, int width);
    913 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
    914                          const uint8* shuffler, int width);
    915 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
    916                           const uint8* shuffler, int width);
    917 void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
    918                          const uint8* shuffler, int width);
    919 void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
    920                          const uint8* shuffler, int width);
    921 void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
    922                              const uint8* shuffler, int width);
    923 void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
    924                               const uint8* shuffler, int width);
    925 void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
    926                              const uint8* shuffler, int width);
    927 void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
    928                              const uint8* shuffler, int width);
    929 
    // x86 SIMD declarations converting packed RGB rows to ARGB (and RAW to
    // RGB24): SSSE3 for the 24-bit sources, SSE2 and AVX2 for the 16-bit sources.
    // All share a (src, dst, width) signature.
    930 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width);
    931 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
    932 void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
    933 void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width);
    934 void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    935                             int width);
    936 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    937                             int width);
    938 void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width);
    939 void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
    940                             int width);
    941 void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
    942                             int width);
    943 
    // NEON, C and _Any_SSSE3 declarations of the packed-RGB -> ARGB (and RAW ->
    // RGB24) row converters, all with a (src, dst, width) signature.
    // NOTE(review): the _C variants of RGB565/ARGB1555/ARGB4444 name their source
    // src_rgb / src_argb rather than after the source format as the SIMD
    // declarations do; harmless, but inconsistent.
    944 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width);
    945 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width);
    946 void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
    947 void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width);
    948 void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
    949                             int width);
    950 void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
    951                             int width);
    952 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width);
    953 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width);
    954 void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width);
    955 void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width);
    956 void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    957 void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    958 void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb,
    959                               int width);
    960 void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
    961 void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
    962 
    // _Any_SSE2 and _Any_AVX2 declarations of the 16-bit packed -> ARGB row
    // converters; signatures match the plain SSE2 / AVX2 variants.
    963 void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
    964                               int width);
    965 void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    966                                 int width);
    967 void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    968                                 int width);
    969 void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
    970                               int width);
    971 void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
    972                                 int width);
    973 void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
    974                                 int width);
    975 
    // _Any_NEON declarations of the packed-RGB -> ARGB (and RAW -> RGB24) row
    // converters; signatures match the plain _NEON variants.
    976 void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb,
    977                              int width);
    978 void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width);
    979 void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
    980 void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
    981                               int width);
    982 void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
    983                                 int width);
    984 void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
    985                                 int width);
    986 
    // SSSE3 / SSE2 declarations converting an ARGB row to a packed format
    // (RGB24, RAW, RGB565, ARGB1555, ARGB4444). The destination is named dst_rgb
    // in every case regardless of the output format.
    987 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
    988 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
    989 void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
    990 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
    991 void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
    992 
    // ARGBToRGB565DitherRow declarations for C, SSE2 and AVX2. In addition to
    // the usual (src, dst, width) they take a 32-bit dither4 value.
    // NOTE(review): dither4 presumably packs four 8-bit dither offsets applied
    // cyclically across pixels - confirm.
    993 void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
    994                              const uint32 dither4, int width);
    995 void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
    996                                 const uint32 dither4, int width);
    997 void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
    998                                 const uint32 dither4, int width);
    999 
   // AVX2 declarations converting an ARGB row to the 16-bit packed formats
   // (RGB565, ARGB1555, ARGB4444).
   1000 void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1001 void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1002 void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1003 
   // NEON declarations converting an ARGB row to packed formats, including the
   // dithered RGB565 variant that takes an extra dither4 argument.
   1004 void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1005 void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1006 void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1007 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1008 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1009 void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
   1010                                 const uint32 dither4, int width);
   1011 
   // _C declarations converting an ARGB row to RGBA, RGB24, RAW, RGB565,
   // ARGB1555 and ARGB4444; same (src_argb, dst_rgb, width) signature as the
   // SIMD variants.
   1012 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1013 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1014 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1015 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1016 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1017 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1018 
   // J400ToARGBRow declarations: src_y row in, ARGB row out, for SSE2, AVX2,
   // NEON and C, plus _Any_ variants.
   // NOTE(review): J400 is presumably full-range grey (Y only) - confirm.
   1019 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1020 void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1021 void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1022 void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
   1023 void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1024 void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1025 void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1026 
   // _C declarations of the planar YUV -> ARGB row converters. Each takes
   // separate Y, U and V row pointers, an ARGB destination, a YuvConstants
   // table and a width. I422ToARGBRow_C is declared once; the second,
   // byte-identical declaration that followed it was redundant and is removed.
   1027 void I444ToARGBRow_C(const uint8* src_y,
   1028                      const uint8* src_u,
   1029                      const uint8* src_v,
   1030                      uint8* dst_argb,
   1031                      const struct YuvConstants* yuvconstants,
   1032                      int width);
   1033 void I422ToARGBRow_C(const uint8* src_y,
   1034                      const uint8* src_u,
   1035                      const uint8* src_v,
   1036                      uint8* dst_argb,
   1037                      const struct YuvConstants* yuvconstants,
   1038                      int width);
   // _C declarations of further YUV -> ARGB row converters. The I422Alpha
   // variant takes an additional a_buf input row; the NV12 variant takes a
   // single src_uv row instead of separate U and V rows. All take a
   // YuvConstants table.
   // NOTE(review): a_buf is presumably a per-pixel alpha plane and src_uv an
   // interleaved chroma plane - confirm.
   1045 void I422AlphaToARGBRow_C(const uint8* y_buf,
   1046                           const uint8* u_buf,
   1047                           const uint8* v_buf,
   1048                           const uint8* a_buf,
   1049                           uint8* dst_argb,
   1050                           const struct YuvConstants* yuvconstants,
   1051                           int width);
   1052 void I411ToARGBRow_C(const uint8* src_y,
   1053                      const uint8* src_u,
   1054                      const uint8* src_v,
   1055                      uint8* dst_argb,
   1056                      const struct YuvConstants* yuvconstants,
   1057                      int width);
   1058 void NV12ToARGBRow_C(const uint8* src_y,
   1059                      const uint8* src_uv,
   1060                      uint8* dst_argb,
   1061                      const struct YuvConstants* yuvconstants,
   1062                      int width);
   // _C declaration of the NV12 -> RGB565 row converter: Y row plus a single
   // src_uv row in, RGB565 row out. The destination parameter is named
   // dst_rgb565 to match the output format (as I422ToRGB565Row_C does); prototype
   // parameter names do not affect callers.
   1063 void NV12ToRGB565Row_C(const uint8* src_y,
   1064                        const uint8* src_uv,
   1065                        uint8* dst_rgb565,
   1066                        const struct YuvConstants* yuvconstants,
   1067                        int width);
   // _C declarations of more YUV -> RGB row converters. NV21 takes Y plus one
   // chroma row; YUY2 and UYVY take a single packed source row; the I422
   // variants take separate Y, U and V rows and differ only in the destination
   // format named by the dst_* parameter. All take a YuvConstants table.
   // NOTE(review): NV21ToARGBRow_C names its chroma input src_uv; for NV21 the
   // byte order is presumably V then U - confirm.
   1068 void NV21ToARGBRow_C(const uint8* src_y,
   1069                      const uint8* src_uv,
   1070                      uint8* dst_argb,
   1071                      const struct YuvConstants* yuvconstants,
   1072                      int width);
   1073 void YUY2ToARGBRow_C(const uint8* src_yuy2,
   1074                      uint8* dst_argb,
   1075                      const struct YuvConstants* yuvconstants,
   1076                      int width);
   1077 void UYVYToARGBRow_C(const uint8* src_uyvy,
   1078                      uint8* dst_argb,
   1079                      const struct YuvConstants* yuvconstants,
   1080                      int width);
   1081 void I422ToRGBARow_C(const uint8* src_y,
   1082                      const uint8* src_u,
   1083                      const uint8* src_v,
   1084                      uint8* dst_rgba,
   1085                      const struct YuvConstants* yuvconstants,
   1086                      int width);
   1087 void I422ToRGB24Row_C(const uint8* src_y,
   1088                       const uint8* src_u,
   1089                       const uint8* src_v,
   1090                       uint8* dst_rgb24,
   1091                       const struct YuvConstants* yuvconstants,
   1092                       int width);
   1093 void I422ToARGB4444Row_C(const uint8* src_y,
   1094                          const uint8* src_u,
   1095                          const uint8* src_v,
   1096                          uint8* dst_argb4444,
   1097                          const struct YuvConstants* yuvconstants,
   1098                          int width);
   // _C declarations of the I422 -> ARGB1555 and I422 -> RGB565 row converters.
   // The ARGB1555 destination is named dst_argb1555; it was previously named
   // dst_argb4444, copied from the preceding ARGB4444 declaration. Prototype
   // parameter names do not affect callers.
   1099 void I422ToARGB1555Row_C(const uint8* src_y,
   1100                          const uint8* src_u,
   1101                          const uint8* src_v,
   1102                          uint8* dst_argb1555,
   1103                          const struct YuvConstants* yuvconstants,
   1104                          int width);
   1105 void I422ToRGB565Row_C(const uint8* src_y,
   1106                        const uint8* src_u,
   1107                        const uint8* src_v,
   1108                        uint8* dst_rgb565,
   1109                        const struct YuvConstants* yuvconstants,
   1110                        int width);
   // AVX2 declaration of the I422 -> ARGB row converter (separate Y, U, V rows,
   // ARGB destination, YuvConstants table, width). Declared once; the second,
   // byte-identical declaration that followed was redundant and is removed.
   1111 void I422ToARGBRow_AVX2(const uint8* src_y,
   1112                         const uint8* src_u,
   1113                         const uint8* src_v,
   1114                         uint8* dst_argb,
   1115                         const struct YuvConstants* yuvconstants,
   1116                         int width);
   // AVX2 declaration of the I422 -> RGBA row converter. The destination is
   // named dst_rgba to agree with I422ToRGBARow_C and I422ToRGBARow_SSSE3; it was
   // previously dst_argb. Prototype parameter names do not affect callers.
   1123 void I422ToRGBARow_AVX2(const uint8* src_y,
   1124                         const uint8* src_u,
   1125                         const uint8* src_v,
   1126                         uint8* dst_rgba,
   1127                         const struct YuvConstants* yuvconstants,
   1128                         int width);
   // SSSE3 and AVX2 declarations of the I444 -> ARGB row converter (separate Y,
   // U, V rows, ARGB destination, YuvConstants table, width). Each is declared
   // once; the verbatim repeat of both declarations that followed was redundant
   // and is removed.
   1129 void I444ToARGBRow_SSSE3(const uint8* src_y,
   1130                          const uint8* src_u,
   1131                          const uint8* src_v,
   1132                          uint8* dst_argb,
   1133                          const struct YuvConstants* yuvconstants,
   1134                          int width);
   1135 void I444ToARGBRow_AVX2(const uint8* src_y,
   1136                         const uint8* src_u,
   1137                         const uint8* src_v,
   1138                         uint8* dst_argb,
   1139                         const struct YuvConstants* yuvconstants,
   1140                         int width);
   // SSSE3 declaration of the I422 -> ARGB row converter, followed by the SSSE3
   // and AVX2 declarations of the I422Alpha variant, which takes an extra a_buf
   // input row. I422ToARGBRow_SSSE3 is declared once; the byte-identical second
   // declaration that followed the I422Alpha pair was redundant and is removed.
   // NOTE(review): a_buf is presumably a per-pixel alpha plane - confirm.
   1153 void I422ToARGBRow_SSSE3(const uint8* src_y,
   1154                          const uint8* src_u,
   1155                          const uint8* src_v,
   1156                          uint8* dst_argb,
   1157                          const struct YuvConstants* yuvconstants,
   1158                          int width);
   1159 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
   1160                               const uint8* u_buf,
   1161                               const uint8* v_buf,
   1162                               const uint8* a_buf,
   1163                               uint8* dst_argb,
   1164                               const struct YuvConstants* yuvconstants,
   1165                               int width);
   1166 void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
   1167                              const uint8* u_buf,
   1168                              const uint8* v_buf,
   1169                              const uint8* a_buf,
   1170                              uint8* dst_argb,
   1171                              const struct YuvConstants* yuvconstants,
   1172                              int width);
   // SSSE3 and AVX2 declarations of the I411 -> ARGB (separate Y, U, V rows) and
   // NV12 -> ARGB (Y row plus a single src_uv row) converters. All take a
   // YuvConstants table and a width.
   1179 void I411ToARGBRow_SSSE3(const uint8* src_y,
   1180                          const uint8* src_u,
   1181                          const uint8* src_v,
   1182                          uint8* dst_argb,
   1183                          const struct YuvConstants* yuvconstants,
   1184                          int width);
   1185 void I411ToARGBRow_AVX2(const uint8* src_y,
   1186                         const uint8* src_u,
   1187                         const uint8* src_v,
   1188                         uint8* dst_argb,
   1189                         const struct YuvConstants* yuvconstants,
   1190                         int width);
   1191 void NV12ToARGBRow_SSSE3(const uint8* src_y,
   1192                          const uint8* src_uv,
   1193                          uint8* dst_argb,
   1194                          const struct YuvConstants* yuvconstants,
   1195                          int width);
   1196 void NV12ToARGBRow_AVX2(const uint8* src_y,
   1197                         const uint8* src_uv,
   1198                         uint8* dst_argb,
   1199                         const struct YuvConstants* yuvconstants,
   1200                         int width);
   // SSSE3 and AVX2 declarations of the NV12 -> RGB565 row converter: Y row plus
   // a single src_uv row in, RGB565 row out. The destination is named dst_rgb565
   // to match the output format (it was previously dst_argb); prototype
   // parameter names do not affect callers.
   1201 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
   1202                            const uint8* src_uv,
   1203                            uint8* dst_rgb565,
   1204                            const struct YuvConstants* yuvconstants,
   1205                            int width);
   1206 void NV12ToRGB565Row_AVX2(const uint8* src_y,
   1207                           const uint8* src_uv,
   1208                           uint8* dst_rgb565,
   1209                           const struct YuvConstants* yuvconstants,
   1210                           int width);
   1211 void NV21ToARGBRow_SSSE3(const uint8* src_y,
   1212                          const uint8* src_uv,
   1213                          uint8* dst_argb,
   1214                          const struct YuvConstants* yuvconstants,
   1215                          int width);
   1216 void NV21ToARGBRow_AVX2(const uint8* src_y,
   1217                         const uint8* src_uv,
   1218                         uint8* dst_argb,
   1219                         const struct YuvConstants* yuvconstants,
   1220                         int width);
   // Single-pointer-source prototypes: one source pointer (src_yuy2 or
   // src_uyvy), dst_argb, a YuvConstants pointer and a width. Declared once per
   // SSSE3 and AVX2 suffix for each of the two source names.
   1221 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
   1222                          uint8* dst_argb,
   1223                          const struct YuvConstants* yuvconstants,
   1224                          int width);
   1225 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
   1226                          uint8* dst_argb,
   1227                          const struct YuvConstants* yuvconstants,
   1228                          int width);
   1229 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
   1230                         uint8* dst_argb,
   1231                         const struct YuvConstants* yuvconstants,
   1232                         int width);
   1233 void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
   1234                         uint8* dst_argb,
   1235                         const struct YuvConstants* yuvconstants,
   1236                         int width);
   // I422-named prototypes for destinations other than plain ARGB. All share the
   // (src_y, src_u, src_v, dst, yuvconstants, width) parameter list.
   // I422ToRGBARow is declared for SSSE3 only in this group; the other four
   // destinations are declared for both SSSE3 and AVX2.
   // NOTE(review): the ARGB4444, ARGB1555 and RGB565 destinations are all named
   // dst_argb; the bytes written per pixel are not visible here -- confirm.
   1237 void I422ToRGBARow_SSSE3(const uint8* src_y,
   1238                          const uint8* src_u,
   1239                          const uint8* src_v,
   1240                          uint8* dst_rgba,
   1241                          const struct YuvConstants* yuvconstants,
   1242                          int width);
   1243 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
   1244                              const uint8* src_u,
   1245                              const uint8* src_v,
   1246                              uint8* dst_argb,
   1247                              const struct YuvConstants* yuvconstants,
   1248                              int width);
   1249 void I422ToARGB4444Row_AVX2(const uint8* src_y,
   1250                             const uint8* src_u,
   1251                             const uint8* src_v,
   1252                             uint8* dst_argb,
   1253                             const struct YuvConstants* yuvconstants,
   1254                             int width);
   1255 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
   1256                              const uint8* src_u,
   1257                              const uint8* src_v,
   1258                              uint8* dst_argb,
   1259                              const struct YuvConstants* yuvconstants,
   1260                              int width);
   1261 void I422ToARGB1555Row_AVX2(const uint8* src_y,
   1262                             const uint8* src_u,
   1263                             const uint8* src_v,
   1264                             uint8* dst_argb,
   1265                             const struct YuvConstants* yuvconstants,
   1266                             int width);
   1267 void I422ToRGB565Row_SSSE3(const uint8* src_y,
   1268                            const uint8* src_u,
   1269                            const uint8* src_v,
   1270                            uint8* dst_argb,
   1271                            const struct YuvConstants* yuvconstants,
   1272                            int width);
   1273 void I422ToRGB565Row_AVX2(const uint8* src_y,
   1274                           const uint8* src_u,
   1275                           const uint8* src_v,
   1276                           uint8* dst_argb,
   1277                           const struct YuvConstants* yuvconstants,
   1278                           int width);
   1279 void I422ToRGB24Row_SSSE3(const uint8* src_y,
   1280                           const uint8* src_u,
   1281                           const uint8* src_v,
   1282                           uint8* dst_rgb24,
   1283                           const struct YuvConstants* yuvconstants,
   1284                           int width);
   1285 void I422ToRGB24Row_AVX2(const uint8* src_y,
   1286                          const uint8* src_u,
   1287                          const uint8* src_v,
   1288                          uint8* dst_rgb24,
   1289                          const struct YuvConstants* yuvconstants,
   1290                          int width);
   // _Any_ forms of the planar-source ARGB prototypes (I422, I444, I422Alpha,
   // I411) for SSSE3 and AVX2. Parameter lists mirror the plain declarations;
   // the I422Alpha pair takes an extra a_buf source and names its inputs
   // y_buf / u_buf / v_buf instead of src_y / src_u / src_v.
   // NOTE(review): the _Any_ infix presumably marks variants that accept widths
   // the plain SIMD kernel cannot handle -- confirm against the implementations.
   // NOTE(review): I422ToRGBARow_Any_AVX2 names its destination dst_argb while
   // I422ToRGBARow_Any_SSSE3 elsewhere in this header uses dst_rgba.
   1291 void I422ToARGBRow_Any_AVX2(const uint8* src_y,
   1292                             const uint8* src_u,
   1293                             const uint8* src_v,
   1294                             uint8* dst_argb,
   1295                             const struct YuvConstants* yuvconstants,
   1296                             int width);
   1297 void I422ToRGBARow_Any_AVX2(const uint8* src_y,
   1298                             const uint8* src_u,
   1299                             const uint8* src_v,
   1300                             uint8* dst_argb,
   1301                             const struct YuvConstants* yuvconstants,
   1302                             int width);
   1303 void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
   1304                              const uint8* src_u,
   1305                              const uint8* src_v,
   1306                              uint8* dst_argb,
   1307                              const struct YuvConstants* yuvconstants,
   1308                              int width);
   1309 void I444ToARGBRow_Any_AVX2(const uint8* src_y,
   1310                             const uint8* src_u,
   1311                             const uint8* src_v,
   1312                             uint8* dst_argb,
   1313                             const struct YuvConstants* yuvconstants,
   1314                             int width);
   1315 void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
   1316                              const uint8* src_u,
   1317                              const uint8* src_v,
   1318                              uint8* dst_argb,
   1319                              const struct YuvConstants* yuvconstants,
   1320                              int width);
   1321 void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
   1322                                   const uint8* u_buf,
   1323                                   const uint8* v_buf,
   1324                                   const uint8* a_buf,
   1325                                   uint8* dst_argb,
   1326                                   const struct YuvConstants* yuvconstants,
   1327                                   int width);
   1328 void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
   1329                                  const uint8* u_buf,
   1330                                  const uint8* v_buf,
   1331                                  const uint8* a_buf,
   1332                                  uint8* dst_argb,
   1333                                  const struct YuvConstants* yuvconstants,
   1334                                  int width);
   1335 void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
   1336                              const uint8* src_u,
   1337                              const uint8* src_v,
   1338                              uint8* dst_argb,
   1339                              const struct YuvConstants* yuvconstants,
   1340                              int width);
   1341 void I411ToARGBRow_Any_AVX2(const uint8* src_y,
   1342                             const uint8* src_u,
   1343                             const uint8* src_v,
   1344                             uint8* dst_argb,
   1345                             const struct YuvConstants* yuvconstants,
   1346                             int width);
   // _Any_ forms of the NV12 / NV21 prototypes for SSSE3 and AVX2: src_y plus
   // one second source pointer, a destination, a YuvConstants pointer, a width.
   // The NV21 declarations here name the second source src_vu; the NV12 ones
   // name it src_uv.
   // NOTE(review): presumably the name reflects the byte order of the two
   // chroma samples -- confirm against the implementations.
   1347 void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
   1348                              const uint8* src_uv,
   1349                              uint8* dst_argb,
   1350                              const struct YuvConstants* yuvconstants,
   1351                              int width);
   1352 void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
   1353                             const uint8* src_uv,
   1354                             uint8* dst_argb,
   1355                             const struct YuvConstants* yuvconstants,
   1356                             int width);
   1357 void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
   1358                              const uint8* src_vu,
   1359                              uint8* dst_argb,
   1360                              const struct YuvConstants* yuvconstants,
   1361                              int width);
   1362 void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
   1363                             const uint8* src_vu,
   1364                             uint8* dst_argb,
   1365                             const struct YuvConstants* yuvconstants,
   1366                             int width);
   1367 void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1368                                const uint8* src_uv,
   1369                                uint8* dst_argb,
   1370                                const struct YuvConstants* yuvconstants,
   1371                                int width);
   1372 void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
   1373                               const uint8* src_uv,
   1374                               uint8* dst_argb,
   1375                               const struct YuvConstants* yuvconstants,
   1376                               int width);
   // _Any_ forms of the YUY2 / UYVY to ARGB prototypes for SSSE3 and AVX2.
   // Same parameter list as the plain declarations: one source pointer,
   // dst_argb, a YuvConstants pointer and a width.
   1377 void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
   1378                              uint8* dst_argb,
   1379                              const struct YuvConstants* yuvconstants,
   1380                              int width);
   1381 void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
   1382                              uint8* dst_argb,
   1383                              const struct YuvConstants* yuvconstants,
   1384                              int width);
   1385 void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
   1386                             uint8* dst_argb,
   1387                             const struct YuvConstants* yuvconstants,
   1388                             int width);
   1389 void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
   1390                             uint8* dst_argb,
   1391                             const struct YuvConstants* yuvconstants,
   1392                             int width);
   // _Any_ forms of the I422-named prototypes for RGBA, ARGB4444, ARGB1555,
   // RGB565 and RGB24 destinations. All share the
   // (src_y, src_u, src_v, dst, yuvconstants, width) parameter list.
   // NOTE(review): destination names differ from the plain declarations earlier
   // in this header: ARGB4444/ARGB1555/RGB565 use dst_rgba here (dst_argb
   // there) and RGB24 uses dst_argb here (dst_rgb24 there). Prototype parameter
   // names do not affect callers, but consider aligning them.
   1393 void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
   1394                              const uint8* src_u,
   1395                              const uint8* src_v,
   1396                              uint8* dst_rgba,
   1397                              const struct YuvConstants* yuvconstants,
   1398                              int width);
   1399 void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
   1400                                  const uint8* src_u,
   1401                                  const uint8* src_v,
   1402                                  uint8* dst_rgba,
   1403                                  const struct YuvConstants* yuvconstants,
   1404                                  int width);
   1405 void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
   1406                                 const uint8* src_u,
   1407                                 const uint8* src_v,
   1408                                 uint8* dst_rgba,
   1409                                 const struct YuvConstants* yuvconstants,
   1410                                 int width);
   1411 void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
   1412                                  const uint8* src_u,
   1413                                  const uint8* src_v,
   1414                                  uint8* dst_rgba,
   1415                                  const struct YuvConstants* yuvconstants,
   1416                                  int width);
   1417 void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
   1418                                 const uint8* src_u,
   1419                                 const uint8* src_v,
   1420                                 uint8* dst_rgba,
   1421                                 const struct YuvConstants* yuvconstants,
   1422                                 int width);
   1423 void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1424                                const uint8* src_u,
   1425                                const uint8* src_v,
   1426                                uint8* dst_rgba,
   1427                                const struct YuvConstants* yuvconstants,
   1428                                int width);
   1429 void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
   1430                               const uint8* src_u,
   1431                               const uint8* src_v,
   1432                               uint8* dst_rgba,
   1433                               const struct YuvConstants* yuvconstants,
   1434                               int width);
   1435 void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
   1436                               const uint8* src_u,
   1437                               const uint8* src_v,
   1438                               uint8* dst_argb,
   1439                               const struct YuvConstants* yuvconstants,
   1440                               int width);
   1441 void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
   1442                              const uint8* src_u,
   1443                              const uint8* src_v,
   1444                              uint8* dst_argb,
   1445                              const struct YuvConstants* yuvconstants,
   1446                              int width);
   1447 
   // I400ToARGBRow family: src_y in, dst_argb out, plus a width. Unlike the
   // other *ToARGBRow prototypes in this header these take no YuvConstants
   // pointer. One declaration per suffix: C, SSE2, AVX2, NEON and the _Any_
   // forms of the three SIMD suffixes.
   1448 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
   1449 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1450 void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1451 void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1452 void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1453 void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1454 void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1455 
   1456 // ARGB preattenuated alpha blend.
   // Takes two sources (src_argb, src_argb1), one destination and a width.
   // Declared for SSSE3, NEON and C; no _Any_ forms are declared in this group.
   // NOTE(review): which source is foreground vs. background is not visible
   // from the prototype -- confirm against the implementation.
   1457 void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
   1458                         uint8* dst_argb, int width);
   1459 void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1460                        uint8* dst_argb, int width);
   1461 void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
   1462                     uint8* dst_argb, int width);
   1463 
   1464 // Unattenuated planar alpha blend.
   // Takes two source planes (src0, src1), a separate alpha pointer, one dst
   // pointer and a width. Declared for SSSE3 and AVX2 (each with an _Any_ form)
   // and for C.
   1465 void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
   1466                          const uint8* alpha, uint8* dst, int width);
   1467 void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1,
   1468                              const uint8* alpha, uint8* dst, int width);
   1469 void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
   1470                         const uint8* alpha, uint8* dst, int width);
   1471 void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1,
   1472                             const uint8* alpha, uint8* dst, int width);
   1473 void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
   1474                      const uint8* alpha, uint8* dst, int width);
   1475 
   1476 // ARGB multiply images. Same API as Blend, but these require
   1477 // pointer and width alignment for SSE2.
   // Declared for C, SSE2, AVX2 and NEON, with an _Any_ form for each SIMD
   // suffix.
   // NOTE(review): whether the alignment requirement above also applies to the
   // _Any_ forms is not visible from the prototypes -- confirm.
   1478 void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
   1479                        uint8* dst_argb, int width);
   1480 void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1481                           uint8* dst_argb, int width);
   1482 void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1483                               uint8* dst_argb, int width);
   1484 void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1485                           uint8* dst_argb, int width);
   1486 void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1487                               uint8* dst_argb, int width);
   1488 void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1489                           uint8* dst_argb, int width);
   1490 void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1491                               uint8* dst_argb, int width);
   1492 
   1493 // ARGB add images.
   // Same (src_argb, src_argb1, dst_argb, width) parameter list as the multiply
   // and subtract prototypes. Declared for C, SSE2, AVX2 and NEON, with an
   // _Any_ form for each SIMD suffix.
   // NOTE(review): overflow handling (saturate vs. wrap) is not visible from
   // the prototypes -- confirm against the implementations.
   1494 void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
   1495                   uint8* dst_argb, int width);
   1496 void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1497                      uint8* dst_argb, int width);
   1498 void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1499                          uint8* dst_argb, int width);
   1500 void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1501                      uint8* dst_argb, int width);
   1502 void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1503                          uint8* dst_argb, int width);
   1504 void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1505                      uint8* dst_argb, int width);
   1506 void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1507                          uint8* dst_argb, int width);
   1508 
   1509 // ARGB subtract images. Same API as Blend, but these require
   1510 // pointer and width alignment for SSE2.
   // Declared for C, SSE2, AVX2 and NEON, with an _Any_ form for each SIMD
   // suffix.
   // NOTE(review): operand order (src_argb - src_argb1 or the reverse) is not
   // visible from the prototypes -- confirm against the implementations.
   1511 void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
   1512                        uint8* dst_argb, int width);
   1513 void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1514                           uint8* dst_argb, int width);
   1515 void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1516                               uint8* dst_argb, int width);
   1517 void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1518                           uint8* dst_argb, int width);
   1519 void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1520                               uint8* dst_argb, int width);
   1521 void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1522                           uint8* dst_argb, int width);
   1523 void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1524                               uint8* dst_argb, int width);
   1525 
   // _Any_ prototypes that take src_argb and write dst_rgb for a given width:
   // RGB24 and RAW carry the SSSE3 suffix; RGB565, ARGB1555 and ARGB4444 carry
   // the SSE2 suffix. The destination is named dst_rgb for every target format.
   1526 void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
   1527 void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
   1528 void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
   1529 void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1530                                 int width);
   1531 void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1532                                 int width);
   1533 
   // Dither variants of ARGBToRGB565Row (_Any_ SSE2 / AVX2): same source and
   // destination parameters plus a dither4 value passed as a uint32.
   // NOTE(review): dither4 presumably packs four 8-bit dither values -- confirm
   // against the implementations.
   1534 void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1535                                     const uint32 dither4, int width);
   1536 void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1537                                     const uint32 dither4, int width);
   1538 
   // AVX2-suffixed _Any_ prototypes for the RGB565, ARGB1555 and ARGB4444
   // targets; same (src_argb, dst_rgb, width) parameter list as the SSE2 ones.
   1539 void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1540 void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1541                                 int width);
   1542 void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1543                                 int width);
   1544 
   // NEON-suffixed _Any_ prototypes for the same ARGB-source targets: RGB24,
   // RAW, RGB565, ARGB1555, ARGB4444, plus the RGB565 dither variant with its
   // extra uint32 dither4 parameter.
   1545 void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1546 void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1547 void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1548 void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
   1549                                 int width);
   1550 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
   1551                                 int width);
   1552 void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
   1553                                     const uint32 dither4, int width);
   1554 
   // NEON-suffixed _Any_ prototypes for the YUV-source conversions. Three
   // parameter shapes appear:
   //   - src_y, src_u, src_v (plus src_a for I422Alpha), dst, yuvconstants, width
   //   - src_y, one src_uv / src_vu pointer, dst, yuvconstants, width
   //   - one src_yuy2 / src_uyvy pointer, dst, yuvconstants, width
   // NOTE(review): every destination in this group is named dst_argb, including
   // the RGBA, RGB24, ARGB4444, ARGB1555 and RGB565 targets; the actual output
   // layout is not visible from the prototypes -- confirm.
   1555 void I444ToARGBRow_Any_NEON(const uint8* src_y,
   1556                             const uint8* src_u,
   1557                             const uint8* src_v,
   1558                             uint8* dst_argb,
   1559                             const struct YuvConstants* yuvconstants,
   1560                             int width);
   1561 void I422ToARGBRow_Any_NEON(const uint8* src_y,
   1562                             const uint8* src_u,
   1563                             const uint8* src_v,
   1564                             uint8* dst_argb,
   1565                             const struct YuvConstants* yuvconstants,
   1566                             int width);
   1567 void I422AlphaToARGBRow_Any_NEON(const uint8* src_y,
   1568                                  const uint8* src_u,
   1569                                  const uint8* src_v,
   1570                                  const uint8* src_a,
   1571                                  uint8* dst_argb,
   1572                                  const struct YuvConstants* yuvconstants,
   1573                                  int width);
   1574 void I411ToARGBRow_Any_NEON(const uint8* src_y,
   1575                             const uint8* src_u,
   1576                             const uint8* src_v,
   1577                             uint8* dst_argb,
   1578                             const struct YuvConstants* yuvconstants,
   1579                             int width);
   1580 void I422ToRGBARow_Any_NEON(const uint8* src_y,
   1581                             const uint8* src_u,
   1582                             const uint8* src_v,
   1583                             uint8* dst_argb,
   1584                             const struct YuvConstants* yuvconstants,
   1585                             int width);
   1586 void I422ToRGB24Row_Any_NEON(const uint8* src_y,
   1587                              const uint8* src_u,
   1588                              const uint8* src_v,
   1589                              uint8* dst_argb,
   1590                              const struct YuvConstants* yuvconstants,
   1591                              int width);
   1592 void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
   1593                                 const uint8* src_u,
   1594                                 const uint8* src_v,
   1595                                 uint8* dst_argb,
   1596                                 const struct YuvConstants* yuvconstants,
   1597                                 int width);
   1598 void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
   1599                                 const uint8* src_u,
   1600                                 const uint8* src_v,
   1601                                 uint8* dst_argb,
   1602                                 const struct YuvConstants* yuvconstants,
   1603                                 int width);
   1604 void I422ToRGB565Row_Any_NEON(const uint8* src_y,
   1605                               const uint8* src_u,
   1606                               const uint8* src_v,
   1607                               uint8* dst_argb,
   1608                               const struct YuvConstants* yuvconstants,
   1609                               int width);
   1610 void NV12ToARGBRow_Any_NEON(const uint8* src_y,
   1611                             const uint8* src_uv,
   1612                             uint8* dst_argb,
   1613                             const struct YuvConstants* yuvconstants,
   1614                             int width);
   1615 void NV21ToARGBRow_Any_NEON(const uint8* src_y,
   1616                             const uint8* src_vu,
   1617                             uint8* dst_argb,
   1618                             const struct YuvConstants* yuvconstants,
   1619                             int width);
   1620 void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
   1621                               const uint8* src_uv,
   1622                               uint8* dst_argb,
   1623                               const struct YuvConstants* yuvconstants,
   1624                               int width);
   1625 void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
   1626                             uint8* dst_argb,
   1627                             const struct YuvConstants* yuvconstants,
   1628                             int width);
   1629 void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
   1630                             uint8* dst_argb,
   1631                             const struct YuvConstants* yuvconstants,
   1632                             int width);
   // DSPR2-suffixed I422 to ARGB row prototype: src_y, src_u and src_v inputs,
   // dst_argb output, a YuvConstants pointer and a width.
   // This prototype was previously declared twice in a row, byte-for-byte
   // identical; the redundant redeclaration has been dropped.
   // NOTE(review): if the second copy was meant to declare a different DSPR2
   // function, add that declaration explicitly.
   1633 void I422ToARGBRow_DSPR2(const uint8* src_y,
   1634                          const uint8* src_u,
   1635                          const uint8* src_v,
   1636                          uint8* dst_argb,
   1637                          const struct YuvConstants* yuvconstants,
   1638                          int width);
   1645 
   // YUY2 source row prototypes, declared for AVX2, SSE2, NEON and C, followed
   // by the _Any_ form of each SIMD suffix. Three shapes per suffix:
   //   YUY2ToYRow     - src_yuy2 in, dst_y out, width.
   //   YUY2ToUVRow    - src_yuy2 plus a stride_yuy2, dst_u and dst_v out, width.
   //   YUY2ToUV422Row - src_yuy2 in (no stride), dst_u and dst_v out, width.
   // NOTE(review): the stride presumably lets UVRow read a second source row;
   // not visible from the prototypes -- confirm against the implementations.
   1646 void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
   1647 void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1648                       uint8* dst_u, uint8* dst_v, int width);
   1649 void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
   1650                          uint8* dst_u, uint8* dst_v, int width);
   1651 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
   1652 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1653                       uint8* dst_u, uint8* dst_v, int width);
   1654 void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
   1655                          uint8* dst_u, uint8* dst_v, int width);
   1656 void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
   1657 void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
   1658                       uint8* dst_u, uint8* dst_v, int width);
   1659 void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
   1660                          uint8* dst_u, uint8* dst_v, int width);
   1661 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width);
   1662 void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
   1663                    uint8* dst_u, uint8* dst_v, int width);
   1664 void YUY2ToUV422Row_C(const uint8* src_yuy2,
   1665                       uint8* dst_u, uint8* dst_v, int width);
   1666 void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
   1667 void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1668                           uint8* dst_u, uint8* dst_v, int width);
   1669 void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
   1670                              uint8* dst_u, uint8* dst_v, int width);
   1671 void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
   1672 void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1673                           uint8* dst_u, uint8* dst_v, int width);
   1674 void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
   1675                              uint8* dst_u, uint8* dst_v, int width);
   1676 void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
   1677 void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
   1678                           uint8* dst_u, uint8* dst_v, int width);
   1679 void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
   1680                              uint8* dst_u, uint8* dst_v, int width);
   // UYVY source row prototypes for the AVX2, SSE2 and NEON suffixes. Three
   // shapes per suffix, mirroring the YUY2 declarations:
   //   UYVYToYRow     - src_uyvy in, dst_y out, width.
   //   UYVYToUVRow    - src_uyvy plus a stride_uyvy, dst_u and dst_v out, width.
   //   UYVYToUV422Row - src_uyvy in (no stride), dst_u and dst_v out, width.
   // The three AVX2 prototypes were previously declared a second time, byte-
   // for-byte identical, between the SSE2 and NEON groups; that redundant
   // repetition has been dropped.
   1681 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
   1682 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1683                       uint8* dst_u, uint8* dst_v, int width);
   1684 void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
   1685                          uint8* dst_u, uint8* dst_v, int width);
   1686 void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
   1687 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1688                       uint8* dst_u, uint8* dst_v, int width);
   1689 void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
   1690                          uint8* dst_u, uint8* dst_v, int width);
   1696 void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
   1697 void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
   1698                       uint8* dst_u, uint8* dst_v, int width);
   1699 void UYVYToUV422Row_NEON(const uint8* src_uyvy,
   1700                          uint8* dst_u, uint8* dst_v, int width);
   1701 
   // UYVY source row prototypes for the C suffix, followed by the _Any_ forms
   // for AVX2, SSE2 and NEON. Same three shapes as the SIMD declarations:
   // ToYRow (dst_y), ToUVRow (takes stride_uyvy, writes dst_u / dst_v) and
   // ToUV422Row (no stride, writes dst_u / dst_v).
   1702 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width);
   1703 void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
   1704                    uint8* dst_u, uint8* dst_v, int width);
   1705 void UYVYToUV422Row_C(const uint8* src_uyvy,
   1706                       uint8* dst_u, uint8* dst_v, int width);
   1707 void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
   1708 void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1709                           uint8* dst_u, uint8* dst_v, int width);
   1710 void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
   1711                              uint8* dst_u, uint8* dst_v, int width);
   1712 void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
   1713 void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1714                           uint8* dst_u, uint8* dst_v, int width);
   1715 void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
   1716                              uint8* dst_u, uint8* dst_v, int width);
   1717 void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
   1718 void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
   1719                           uint8* dst_u, uint8* dst_v, int width);
   1720 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
   1721                              uint8* dst_u, uint8* dst_v, int width);
   1722 
   // Prototypes that take three planar sources (src_y, src_u, src_v) and write
   // a single dst_yuy2 or dst_uyvy destination for a given width. Declared for
   // C, SSE2 and NEON, with _Any_ forms for the two SIMD suffixes. Unlike the
   // *ToARGBRow prototypes these take no YuvConstants pointer.
   1723 void I422ToYUY2Row_C(const uint8* src_y,
   1724                      const uint8* src_u,
   1725                      const uint8* src_v,
   1726                      uint8* dst_yuy2, int width);
   1727 void I422ToUYVYRow_C(const uint8* src_y,
   1728                      const uint8* src_u,
   1729                      const uint8* src_v,
   1730                      uint8* dst_uyvy, int width);
   1731 void I422ToYUY2Row_SSE2(const uint8* src_y,
   1732                         const uint8* src_u,
   1733                         const uint8* src_v,
   1734                         uint8* dst_yuy2, int width);
   1735 void I422ToUYVYRow_SSE2(const uint8* src_y,
   1736                         const uint8* src_u,
   1737                         const uint8* src_v,
   1738                         uint8* dst_uyvy, int width);
   1739 void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
   1740                             const uint8* src_u,
   1741                             const uint8* src_v,
   1742                             uint8* dst_yuy2, int width);
   1743 void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
   1744                             const uint8* src_u,
   1745                             const uint8* src_v,
   1746                             uint8* dst_uyvy, int width);
   1747 void I422ToYUY2Row_NEON(const uint8* src_y,
   1748                         const uint8* src_u,
   1749                         const uint8* src_v,
   1750                         uint8* dst_yuy2, int width);
   1751 void I422ToUYVYRow_NEON(const uint8* src_y,
   1752                         const uint8* src_u,
   1753                         const uint8* src_v,
   1754                         uint8* dst_uyvy, int width);
   1755 void I422ToYUY2Row_Any_NEON(const uint8* src_y,
   1756                             const uint8* src_u,
   1757                             const uint8* src_v,
   1758                             uint8* dst_yuy2, int width);
   1759 void I422ToUYVYRow_Any_NEON(const uint8* src_y,
   1760                             const uint8* src_u,
   1761                             const uint8* src_v,
   1762                             uint8* dst_uyvy, int width);
   1763 
   1764 // Effects-related row functions.
   // ARGBAttenuateRow prototypes: _C, _SSSE3, _AVX2 and _NEON, followed by
   // _Any_SSE2, _Any_SSSE3, _Any_AVX2 and _Any_NEON. All take a read-only
   // src_argb, a writable dst_argb and an int width.
   // NOTE(review): an _Any_SSE2 variant is declared, but no plain
   // ARGBAttenuateRow_SSE2 prototype appears in this group -- confirm the
   // _Any_SSE2 declaration is still backed by a definition.
   1765 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1766 void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1767 void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1768 void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1769 void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1770                                int width);
   1771 void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1772                                 int width);
   1773 void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1774                                int width);
   1775 void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
   1776                                int width);
   1777 
   1778 // Inverse table for unattenuate, shared by C and SSE2.
   // fixed_invtbl8 is declared here as 256 uint32 entries; it is only
   // declared in this header, not defined.
   // ARGBUnattenuateRow is declared in _C, _SSE2, _AVX2, _Any_SSE2 and
   // _Any_AVX2 variants; no NEON variant is declared in this group.
   // NOTE(review): the table is presumably indexed by the 8-bit alpha value
   // -- confirm against the definition.
   1779 extern const uint32 fixed_invtbl8[256];
   1780 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1781 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
   1782 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1783 void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1784                                  int width);
   1785 void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1786                                  int width);
   1787 
   // ARGBGrayRow prototypes (_C, _SSSE3, _NEON): separate read-only source
   // and writable destination pointers plus an int width. No _Any_ variants
   // are declared in this group.
   1788 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1789 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1790 void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1791 
   // ARGBSepiaRow prototypes (_C, _SSSE3, _NEON). Unlike ARGBGrayRow, these
   // take only a writable dst_argb and a width; no source pointer is passed.
   // NOTE(review): presumably the row is modified in place -- confirm.
   1792 void ARGBSepiaRow_C(uint8* dst_argb, int width);
   1793 void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
   1794 void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
   1795 
   // ARGBColorMatrixRow prototypes (_C, _SSSE3, _NEON): read-only src_argb,
   // writable dst_argb, a read-only pointer to signed 8-bit coefficients
   // (matrix_argb) and an int width.
   // NOTE(review): the number and layout of coefficients expected behind
   // matrix_argb is not stated in this header -- confirm before calling.
   1796 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
   1797                           const int8* matrix_argb, int width);
   1798 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1799                               const int8* matrix_argb, int width);
   1800 void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
   1801                              const int8* matrix_argb, int width);
   1802 
   // ARGBColorTableRow prototypes (_C and _X86): a writable dst_argb, a
   // read-only table_argb and an int width; no separate source pointer.
   // NOTE(review): presumably an in-place per-channel table lookup; the
   // required table size is not stated here -- confirm.
   1803 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1804 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1805 
   // RGBColorTableRow prototypes (_C and _X86), with the same parameter list
   // as ARGBColorTableRow; the parameters are still named dst_argb and
   // table_argb.
   // NOTE(review): presumably this differs from the ARGB version by leaving
   // alpha untouched -- not visible in this header; confirm.
   1806 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1807 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1808 
   // ARGBQuantizeRow prototypes (_C, _SSE2, _NEON): a writable dst_argb with
   // no separate source pointer, three int controls (scale, interval_size,
   // interval_offset) and an int width.
   // NOTE(review): how the three controls combine (and any fixed-point
   // scaling of `scale`) is not visible in this header -- confirm.
   1809 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
   1810                        int interval_offset, int width);
   1811 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
   1812                           int interval_offset, int width);
   1813 void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
   1814                           int interval_offset, int width);
   1815 
   // ARGBShadeRow prototypes (_C, _SSE2, _NEON): read-only src_argb, writable
   // dst_argb, int width, then a uint32 value. Note that `width` comes before
   // the extra argument here, unlike most neighbouring prototypes, where
   // width is last.
   // NOTE(review): `value` is presumably a packed per-channel scale factor
   // -- confirm.
   1816 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1817                     uint32 value);
   1818 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
   1819                        uint32 value);
   1820 void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
   1821                        uint32 value);
   1822 
   1823 // Used for blur.
   // SSE2 variants. CumulativeSumToAverageRow reads two int32 rows (topleft,
   // botleft) and writes uint8 output; it takes both a `width` and a `count`.
   // ComputeCumulativeSumRow reads a uint8 row plus the previous int32
   // cumulative-sum row and writes a new int32 row.
   // NOTE(review): presumably `width` is the box width into the sum rows,
   // `area` the divisor and `count` the number of outputs -- confirm.
   1824 void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
   1825                                     int width, int area, uint8* dst, int count);
   1826 void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
   1827                                   const int32* previous_cumsum, int width);
   1828 
   // _C variants of the two cumulative-sum helpers; their parameter lists are
   // identical to the _SSE2 prototypes declared just above.
   1829 void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
   1830                                  int width, int area, uint8* dst, int count);
   1831 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
   1832                                const int32* previous_cumsum, int width);
   1833 
   // ARGBAffineRow prototypes (_C and _SSE2). These are the only prototypes
   // in this part of the header that carry the LIBYUV_API decoration. They
   // take a source pointer with its stride, a destination row, a read-only
   // float array uv_dudv and an int width.
   // NOTE(review): uv_dudv presumably holds a start (u, v) followed by a
   // per-pixel step (du, dv), i.e. four floats -- confirm the element count.
   1834 LIBYUV_API
   1835 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
   1836                      uint8* dst_argb, const float* uv_dudv, int width);
   1837 LIBYUV_API
   1838 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
   1839                         uint8* dst_argb, const float* uv_dudv, int width);
   1840 
   1841 // Used for I420Scale, ARGBScale, and ARGBInterpolate.
   // InterpolateRow prototypes: _C, _SSSE3, _AVX2, _NEON and _DSPR2, then the
   // matching _Any_ variants. The destination pointer comes first here,
   // before the source, and the stride is a ptrdiff_t rather than an int.
   // NOTE(review): presumably the row at src_ptr and the row at
   // src_ptr + src_stride_ptr are blended according to source_y_fraction;
   // the fraction's range is not stated in this header -- confirm.
   1842 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
   1843                       ptrdiff_t src_stride_ptr,
   1844                       int width, int source_y_fraction);
   1845 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1846                           ptrdiff_t src_stride_ptr, int width,
   1847                           int source_y_fraction);
   1848 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1849                          ptrdiff_t src_stride_ptr, int width,
   1850                          int source_y_fraction);
   1851 void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1852                          ptrdiff_t src_stride_ptr, int width,
   1853                          int source_y_fraction);
   1854 void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
   1855                           ptrdiff_t src_stride_ptr, int width,
   1856                           int source_y_fraction);
   1857 void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1858                              ptrdiff_t src_stride_ptr, int width,
   1859                              int source_y_fraction);
   1860 void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1861                               ptrdiff_t src_stride_ptr, int width,
   1862                               int source_y_fraction);
   1863 void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1864                              ptrdiff_t src_stride_ptr, int width,
   1865                              int source_y_fraction);
   1866 void InterpolateRow_Any_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
   1867                               ptrdiff_t src_stride_ptr, int width,
   1868                               int source_y_fraction);
   1869 
   // InterpolateRow_16_C mirrors the InterpolateRow_C parameter list with
   // uint16 pointers in place of uint8. Only this _C variant of the 16-bit
   // form is declared in this group.
   // NOTE(review): whether src_stride_ptr is measured in elements or bytes
   // is not visible in this header -- confirm.
   1870 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
   1871                          ptrdiff_t src_stride_ptr,
   1872                          int width, int source_y_fraction);
   1873 
   1874 // Sobel images.
   // Five families are declared here, each in _C, _SSE2 and _NEON variants:
   //   SobelXRow       - three source rows (src_y0, src_y1, src_y2) -> dst_sobelx
   //   SobelYRow       - two source rows (src_y0, src_y1) -> dst_sobely
   //   SobelRow        - src_sobelx + src_sobely -> dst_argb
   //   SobelToPlaneRow - src_sobelx + src_sobely -> dst_y
   //   SobelXYRow      - src_sobelx + src_sobely -> dst_argb
   // _Any_SSE2 / _Any_NEON variants are declared only for SobelRow,
   // SobelToPlaneRow and SobelXYRow.
   // NOTE(review): how SobelRow and SobelXYRow differ in the ARGB they write
   // is not visible in this header -- confirm in the implementation.
   1875 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
   1876                  uint8* dst_sobelx, int width);
   1877 void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1878                     const uint8* src_y2, uint8* dst_sobelx, int width);
   1879 void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1880                     const uint8* src_y2, uint8* dst_sobelx, int width);
   1881 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
   1882                  uint8* dst_sobely, int width);
   1883 void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1884                     uint8* dst_sobely, int width);
   1885 void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1886                     uint8* dst_sobely, int width);
   1887 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1888                 uint8* dst_argb, int width);
   1889 void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1890                    uint8* dst_argb, int width);
   1891 void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1892                    uint8* dst_argb, int width);
   1893 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1894                        uint8* dst_y, int width);
   1895 void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1896                           uint8* dst_y, int width);
   1897 void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1898                           uint8* dst_y, int width);
   1899 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1900                   uint8* dst_argb, int width);
   1901 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1902                      uint8* dst_argb, int width);
   1903 void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1904                      uint8* dst_argb, int width);
   1905 void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1906                        uint8* dst_argb, int width);
   1907 void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1908                        uint8* dst_argb, int width);
   1909 void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1910                               uint8* dst_y, int width);
   1911 void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1912                               uint8* dst_y, int width);
   1913 void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1914                          uint8* dst_argb, int width);
   1915 void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1916                          uint8* dst_argb, int width);
   1917 
   // ARGBPolynomialRow prototypes (_C, _SSE2, _AVX2): read-only src_argb,
   // writable dst_argb, a read-only float array of coefficients (poly) and an
   // int width.
   // NOTE(review): the number and ordering of coefficients expected behind
   // `poly` is not stated in this header -- confirm before calling.
   1918 void ARGBPolynomialRow_C(const uint8* src_argb,
   1919                          uint8* dst_argb, const float* poly,
   1920                          int width);
   1921 void ARGBPolynomialRow_SSE2(const uint8* src_argb,
   1922                             uint8* dst_argb, const float* poly,
   1923                             int width);
   1924 void ARGBPolynomialRow_AVX2(const uint8* src_argb,
   1925                             uint8* dst_argb, const float* poly,
   1926                             int width);
   1927 
   // ARGBLumaColorTableRow prototypes (_C and _SSSE3): read-only src_argb,
   // writable dst_argb, int width, then a read-only byte table (luma) and a
   // uint32 lumacoeff. As with ARGBShadeRow, `width` precedes the extra
   // arguments instead of coming last.
   // NOTE(review): lumacoeff presumably packs per-channel luma weights and
   // selects a sub-table within `luma`; table size is not stated -- confirm.
   1928 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1929                              const uint8* luma, uint32 lumacoeff);
   1930 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1931                                  int width,
   1932                                  const uint8* luma, uint32 lumacoeff);
   1933 
   1934 #ifdef __cplusplus
   1935 }  // extern "C"
   1936 }  // namespace libyuv
   1937 #endif
   1938 
   1939 #endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
   1940