Home | History | Annotate | Download | only in libyuv
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
     12 #define INCLUDE_LIBYUV_ROW_H_
     13 
     14 #include <stdlib.h>  // For malloc.
     15 
     16 #include "libyuv/basic_types.h"
     17 
     18 #ifdef __cplusplus
     19 namespace libyuv {
     20 extern "C" {
     21 #endif
     22 
        // IS_ALIGNED(p, a): nonzero when p, converted to uintptr_t, has none of the
        // bits of (a) - 1 set.  That is the same as "p is a multiple of a" only
        // when a is a power of two.
     23 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
     24 
        // align_buffer_64(var, size) declares two uint8* locals in the current
        // scope:
        //   var##_mem - the raw malloc() result; (size) + 63 bytes are requested so
        //               that a 64-byte boundary always falls inside the block.
        //   var       - var##_mem rounded up to the next multiple of 64.
        // The malloc() result is not checked here.  Because the macro expands to
        // two declarations it can only appear where declarations are allowed, and
        // the caller supplies the trailing semicolon.  Release the memory with
        // free_aligned_buffer_64(var), which frees var##_mem rather than var.
        // The C++ and C branches differ only in cast syntax.
     25 #ifdef __cplusplus
     26 #define align_buffer_64(var, size)                                             \
     27   uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
     28   uint8* var = reinterpret_cast<uint8*>                                        \
     29       ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
     30 #else
     31 #define align_buffer_64(var, size)                                             \
     32   uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
     33   uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63)       /* NOLINT */
     34 #endif
     35 
     36 #define free_aligned_buffer_64(var) \
     37   free(var##_mem);  \
     38   var = 0
     39 
        // LIBYUV_DISABLE_X86 is tested by every x86 feature list below.  It is set
        // here for PNaCl builds, for builds where __CLR_VER is defined, and for
        // 32-bit x86 targets compiled without SSE2.
     40 #if defined(__pnacl__) || defined(__CLR_VER) || \
     41     (defined(__i386__) && !defined(__SSE2__))
     42 #define LIBYUV_DISABLE_X86
     43 #endif
     44 // Defined when the build itself targets SSSE3: the compiler defines
        // __SSSE3__, or _M_IX86_FP is at least 3.
        // NOTE(review): MSVC documents _M_IX86_FP values 0, 1 and 2 only, so the
        // second test looks unreachable - confirm.
     45 #if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
     46 #define LIBYUV_SSSE3_ONLY
     47 #endif
     48 
        // LIBYUV_DISABLE_NEON is tested by the NEON feature list below.  Native
        // Client builds always set it.
     49 #if defined(__native_client__)
     50 #define LIBYUV_DISABLE_NEON
     51 #endif
     52 // clang >= 3.5.0 required for Arm64: older clang on aarch64 disables NEON.
     53 #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
     54 #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
     55 #define LIBYUV_DISABLE_NEON
     56 #endif  // clang < 3.5
     57 #endif  // __clang__ && __aarch64__
     58 
     59 // The following are available on all x86 platforms:
     60 #if !defined(LIBYUV_DISABLE_X86) && \
     61     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
     62 // Conversions:
     63 #define HAS_ABGRTOUVROW_SSSE3
     64 #define HAS_ABGRTOYROW_SSSE3
     65 #define HAS_ARGB1555TOARGBROW_SSE2
     66 #define HAS_ARGB4444TOARGBROW_SSE2
     67 #define HAS_ARGBSETROW_X86
     68 #define HAS_ARGBSHUFFLEROW_SSE2
     69 #define HAS_ARGBSHUFFLEROW_SSSE3
     70 #define HAS_ARGBTOARGB1555ROW_SSE2
     71 #define HAS_ARGBTOARGB4444ROW_SSE2
     72 #define HAS_ARGBTORAWROW_SSSE3
     73 #define HAS_ARGBTORGB24ROW_SSSE3
     74 #define HAS_ARGBTORGB565ROW_SSE2
     75 #define HAS_ARGBTOUV422ROW_SSSE3
     76 #define HAS_ARGBTOUV444ROW_SSSE3
     77 #define HAS_ARGBTOUVJROW_SSSE3
     78 #define HAS_ARGBTOUVROW_SSSE3
     79 #define HAS_ARGBTOYJROW_SSSE3
     80 #define HAS_ARGBTOYROW_SSSE3
     81 #define HAS_BGRATOUVROW_SSSE3
     82 #define HAS_BGRATOYROW_SSSE3
     83 #define HAS_COPYROW_ERMS
     84 #define HAS_COPYROW_SSE2
     85 #define HAS_I400TOARGBROW_SSE2
     86 #define HAS_I411TOARGBROW_SSSE3
     87 #define HAS_I422TOABGRROW_SSSE3
     88 #define HAS_I422TOARGB1555ROW_SSSE3
     89 #define HAS_I422TOARGB4444ROW_SSSE3
     90 #define HAS_I422TOARGBROW_SSSE3
     91 #define HAS_I422TOBGRAROW_SSSE3
     92 #define HAS_I422TORAWROW_SSSE3
     93 #define HAS_I422TORGB24ROW_SSSE3
     94 #define HAS_I422TORGB565ROW_SSSE3
     95 #define HAS_I422TORGBAROW_SSSE3
     96 #define HAS_I422TOUYVYROW_SSE2
     97 #define HAS_I422TOYUY2ROW_SSE2
     98 #define HAS_I444TOARGBROW_SSSE3
     99 #define HAS_J400TOARGBROW_SSE2
    100 #define HAS_J422TOARGBROW_SSSE3
    101 #define HAS_MERGEUVROW_SSE2
    102 #define HAS_MIRRORROW_SSE2
    103 #define HAS_MIRRORROW_SSSE3
    104 #define HAS_MIRRORROW_UV_SSSE3
    105 #define HAS_MIRRORUVROW_SSSE3
    106 #define HAS_NV12TOARGBROW_SSSE3
    107 #define HAS_NV12TORGB565ROW_SSSE3
    108 #define HAS_NV21TOARGBROW_SSSE3
    109 #define HAS_NV21TORGB565ROW_SSSE3
    110 #define HAS_RAWTOARGBROW_SSSE3
    111 #define HAS_RAWTOYROW_SSSE3
    112 #define HAS_RGB24TOARGBROW_SSSE3
    113 #define HAS_RGB24TOYROW_SSSE3
    114 #define HAS_RGB565TOARGBROW_SSE2
    115 #define HAS_RGBATOUVROW_SSSE3
    116 #define HAS_RGBATOYROW_SSSE3
    117 #define HAS_SETROW_ERMS
    118 #define HAS_SETROW_X86
    119 #define HAS_SPLITUVROW_SSE2
    120 #define HAS_UYVYTOARGBROW_SSSE3
    121 #define HAS_UYVYTOUV422ROW_SSE2
    122 #define HAS_UYVYTOUVROW_SSE2
    123 #define HAS_UYVYTOYROW_SSE2
    124 #define HAS_YUY2TOARGBROW_SSSE3
    125 #define HAS_YUY2TOUV422ROW_SSE2
    126 #define HAS_YUY2TOUVROW_SSE2
    127 #define HAS_YUY2TOYROW_SSE2
    128 
    129 // Effects:
    130 #define HAS_ARGBADDROW_SSE2
    131 #define HAS_ARGBAFFINEROW_SSE2
    132 #define HAS_ARGBATTENUATEROW_SSSE3
    133 #define HAS_ARGBBLENDROW_SSSE3
    134 #define HAS_ARGBCOLORMATRIXROW_SSSE3
    135 #define HAS_ARGBCOLORTABLEROW_X86
    136 #define HAS_ARGBCOPYALPHAROW_SSE2
    137 #define HAS_ARGBCOPYYTOALPHAROW_SSE2
    138 #define HAS_ARGBGRAYROW_SSSE3
    139 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
    140 #define HAS_ARGBMIRRORROW_SSE2
    141 #define HAS_ARGBMULTIPLYROW_SSE2
    142 #define HAS_ARGBPOLYNOMIALROW_SSE2
    143 #define HAS_ARGBQUANTIZEROW_SSE2
    144 #define HAS_ARGBSEPIAROW_SSSE3
    145 #define HAS_ARGBSHADEROW_SSE2
    146 #define HAS_ARGBSUBTRACTROW_SSE2
    147 #define HAS_ARGBUNATTENUATEROW_SSE2
    148 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
    149 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
    150 #define HAS_INTERPOLATEROW_SSE2
    151 #define HAS_INTERPOLATEROW_SSSE3
    152 #define HAS_RGBCOLORTABLEROW_X86
    153 #define HAS_SOBELROW_SSE2
    154 #define HAS_SOBELTOPLANEROW_SSE2
    155 #define HAS_SOBELXROW_SSE2
    156 #define HAS_SOBELXYROW_SSE2
    157 #define HAS_SOBELYROW_SSE2
    158 #endif
    159 
    160 // The following are available on x64 Visual C and clangcl.
        // The all-x86 list above tests _M_IX86, __x86_64__ and __i386__ but not
        // _M_X64, so a compiler that defines only _M_X64 gets this flag from here.
        // When __clang__ is defined the flag additionally requires __SSSE3__.
    161 #if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
    162     (!defined(__clang__) || defined(__SSSE3__))
    163 #define HAS_I422TOARGBROW_SSSE3
    164 #endif
    165 
    166 // GCC >= 4.7.0 required for AVX2.
        // The three *_HAS_AVX2 macros below are derived from compiler identity and
        // version macros only; they are consumed by the AVX2 feature lists further
        // down.
    167 #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
    168 #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
    169 #define GCC_HAS_AVX2 1
    170 #endif  // GNUC >= 4.7
    171 #endif  // __GNUC__
    172 
    173 // clang >= 3.4.0 required for AVX2.
    174 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
    175 #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
    176 #define CLANG_HAS_AVX2 1
    177 #endif  // clang >= 3.4
    178 #endif  // __clang__
    179 
    180 // Visual C 2012 required for AVX2.
        // Only set for 32-bit x86 (_M_IX86) and never when __clang__ is defined, so
        // _M_X64 builds do not get VISUALC_HAS_AVX2 from this test.
    181 #if defined(_M_IX86) && !defined(__clang__) && \
    182     defined(_MSC_VER) && _MSC_VER >= 1700
    183 #define VISUALC_HAS_AVX2 1
    184 #endif  // VisualStudio >= 2012
    185 
    186 // The following require VS2012.  TODO: port to GCC.
    187 #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
    188 #define HAS_ARGB1555TOARGBROW_AVX2
    189 #define HAS_ARGB4444TOARGBROW_AVX2
    190 #define HAS_ARGBTOARGB1555ROW_AVX2
    191 #define HAS_ARGBTOARGB4444ROW_AVX2
    192 #define HAS_ARGBTORGB565DITHERROW_AVX2
    193 #define HAS_ARGBTORGB565DITHERROW_SSE2
    194 #define HAS_ARGBTORGB565ROW_AVX2
    195 #define HAS_I411TOARGBROW_AVX2
    196 #define HAS_I422TOARGB1555ROW_AVX2
    197 #define HAS_I422TOARGB4444ROW_AVX2
    198 #define HAS_I422TORGB565ROW_AVX2
    199 #define HAS_I444TOARGBROW_AVX2
    200 #define HAS_J400TOARGBROW_AVX2
    201 #define HAS_NV12TOARGBROW_AVX2
    202 #define HAS_NV12TORGB565ROW_AVX2
    203 #define HAS_NV21TOARGBROW_AVX2
    204 #define HAS_NV21TORGB565ROW_AVX2
    205 #define HAS_RGB565TOARGBROW_AVX2
    206 #endif
    207 
    208 // The following are available on all x86 platforms, but
    209 // require VS2012, clang 3.4 or gcc 4.7.
    210 // The code supports NaCL but requires a new compiler and validator.
    211 #if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
    212     defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
    213 #define HAS_ARGBCOPYALPHAROW_AVX2
    214 #define HAS_ARGBCOPYYTOALPHAROW_AVX2
    215 #define HAS_ARGBMIRRORROW_AVX2
    216 #define HAS_ARGBPOLYNOMIALROW_AVX2
    217 #define HAS_ARGBSHUFFLEROW_AVX2
    218 #define HAS_ARGBTOUVROW_AVX2
    219 #define HAS_ARGBTOYJROW_AVX2
    220 #define HAS_ARGBTOYROW_AVX2
    221 #define HAS_COPYROW_AVX
    222 #define HAS_I400TOARGBROW_AVX2
    223 #define HAS_I422TOABGRROW_AVX2
    224 #define HAS_I422TOARGBROW_AVX2
    225 #define HAS_I422TOBGRAROW_AVX2
    226 #define HAS_I422TORAWROW_AVX2
    227 #define HAS_I422TORGB24ROW_AVX2
    228 #define HAS_I422TORGBAROW_AVX2
    229 #define HAS_INTERPOLATEROW_AVX2
    230 #define HAS_J422TOARGBROW_AVX2
    231 #define HAS_MERGEUVROW_AVX2
    232 #define HAS_MIRRORROW_AVX2
    233 #define HAS_SPLITUVROW_AVX2
    234 #define HAS_UYVYTOARGBROW_AVX2
    235 #define HAS_UYVYTOUV422ROW_AVX2
    236 #define HAS_UYVYTOUVROW_AVX2
    237 #define HAS_UYVYTOYROW_AVX2
    238 #define HAS_YUY2TOARGBROW_AVX2
    239 #define HAS_YUY2TOUV422ROW_AVX2
    240 #define HAS_YUY2TOUVROW_AVX2
    241 #define HAS_YUY2TOYROW_AVX2
    242 
    243 // Effects:
    244 #define HAS_ARGBADDROW_AVX2
    245 #define HAS_ARGBATTENUATEROW_AVX2
    246 #define HAS_ARGBMULTIPLYROW_AVX2
    247 #define HAS_ARGBSUBTRACTROW_AVX2
    248 #define HAS_ARGBUNATTENUATEROW_AVX2
    249 #endif
    250 
    251 // The following SSE2 variants are left out when the build already targets
        // SSSE3 (LIBYUV_SSSE3_ONLY is defined):
        // NOTE(review): HAS_MIRRORROW_SSE2 is also defined unconditionally in the
        // all-x86 conversion list above, so LIBYUV_SSSE3_ONLY does not actually
        // remove it - confirm which definition is intended.
    252 #if !defined(LIBYUV_DISABLE_X86) && \
    253     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
    254     !defined(LIBYUV_SSSE3_ONLY)
    255 #define HAS_ARGBATTENUATEROW_SSE2
    256 #define HAS_ARGBBLENDROW_SSE2
    257 #define HAS_MIRRORROW_SSE2
    258 #endif
    259 
    260 // The following are available on Neon platforms:
    261 #if !defined(LIBYUV_DISABLE_NEON) && \
    262     (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    263 #define HAS_ABGRTOUVROW_NEON
    264 #define HAS_ABGRTOYROW_NEON
    265 #define HAS_ARGB1555TOARGBROW_NEON
    266 #define HAS_ARGB1555TOUVROW_NEON
    267 #define HAS_ARGB1555TOYROW_NEON
    268 #define HAS_ARGB4444TOARGBROW_NEON
    269 #define HAS_ARGB4444TOUVROW_NEON
    270 #define HAS_ARGB4444TOYROW_NEON
    271 #define HAS_ARGBTOARGB1555ROW_NEON
    272 #define HAS_ARGBTOARGB4444ROW_NEON
    273 #define HAS_ARGBTORAWROW_NEON
    274 #define HAS_ARGBTORGB24ROW_NEON
    275 #define HAS_ARGBTORGB565ROW_NEON
    276 #define HAS_ARGBTOUV411ROW_NEON
    277 #define HAS_ARGBTOUV422ROW_NEON
    278 #define HAS_ARGBTOUV444ROW_NEON
    279 #define HAS_ARGBTOUVJROW_NEON
    280 #define HAS_ARGBTOUVROW_NEON
    281 #define HAS_ARGBTOYJROW_NEON
    282 #define HAS_ARGBTOYROW_NEON
    283 #define HAS_BGRATOUVROW_NEON
    284 #define HAS_BGRATOYROW_NEON
    285 #define HAS_COPYROW_NEON
    286 #define HAS_J400TOARGBROW_NEON
    287 #define HAS_I411TOARGBROW_NEON
    288 #define HAS_I422TOABGRROW_NEON
    289 #define HAS_I422TOARGB1555ROW_NEON
    290 #define HAS_I422TOARGB4444ROW_NEON
    291 #define HAS_I422TOARGBROW_NEON
    292 #define HAS_I422TOBGRAROW_NEON
    293 #define HAS_I422TORAWROW_NEON
    294 #define HAS_I422TORGB24ROW_NEON
    295 #define HAS_I422TORGB565ROW_NEON
    296 #define HAS_I422TORGBAROW_NEON
    297 #define HAS_I422TOUYVYROW_NEON
    298 #define HAS_I422TOYUY2ROW_NEON
    299 #define HAS_I444TOARGBROW_NEON
    300 #define HAS_MERGEUVROW_NEON
    301 #define HAS_MIRRORROW_NEON
    302 #define HAS_MIRRORUVROW_NEON
    303 #define HAS_NV12TOARGBROW_NEON
    304 #define HAS_NV12TORGB565ROW_NEON
    305 #define HAS_NV21TOARGBROW_NEON
    306 #define HAS_NV21TORGB565ROW_NEON
    307 #define HAS_RAWTOARGBROW_NEON
    308 #define HAS_RAWTOUVROW_NEON
    309 #define HAS_RAWTOYROW_NEON
    310 #define HAS_RGB24TOARGBROW_NEON
    311 #define HAS_RGB24TOUVROW_NEON
    312 #define HAS_RGB24TOYROW_NEON
    313 #define HAS_RGB565TOARGBROW_NEON
    314 #define HAS_RGB565TOUVROW_NEON
    315 #define HAS_RGB565TOYROW_NEON
    316 #define HAS_RGBATOUVROW_NEON
    317 #define HAS_RGBATOYROW_NEON
    318 #define HAS_SETROW_NEON
    319 #define HAS_ARGBSETROW_NEON
    320 #define HAS_SPLITUVROW_NEON
    321 #define HAS_UYVYTOARGBROW_NEON
    322 #define HAS_UYVYTOUV422ROW_NEON
    323 #define HAS_UYVYTOUVROW_NEON
    324 #define HAS_UYVYTOYROW_NEON
    325 #define HAS_I400TOARGBROW_NEON
    326 #define HAS_YUY2TOARGBROW_NEON
    327 #define HAS_YUY2TOUV422ROW_NEON
    328 #define HAS_YUY2TOUVROW_NEON
    329 #define HAS_YUY2TOYROW_NEON
    330 #define HAS_ARGBTORGB565DITHERROW_NEON
    331 
    332 // Effects:
    333 #define HAS_ARGBADDROW_NEON
    334 #define HAS_ARGBATTENUATEROW_NEON
    335 #define HAS_ARGBBLENDROW_NEON
    336 #define HAS_ARGBGRAYROW_NEON
    337 #define HAS_ARGBMIRRORROW_NEON
    338 #define HAS_ARGBMULTIPLYROW_NEON
    339 #define HAS_ARGBQUANTIZEROW_NEON
    340 #define HAS_ARGBSEPIAROW_NEON
    341 #define HAS_ARGBSHADEROW_NEON
    342 #define HAS_ARGBSUBTRACTROW_NEON
    343 #define HAS_INTERPOLATEROW_NEON
    344 #define HAS_SOBELROW_NEON
    345 #define HAS_SOBELTOPLANEROW_NEON
    346 #define HAS_SOBELXROW_NEON
    347 #define HAS_SOBELXYROW_NEON
    348 #define HAS_SOBELYROW_NEON
    349 #define HAS_ARGBCOLORMATRIXROW_NEON
    350 #define HAS_ARGBSHUFFLEROW_NEON
    351 #endif
    352 
    353 // The following are available on Mips platforms:
        // Requires LIBYUV_DISABLE_MIPS to be unset, the _MIPS_SIM_ABI32 ABI and an
        // ISA revision below 6.  The *_MIPS_DSPR2 flags additionally require
        // __mips_dsp with __mips_dsp_rev >= 2.
    354 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
    355     (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
    356 #define HAS_COPYROW_MIPS
    357 #if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
    358 #define HAS_I422TOABGRROW_MIPS_DSPR2
    359 #define HAS_I422TOARGBROW_MIPS_DSPR2
    360 #define HAS_I422TOBGRAROW_MIPS_DSPR2
    361 #define HAS_INTERPOLATEROW_MIPS_DSPR2
    362 #define HAS_MIRRORROW_MIPS_DSPR2
    363 #define HAS_MIRRORUVROW_MIPS_DSPR2
    364 #define HAS_SPLITUVROW_MIPS_DSPR2
    365 #endif  // __mips_dsp_rev >= 2
    366 #endif  // Mips
    367 
        // SIMD_ALIGNED(var) / SIMD_ALIGNED32(var) wrap a variable declarator with
        // an alignment request.  The vec*/uvec* typedefs are 16 bytes wide and the
        // lvec*/ulvec* typedefs are 32 bytes wide.  Three compiler branches:
        //   - _MSC_VER without __CLR_VER: __declspec(align(N)) on plain arrays.
        //   - __GNUC__: __attribute__((aligned(N))) and vector_size(N) types.
        //   - otherwise: plain arrays and no alignment request at all.
        // NOTE(review): SIMD_ALIGNED32 asks for 64-byte alignment in both the MSVC
        // and GCC branches despite its name - presumably deliberate
        // over-alignment; confirm.
    368 #if defined(_MSC_VER) && !defined(__CLR_VER)
    369 #define SIMD_ALIGNED(var) __declspec(align(16)) var
    370 #define SIMD_ALIGNED32(var) __declspec(align(64)) var
    371 typedef __declspec(align(16)) int16 vec16[8];
    372 typedef __declspec(align(16)) int32 vec32[4];
    373 typedef __declspec(align(16)) int8 vec8[16];
    374 typedef __declspec(align(16)) uint16 uvec16[8];
    375 typedef __declspec(align(16)) uint32 uvec32[4];
    376 typedef __declspec(align(16)) uint8 uvec8[16];
    377 typedef __declspec(align(32)) int16 lvec16[16];
    378 typedef __declspec(align(32)) int32 lvec32[8];
    379 typedef __declspec(align(32)) int8 lvec8[32];
    380 typedef __declspec(align(32)) uint16 ulvec16[16];
    381 typedef __declspec(align(32)) uint32 ulvec32[8];
    382 typedef __declspec(align(32)) uint8 ulvec8[32];
    383 #elif defined(__GNUC__)
    384 // Caveat: GCC 4.2 to 4.7 have a known issue using vectors with const.
    385 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
    386 #define SIMD_ALIGNED32(var) var __attribute__((aligned(64)))
    387 typedef int16 __attribute__((vector_size(16))) vec16;
    388 typedef int32 __attribute__((vector_size(16))) vec32;
    389 typedef int8 __attribute__((vector_size(16))) vec8;
    390 typedef uint16 __attribute__((vector_size(16))) uvec16;
    391 typedef uint32 __attribute__((vector_size(16))) uvec32;
    392 typedef uint8 __attribute__((vector_size(16))) uvec8;
    393 typedef int16 __attribute__((vector_size(32))) lvec16;
    394 typedef int32 __attribute__((vector_size(32))) lvec32;
    395 typedef int8 __attribute__((vector_size(32))) lvec8;
    396 typedef uint16 __attribute__((vector_size(32))) ulvec16;
    397 typedef uint32 __attribute__((vector_size(32))) ulvec32;
    398 typedef uint8 __attribute__((vector_size(32))) ulvec8;
    399 #else
    400 #define SIMD_ALIGNED(var) var
    401 #define SIMD_ALIGNED32(var) var
    402 typedef int16 vec16[8];
    403 typedef int32 vec32[4];
    404 typedef int8 vec8[16];
    405 typedef uint16 uvec16[8];
    406 typedef uint32 uvec32[4];
    407 typedef uint8 uvec8[16];
    408 typedef int16 lvec16[16];
    409 typedef int32 lvec32[8];
    410 typedef int8 lvec8[32];
    411 typedef uint16 ulvec16[16];
    412 typedef uint32 ulvec32[8];
    413 typedef uint8 ulvec8[32];
    414 #endif
    415 
        // OMITFP is a function attribute requesting the "omit-frame-pointer"
        // optimization.  It expands to nothing when __APPLE__, __x86_64__ or
        // __llvm__ is defined.
        // NOTE(review): the #else branch uses __attribute__ without testing
        // __GNUC__; presumably OMITFP is only expanded in GCC-style sources -
        // confirm.
    416 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
    417 #define OMITFP
    418 #else
    419 #define OMITFP __attribute__((optimize("omit-frame-pointer")))
    420 #endif
    421 
    422 // NaCL macros for GCC x86 and x64.
        // LABELALIGN is an assembler directive string (".p2align 5", i.e. 32-byte
        // alignment) under Native Client and expands to nothing elsewhere.
    423 #if defined(__native_client__)
    424 #define LABELALIGN ".p2align 5\n"
    425 #else
    426 #define LABELALIGN
    427 #endif
    428 #if defined(__native_client__) && defined(__x86_64__)
        // Native Client x86-64 branch.  Every macro below builds a piece of inline
        // assembly text by stringizing its arguments.  Memory operands are formed
        // relative to %r15, and the MEMOP*/VMEMOPREG/VEXTOPMEM macros first compute
        // the effective address into %r14d with lea, then access (%r15,%r14),
        // with both instructions emitted between .bundle_lock and .bundle_unlock.
    429 // r14 is used for MEMOP macros.
        // NACL_R14 carries a trailing comma - presumably so it can be spliced into
        // an asm clobber list; confirm at the use sites.
    430 #define NACL_R14 "r14",
    431 #define BUNDLELOCK ".bundle_lock\n"
    432 #define BUNDLEUNLOCK ".bundle_unlock\n"
    433 #define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
    434 #define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
    435 #define MEMLEA(offset, base) #offset "(%q" #base ")"
    436 #define MEMLEA3(offset, index, scale) \
    437     #offset "(,%q" #index "," #scale ")"
    438 #define MEMLEA4(offset, base, index, scale) \
    439     #offset "(%q" #base ",%q" #index "," #scale ")"
    440 #define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
    441 #define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
        // opcode offset(base,index,scale) -> reg
    442 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    443     BUNDLELOCK \
    444     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    445     #opcode " (%%r15,%%r14),%%" #reg "\n" \
    446     BUNDLEUNLOCK
        // opcode reg -> offset(base,index,scale)
    447 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    448     BUNDLELOCK \
    449     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    450     #opcode " %%" #reg ",(%%r15,%%r14)\n" \
    451     BUNDLEUNLOCK
        // Like MEMOPREG, but the destination is an asm operand (%arg) rather than
        // a literal register name (%%reg).
    452 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    453     BUNDLELOCK \
    454     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    455     #opcode " (%%r15,%%r14),%" #arg "\n" \
    456     BUNDLEUNLOCK
        // Three-operand form: opcode offset(base,index,scale), reg1 -> reg2
    457 #define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    458     BUNDLELOCK \
    459     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    460     #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \
    461     BUNDLEUNLOCK
        // op $sel, reg -> offset(base,index,scale)
    462 #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    463     BUNDLELOCK \
    464     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    465     #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \
    466     BUNDLEUNLOCK
    467 #else  // defined(__native_client__) && defined(__x86_64__)
        // Default branch: the same macro names expand to ordinary
        // offset(base,index,scale) operands with no %r15/%r14 indirection.
        // NACL_R14, MEMMOVESTRING and MEMSTORESTRING expand to nothing.
        // NOTE(review): this branch defines BUNDLEALIGN but neither BUNDLELOCK nor
        // BUNDLEUNLOCK; in the visible code those two are referenced only by the
        // Native Client macros above - confirm nothing else expands them.
    468 #define NACL_R14
    469 #define BUNDLEALIGN
    470 #define MEMACCESS(base) "(%" #base ")"
    471 #define MEMACCESS2(offset, base) #offset "(%" #base ")"
    472 #define MEMLEA(offset, base) #offset "(%" #base ")"
    473 #define MEMLEA3(offset, index, scale) \
    474     #offset "(,%" #index "," #scale ")"
    475 #define MEMLEA4(offset, base, index, scale) \
    476     #offset "(%" #base ",%" #index "," #scale ")"
    477 #define MEMMOVESTRING(s, d)
    478 #define MEMSTORESTRING(reg, d)
    479 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    480     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
    481 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    482     #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
    483 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    484     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
    485 #define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    486     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \
    487     #reg2 "\n"
    488 #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    489     #op " $" #sel ",%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
    490 #endif  // defined(__native_client__) && defined(__x86_64__)
    491 
        // On ARM and AArch64 the x86 definition of MEMACCESS is discarded.  Under
        // Native Client it becomes an 8-byte alignment directive followed by a bic
        // that clears the top two bits (0xc0000000) of the base register operand;
        // otherwise it expands to nothing.
    492 #if defined(__arm__) || defined(__aarch64__)
    493 #undef MEMACCESS
    494 #if defined(__native_client__)
    495 #define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
    496 #else
    497 #define MEMACCESS(base)
    498 #endif  // __native_client__
    499 #endif  // __arm__ || __aarch64__
    500 
    501 void I444ToARGBRow_NEON(const uint8* src_y,
    502                         const uint8* src_u,
    503                         const uint8* src_v,
    504                         uint8* dst_argb,
    505                         int width);
    506 void I422ToARGBRow_NEON(const uint8* src_y,
    507                         const uint8* src_u,
    508                         const uint8* src_v,
    509                         uint8* dst_argb,
    510                         int width);
    511 void I411ToARGBRow_NEON(const uint8* src_y,
    512                         const uint8* src_u,
    513                         const uint8* src_v,
    514                         uint8* dst_argb,
    515                         int width);
    516 void I422ToBGRARow_NEON(const uint8* src_y,
    517                         const uint8* src_u,
    518                         const uint8* src_v,
    519                         uint8* dst_bgra,
    520                         int width);
    521 void I422ToABGRRow_NEON(const uint8* src_y,
    522                         const uint8* src_u,
    523                         const uint8* src_v,
    524                         uint8* dst_abgr,
    525                         int width);
    526 void I422ToRGBARow_NEON(const uint8* src_y,
    527                         const uint8* src_u,
    528                         const uint8* src_v,
    529                         uint8* dst_rgba,
    530                         int width);
    531 void I422ToRGB24Row_NEON(const uint8* src_y,
    532                          const uint8* src_u,
    533                          const uint8* src_v,
    534                          uint8* dst_rgb24,
    535                          int width);
    536 void I422ToRAWRow_NEON(const uint8* src_y,
    537                        const uint8* src_u,
    538                        const uint8* src_v,
    539                        uint8* dst_raw,
    540                        int width);
    541 void I422ToRGB565Row_NEON(const uint8* src_y,
    542                           const uint8* src_u,
    543                           const uint8* src_v,
    544                           uint8* dst_rgb565,
    545                           int width);
    546 void I422ToARGB1555Row_NEON(const uint8* src_y,
    547                             const uint8* src_u,
    548                             const uint8* src_v,
    549                             uint8* dst_argb1555,
    550                             int width);
    551 void I422ToARGB4444Row_NEON(const uint8* src_y,
    552                             const uint8* src_u,
    553                             const uint8* src_v,
    554                             uint8* dst_argb4444,
    555                             int width);
    556 void NV12ToARGBRow_NEON(const uint8* src_y,
    557                         const uint8* src_uv,
    558                         uint8* dst_argb,
    559                         int width);
    560 void NV21ToARGBRow_NEON(const uint8* src_y,
    561                         const uint8* src_vu,
    562                         uint8* dst_argb,
    563                         int width);
    564 void NV12ToRGB565Row_NEON(const uint8* src_y,
    565                           const uint8* src_uv,
    566                           uint8* dst_rgb565,
    567                           int width);
    568 void NV21ToRGB565Row_NEON(const uint8* src_y,
    569                           const uint8* src_vu,
    570                           uint8* dst_rgb565,
    571                           int width);
    572 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
    573                         uint8* dst_argb,
    574                         int width);
    575 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
    576                         uint8* dst_argb,
    577                         int width);
    578 
    579 void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    580 void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    581 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    582 void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    583 void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    584 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    585 void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
    586 void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
    587 void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
    588 void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
    589 void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
    590 void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    591 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    592 void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    593                          int pix);
    594 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    595                          int pix);
    596 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    597                          int pix);
    598 void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
    599                       uint8* dst_u, uint8* dst_v, int pix);
    600 void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
    601                        uint8* dst_u, uint8* dst_v, int pix);
    602 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
    603                       uint8* dst_u, uint8* dst_v, int pix);
    604 void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
    605                       uint8* dst_u, uint8* dst_v, int pix);
    606 void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
    607                       uint8* dst_u, uint8* dst_v, int pix);
    608 void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    609                        uint8* dst_u, uint8* dst_v, int pix);
    610 void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
    611                      uint8* dst_u, uint8* dst_v, int pix);
    612 void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    613                         uint8* dst_u, uint8* dst_v, int pix);
    614 void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
    615                           uint8* dst_u, uint8* dst_v, int pix);
    616 void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
    617                           uint8* dst_u, uint8* dst_v, int pix);
    618 void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
    619 void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
    620 void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
    621 void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
    622 void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
    623 void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
    624 void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
    625 void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
    626 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
    627 void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
    628 void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
    629 void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
    630 void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
    631 void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
    632 void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
    633 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
    634 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
    635 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
    636 void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    637 void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    638 void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
    639 void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
    640 void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
    641 void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
    642 void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
    643 void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    644 void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    645 void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
    646 void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
    647 void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
    648 void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
    649 void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
    650 void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
    651 void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
    652 void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
    653 
    // ToUVRow prototypes with the _AVX2 / _SSSE3 suffixes and their _Any_ forms.
    // Each takes a const source row with a source stride, separate dst_u and
    // dst_v output rows, and a width.
    // NOTE(review): the stride argument suggests more than one source row is
    // read per call -- confirm against the implementations.
    654 void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
    655                       uint8* dst_u, uint8* dst_v, int width);
    656 void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
    657                           uint8* dst_u, uint8* dst_v, int width);
    658 void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    659                        uint8* dst_u, uint8* dst_v, int width);
    660 void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    661                         uint8* dst_u, uint8* dst_v, int width);
    662 void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    663                        uint8* dst_u, uint8* dst_v, int width);
    664 void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    665                        uint8* dst_u, uint8* dst_v, int width);
    666 void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    667                        uint8* dst_u, uint8* dst_v, int width);
    668 void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    669                            uint8* dst_u, uint8* dst_v, int width);
    670 void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    671                             uint8* dst_u, uint8* dst_v, int width);
    672 void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    673                            uint8* dst_u, uint8* dst_v, int width);
    674 void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    675                            uint8* dst_u, uint8* dst_v, int width);
    676 void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    677                            uint8* dst_u, uint8* dst_v, int width);
    // ToUV prototypes carrying the _Any_NEON suffix.
    // The UV444 / UV422 / UV411 forms take only a source row; every other form
    // additionally takes a source stride. All write to separate dst_u and dst_v
    // rows and take a count named pix.
    678 void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    679                              int pix);
    680 void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    681                              int pix);
    682 void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    683                              int pix);
    684 void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    685                           uint8* dst_u, uint8* dst_v, int pix);
    686 void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    687                            uint8* dst_u, uint8* dst_v, int pix);
    688 void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
    689                           uint8* dst_u, uint8* dst_v, int pix);
    690 void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
    691                           uint8* dst_u, uint8* dst_v, int pix);
    692 void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
    693                           uint8* dst_u, uint8* dst_v, int pix);
    694 void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    695                            uint8* dst_u, uint8* dst_v, int pix);
    696 void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
    697                          uint8* dst_u, uint8* dst_v, int pix);
    698 void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    699                             uint8* dst_u, uint8* dst_v, int pix);
    700 void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
    701                               int src_stride_argb1555,
    702                               uint8* dst_u, uint8* dst_v, int pix);
    703 void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
    704                               int src_stride_argb4444,
    705                               uint8* dst_u, uint8* dst_v, int pix);
    // ToUVRow prototypes carrying the _C suffix, one per source format.
    // Same parameter shape as the suffixed variants declared in this header:
    // source row, source stride, dst_u, dst_v, width.
    706 void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
    707                    uint8* dst_u, uint8* dst_v, int width);
    708 void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
    709                     uint8* dst_u, uint8* dst_v, int width);
    710 void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
    711                    uint8* dst_u, uint8* dst_v, int width);
    712 void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
    713                    uint8* dst_u, uint8* dst_v, int width);
    714 void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
    715                    uint8* dst_u, uint8* dst_v, int width);
    716 void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
    717                     uint8* dst_u, uint8* dst_v, int width);
    718 void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
    719                   uint8* dst_u, uint8* dst_v, int width);
    720 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
    721                      uint8* dst_u, uint8* dst_v, int width);
    722 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
    723                        uint8* dst_u, uint8* dst_v, int width);
    724 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
    725                        uint8* dst_u, uint8* dst_v, int width);
    726 
    // ARGBToUV444Row, _SSSE3 and _Any_SSSE3 forms: source row in, separate
    // dst_u / dst_v rows out, no source stride.
    727 void ARGBToUV444Row_SSSE3(const uint8* src_argb,
    728                           uint8* dst_u, uint8* dst_v, int width);
    729 void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
    730                               uint8* dst_u, uint8* dst_v, int width);
    731 
    // ARGBToUV422Row, _SSSE3 and _Any_SSSE3 forms: source row in, separate
    // dst_u / dst_v rows out, no source stride.
    732 void ARGBToUV422Row_SSSE3(const uint8* src_argb,
    733                           uint8* dst_u, uint8* dst_v, int width);
    734 void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
    735                               uint8* dst_u, uint8* dst_v, int width);
    736 
    // _C prototypes for the single-row ARGB to U/V conversions (UV444, UV422,
    // UV411, UVJ422). None of them takes a source stride.
    737 void ARGBToUV444Row_C(const uint8* src_argb,
    738                       uint8* dst_u, uint8* dst_v, int width);
    739 void ARGBToUV422Row_C(const uint8* src_argb,
    740                       uint8* dst_u, uint8* dst_v, int width);
    741 void ARGBToUV411Row_C(const uint8* src_argb,
    742                       uint8* dst_u, uint8* dst_v, int width);
    743 void ARGBToUVJ422Row_C(const uint8* src_argb,
    744                        uint8* dst_u, uint8* dst_v, int width);
    745 
    // MirrorRow prototypes: one per suffix (_AVX2, _SSSE3, _SSE2, _NEON,
    // _MIPS_DSPR2, _C) plus _Any_ forms. All share the (src, dst, width)
    // signature. No _Any_ form is declared here for _MIPS_DSPR2.
    746 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    747 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
    748 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
    749 void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
    750 void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
    751 void MirrorRow_C(const uint8* src, uint8* dst, int width);
    752 void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
    753 void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
    754 void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
    755 void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
    756 
    // MirrorUVRow prototypes: a single src_uv row in, separate dst_u and dst_v
    // rows out. No _Any_ forms are declared in this group.
    757 void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    758                        int width);
    759 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    760                       int width);
    761 void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    762                             int width);
    763 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    764                    int width);
    765 
    // ARGBMirrorRow prototypes: (src, dst, width) for the _AVX2, _SSE2, _NEON
    // and _C suffixes, plus _Any_ forms of the three non-C variants.
    766 void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    767 void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
    768 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
    769 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
    770 void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
    771 void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
    772 void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
    773 
    // SplitUVRow prototypes: one src_uv row in, separate dst_u and dst_v rows
    // out, with a count named pix. Declared for _C, _SSE2, _AVX2, _NEON and
    // _MIPS_DSPR2, each non-C variant also having an _Any_ form.
    774 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    775 void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    776 void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    777 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    778 void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    779                            int pix);
    780 void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    781                          int pix);
    782 void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    783                          int pix);
    784 void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    785                          int pix);
    786 void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    787                                int pix);
    788 
    // MergeUVRow prototypes: separate src_u and src_v rows in, a single dst_uv
    // row out -- the parameter shape is the reverse of SplitUVRow.
    789 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    790                   int width);
    791 void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    792                      int width);
    793 void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    794                      int width);
    795 void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    796                      int width);
    797 void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    798                          int width);
    799 void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    800                          int width);
    801 void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    802                          int width);
    803 
    // CopyRow prototypes: (src, dst, count) over uint8 rows.
    // Only _SSE2, _AVX and _NEON have _Any_ forms declared here; _ERMS, _MIPS
    // and _C do not.
    804 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
    805 void CopyRow_AVX(const uint8* src, uint8* dst, int count);
    806 void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
    807 void CopyRow_NEON(const uint8* src, uint8* dst, int count);
    808 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
    809 void CopyRow_C(const uint8* src, uint8* dst, int count);
    810 void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
    811 void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
    812 void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
    813 
    // CopyRow variant over uint16 elements; only a _C form is declared here.
    // NOTE(review): count is presumably in elements, not bytes -- confirm.
    814 void CopyRow_16_C(const uint16* src, uint16* dst, int count);
    815 
    // ARGBCopyAlphaRow prototypes (_C, _SSE2, _AVX2): an ARGB source row and an
    // ARGB destination row of the given width.
    816 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    817 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
    818 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
    819 
    // ARGBCopyYToAlphaRow prototypes (_C, _SSE2, _AVX2): a src_y source row and
    // an ARGB destination row of the given width.
    820 void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
    821 void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
    822 void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
    823 
    // SetRow prototypes: destination row, an 8-bit value v8 and a count.
    // There is no source row. _Any_ forms exist for _X86 and _NEON only.
    824 void SetRow_C(uint8* dst, uint8 v8, int count);
    825 void SetRow_X86(uint8* dst, uint8 v8, int count);
    826 void SetRow_ERMS(uint8* dst, uint8 v8, int count);
    827 void SetRow_NEON(uint8* dst, uint8 v8, int count);
    828 void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
    829 void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
    830 
    // ARGBSetRow prototypes: destination ARGB row, a 32-bit value v32 and a
    // count. Only _NEON has an _Any_ form declared here.
    831 void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
    832 void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
    833 void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
    834 void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
    835 
    // Every variant below takes an ARGB source row, an ARGB destination row, a
    // const uint8* shuffler table and a count (pix).
    // NOTE(review): the shuffler layout is not visible here -- presumably
    // per-channel byte indices; confirm against the implementation.
    836 // ARGBShufflers for BGRAToARGB etc.
    837 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
    838                       const uint8* shuffler, int pix);
    839 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
    840                          const uint8* shuffler, int pix);
    841 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
    842                           const uint8* shuffler, int pix);
    843 void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
    844                          const uint8* shuffler, int pix);
    845 void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
    846                          const uint8* shuffler, int pix);
    847 void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
    848                              const uint8* shuffler, int pix);
    849 void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
    850                               const uint8* shuffler, int pix);
    851 void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
    852                              const uint8* shuffler, int pix);
    853 void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
    854                              const uint8* shuffler, int pix);
    855 
    // <format>ToARGBRow prototypes with x86 suffixes. RGB24 and RAW are declared
    // for _SSSE3 only; RGB565, ARGB1555 and ARGB4444 for both _SSE2 and _AVX2.
    // Each takes a source row, a dst_argb row and a count (pix).
    856 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
    857 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
    858 void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
    859 void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    860                             int pix);
    861 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    862                             int pix);
    863 void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int pix);
    864 void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
    865                             int pix);
    866 void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
    867                             int pix);
    868 
    // <format>ToARGBRow prototypes: the _NEON forms, then the _C forms, then the
    // _Any_SSSE3 forms for RGB24 and RAW.
    // NOTE(review): the _C prototypes for RGB565/ARGB1555/ARGB4444 name their
    // source src_rgb / src_argb, whereas the _NEON ones use format-specific
    // names (src_rgb565, src_argb1555, src_argb4444).
    869 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
    870 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
    871 void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
    872 void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
    873                             int pix);
    874 void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
    875                             int pix);
    876 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
    877 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
    878 void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
    879 void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
    880 void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
    881 void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
    882 void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
    883 
    // _Any_SSE2 and _Any_AVX2 forms of the RGB565 / ARGB1555 / ARGB4444 to ARGB
    // row prototypes; same parameters as the non-Any x86 declarations.
    884 void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
    885                               int pix);
    886 void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    887                                 int pix);
    888 void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    889                                 int pix);
    890 void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
    891                               int pix);
    892 void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
    893                                 int pix);
    894 void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
    895                                 int pix);
    896 
    // _Any_NEON forms of the <format>ToARGBRow prototypes (RGB24, RAW, RGB565,
    // ARGB1555, ARGB4444): source row, dst_argb row, count (pix).
    897 void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
    898 void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
    899 void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
    900                               int pix);
    901 void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
    902                                 int pix);
    903 void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
    904                                 int pix);
    905 
    // ARGBTo<format>Row prototypes with _SSSE3 (RGB24, RAW) and _SSE2 (RGB565,
    // ARGB1555, ARGB4444) suffixes. The output parameter is named dst_rgb for
    // every target format.
    906 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
    907 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
    908 void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
    909 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
    910 void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
    911 
    // ARGBToRGB565DitherRow prototypes (_C, _SSE2, _AVX2): like ARGBToRGB565Row
    // but with an extra 32-bit dither4 argument.
    // NOTE(review): dither4 presumably packs four 8-bit dither values -- confirm
    // against the implementation.
    912 void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
    913                              const uint32 dither4, int pix);
    914 void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
    915                                 const uint32 dither4, int pix);
    916 void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
    917                                 const uint32 dither4, int pix);
    918 
    // _AVX2 forms of the ARGB to RGB565 / ARGB1555 / ARGB4444 row prototypes.
    919 void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
    920 void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
    921 void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
    922 
    // _NEON forms of the ARGBTo<format>Row prototypes (RGB24, RAW, RGB565,
    // ARGB1555, ARGB4444): src_argb row in, dst_rgb row out, count (pix).
    923 void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    924 void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    925 void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    926 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    927 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    // _NEON form of ARGBToRGB565DitherRow: src_argb row in, dst_rgb row out, a
    // 32-bit dither4 argument and a count. The count is named pix to match the
    // _C / _SSE2 / _AVX2 prototypes of the same function.
    928 void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
    929                                 const uint32 dither4, int pix);
    930 
    // _C forms of the ARGBTo<format>Row prototypes (RGBA, RGB24, RAW, RGB565,
    // ARGB1555, ARGB4444). The output is named dst_rgb for every target format.
    931 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    932 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    933 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    934 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    935 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    936 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    937 
    // J400ToARGBRow prototypes: a src_y row in, a dst_argb row out, count (pix).
    // Declared for _SSE2, _AVX2, _NEON and _C, with _Any_ forms of the non-C
    // variants.
    938 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
    939 void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
    940 void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
    941 void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
    942 void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
    943 void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int pix);
    944 void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
    945 
    // _C prototypes converting Y/U/V input rows to an ARGB row.
    // I444 / I422 / I411 take three separate rows (src_y, src_u, src_v);
    // NV12 takes src_y plus a single src_uv row.
    946 void I444ToARGBRow_C(const uint8* src_y,
    947                      const uint8* src_u,
    948                      const uint8* src_v,
    949                      uint8* dst_argb,
    950                      int width);
    951 void I422ToARGBRow_C(const uint8* src_y,
    952                      const uint8* src_u,
    953                      const uint8* src_v,
    954                      uint8* dst_argb,
    955                      int width);
    956 void I411ToARGBRow_C(const uint8* src_y,
    957                      const uint8* src_u,
    958                      const uint8* src_v,
    959                      uint8* dst_argb,
    960                      int width);
    961 void NV12ToARGBRow_C(const uint8* src_y,
    962                      const uint8* src_uv,
    963                      uint8* dst_argb,
    964                      int width);
    // _C prototypes for NV21 / NV12 to RGB565: src_y plus one chroma row (named
    // src_vu for NV21, src_uv for NV12) in, one output row out.
    // The output is named dst_rgb565, matching I422ToRGB565Row_C, because the
    // function names indicate an RGB565 destination rather than ARGB.
    965 void NV21ToRGB565Row_C(const uint8* src_y,
    966                        const uint8* src_vu,
    967                        uint8* dst_rgb565,
    968                        int width);
    969 void NV12ToRGB565Row_C(const uint8* src_y,
    970                        const uint8* src_uv,
    971                        uint8* dst_rgb565,
    972                        int width);
    // Further _C prototypes producing one packed RGB-family output row.
    // Inputs vary: NV21 takes src_y + src_vu; YUY2 / UYVY take a single source
    // row; J422 and the I422To* forms take src_y, src_u and src_v.
    // The output parameter name follows the target format in the function name
    // (dst_argb, dst_bgra, dst_abgr, dst_rgba, dst_rgb24, dst_raw, dst_argb4444).
    973 void NV21ToARGBRow_C(const uint8* src_y,
    974                      const uint8* src_vu,
    975                      uint8* dst_argb,
    976                      int width);
    977 void YUY2ToARGBRow_C(const uint8* src_yuy2,
    978                      uint8* dst_argb,
    979                      int width);
    980 void UYVYToARGBRow_C(const uint8* src_uyvy,
    981                      uint8* dst_argb,
    982                      int width);
    983 void J422ToARGBRow_C(const uint8* src_y,
    984                      const uint8* src_u,
    985                      const uint8* src_v,
    986                      uint8* dst_argb,
    987                      int width);
    988 void I422ToBGRARow_C(const uint8* src_y,
    989                      const uint8* src_u,
    990                      const uint8* src_v,
    991                      uint8* dst_bgra,
    992                      int width);
    993 void I422ToABGRRow_C(const uint8* src_y,
    994                      const uint8* src_u,
    995                      const uint8* src_v,
    996                      uint8* dst_abgr,
    997                      int width);
    998 void I422ToRGBARow_C(const uint8* src_y,
    999                      const uint8* src_u,
   1000                      const uint8* src_v,
   1001                      uint8* dst_rgba,
   1002                      int width);
   1003 void I422ToRGB24Row_C(const uint8* src_y,
   1004                       const uint8* src_u,
   1005                       const uint8* src_v,
   1006                       uint8* dst_rgb24,
   1007                       int width);
   1008 void I422ToRAWRow_C(const uint8* src_y,
   1009                     const uint8* src_u,
   1010                     const uint8* src_v,
   1011                     uint8* dst_raw,
   1012                     int width);
   1013 void I422ToARGB4444Row_C(const uint8* src_y,
   1014                          const uint8* src_u,
   1015                          const uint8* src_v,
   1016                          uint8* dst_argb4444,
   1017                          int width);
   // _C prototype for I422 to ARGB1555: src_y, src_u and src_v rows in, one
   // output row out. The output is named dst_argb1555 to match the function's
   // target format (it was previously a copy of the ARGB4444 parameter name).
   1018 void I422ToARGB1555Row_C(const uint8* src_y,
   1019                          const uint8* src_u,
   1020                          const uint8* src_v,
   1021                          uint8* dst_argb1555,
   1022                          int width);
   // _C prototype for I422 to RGB565: src_y, src_u and src_v rows in, a
   // dst_rgb565 row out, with a width.
   1023 void I422ToRGB565Row_C(const uint8* src_y,
   1024                        const uint8* src_u,
   1025                        const uint8* src_v,
   1026                        uint8* dst_rgb565,
   1027                        int width);
   // _AVX2 prototype for I422 to ARGB: same parameter list as I422ToARGBRow_C.
   1028 void I422ToARGBRow_AVX2(const uint8* src_y,
   1029                         const uint8* src_u,
   1030                         const uint8* src_v,
   1031                         uint8* dst_argb,
   1032                         int width);
   // _AVX2 prototypes for I422 to BGRA / RGBA / ABGR: src_y, src_u and src_v
   // rows in, one output row out. Output parameters are named after the target
   // format (dst_bgra, dst_rgba, dst_abgr), matching the _C and _SSSE3
   // prototypes of the same functions.
   1033 void I422ToBGRARow_AVX2(const uint8* src_y,
   1034                         const uint8* src_u,
   1035                         const uint8* src_v,
   1036                         uint8* dst_bgra,
   1037                         int width);
   1038 void I422ToRGBARow_AVX2(const uint8* src_y,
   1039                         const uint8* src_u,
   1040                         const uint8* src_v,
   1041                         uint8* dst_rgba,
   1042                         int width);
   1043 void I422ToABGRRow_AVX2(const uint8* src_y,
   1044                         const uint8* src_u,
   1045                         const uint8* src_v,
   1046                         uint8* dst_abgr,
   1047                         int width);
   // _SSSE3 / _AVX2 prototypes for three-row (src_y, src_u, src_v) to ARGB
   // conversion. I444 and I411 are declared for both suffixes; only the _SSSE3
   // form of I422 appears in this group.
   1048 void I444ToARGBRow_SSSE3(const uint8* src_y,
   1049                          const uint8* src_u,
   1050                          const uint8* src_v,
   1051                          uint8* dst_argb,
   1052                          int width);
   1053 void I444ToARGBRow_AVX2(const uint8* src_y,
   1054                         const uint8* src_u,
   1055                         const uint8* src_v,
   1056                         uint8* dst_argb,
   1057                         int width);
   1058 void I422ToARGBRow_SSSE3(const uint8* src_y,
   1059                          const uint8* src_u,
   1060                          const uint8* src_v,
   1061                          uint8* dst_argb,
   1062                          int width);
   1063 void I411ToARGBRow_SSSE3(const uint8* src_y,
   1064                          const uint8* src_u,
   1065                          const uint8* src_v,
   1066                          uint8* dst_argb,
   1067                          int width);
   1068 void I411ToARGBRow_AVX2(const uint8* src_y,
   1069                         const uint8* src_u,
   1070                         const uint8* src_v,
   1071                         uint8* dst_argb,
   1072                         int width);
   // _SSSE3 / _AVX2 prototypes for NV12 and NV21 to ARGB: src_y plus a single
   // chroma row (src_uv for NV12, src_vu for NV21) in, dst_argb out.
   1073 void NV12ToARGBRow_SSSE3(const uint8* src_y,
   1074                          const uint8* src_uv,
   1075                          uint8* dst_argb,
   1076                          int width);
   1077 void NV21ToARGBRow_SSSE3(const uint8* src_y,
   1078                          const uint8* src_vu,
   1079                          uint8* dst_argb,
   1080                          int width);
   1081 void NV12ToARGBRow_AVX2(const uint8* src_y,
   1082                         const uint8* src_uv,
   1083                         uint8* dst_argb,
   1084                         int width);
   1085 void NV21ToARGBRow_AVX2(const uint8* src_y,
   1086                         const uint8* src_vu,
   1087                         uint8* dst_argb,
   1088                         int width);
   // _SSSE3 / _AVX2 prototypes for NV12 and NV21 to RGB565: src_y plus a single
   // chroma row in, one output row out.
   // NOTE(review): the output parameter is named dst_argb although the function
   // names indicate RGB565 output; I422ToRGB565Row_C names it dst_rgb565.
   1089 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
   1090                            const uint8* src_uv,
   1091                            uint8* dst_argb,
   1092                            int width);
   1093 void NV21ToRGB565Row_SSSE3(const uint8* src_y,
   1094                            const uint8* src_vu,
   1095                            uint8* dst_argb,
   1096                            int width);
   1097 void NV12ToRGB565Row_AVX2(const uint8* src_y,
   1098                           const uint8* src_uv,
   1099                           uint8* dst_argb,
   1100                           int width);
   1101 void NV21ToRGB565Row_AVX2(const uint8* src_y,
   1102                           const uint8* src_vu,
   1103                           uint8* dst_argb,
   1104                           int width);
   // _SSSE3 / _AVX2 prototypes for YUY2 and UYVY to ARGB: a single source row
   // in, a dst_argb row out, with a width.
   1105 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
   1106                          uint8* dst_argb,
   1107                          int width);
   1108 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
   1109                          uint8* dst_argb,
   1110                          int width);
   1111 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
   1112                         uint8* dst_argb,
   1113                         int width);
   1114 void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
   1115                         uint8* dst_argb,
   1116                         int width);
   // _SSSE3 / _AVX2 prototypes for J422 to ARGB: same parameter list as
   // J422ToARGBRow_C (src_y, src_u, src_v, dst_argb, width).
   1117 void J422ToARGBRow_SSSE3(const uint8* src_y,
   1118                          const uint8* src_u,
   1119                          const uint8* src_v,
   1120                          uint8* dst_argb,
   1121                          int width);
   1122 void J422ToARGBRow_AVX2(const uint8* src_y,
   1123                         const uint8* src_u,
   1124                         const uint8* src_v,
   1125                         uint8* dst_argb,
   1126                         int width);
   // _SSSE3 prototypes for I422 to BGRA / ABGR / RGBA. Output parameters are
   // named after the target format, as in the _C prototypes.
   1127 void I422ToBGRARow_SSSE3(const uint8* src_y,
   1128                          const uint8* src_u,
   1129                          const uint8* src_v,
   1130                          uint8* dst_bgra,
   1131                          int width);
   1132 void I422ToABGRRow_SSSE3(const uint8* src_y,
   1133                          const uint8* src_u,
   1134                          const uint8* src_v,
   1135                          uint8* dst_abgr,
   1136                          int width);
   1137 void I422ToRGBARow_SSSE3(const uint8* src_y,
   1138                          const uint8* src_u,
   1139                          const uint8* src_v,
   1140                          uint8* dst_rgba,
   1141                          int width);
   // _SSSE3 / _AVX2 prototypes for I422 to ARGB4444, ARGB1555 and RGB565:
   // src_y, src_u and src_v rows in, one output row out.
   // NOTE(review): every output here is named dst_argb even though the function
   // names indicate ARGB4444 / ARGB1555 / RGB565 output; the _C prototypes use
   // format-specific names (e.g. dst_argb4444, dst_rgb565).
   1142 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
   1143                              const uint8* src_u,
   1144                              const uint8* src_v,
   1145                              uint8* dst_argb,
   1146                              int width);
   1147 void I422ToARGB4444Row_AVX2(const uint8* src_y,
   1148                             const uint8* src_u,
   1149                             const uint8* src_v,
   1150                             uint8* dst_argb,
   1151                             int width);
   1152 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
   1153                              const uint8* src_u,
   1154                              const uint8* src_v,
   1155                              uint8* dst_argb,
   1156                              int width);
   1157 void I422ToARGB1555Row_AVX2(const uint8* src_y,
   1158                             const uint8* src_u,
   1159                             const uint8* src_v,
   1160                             uint8* dst_argb,
   1161                             int width);
   1162 void I422ToRGB565Row_SSSE3(const uint8* src_y,
   1163                            const uint8* src_u,
   1164                            const uint8* src_v,
   1165                            uint8* dst_argb,
   1166                            int width);
   1167 void I422ToRGB565Row_AVX2(const uint8* src_y,
   1168                           const uint8* src_u,
   1169                           const uint8* src_v,
   1170                           uint8* dst_argb,
   1171                           int width);
   // _SSSE3 / _AVX2 prototypes for I422 to RGB24 and RAW: src_y, src_u and
   // src_v rows in, a dst_rgb24 or dst_raw row out.
   1172 void I422ToRGB24Row_SSSE3(const uint8* src_y,
   1173                           const uint8* src_u,
   1174                           const uint8* src_v,
   1175                           uint8* dst_rgb24,
   1176                           int width);
   1177 void I422ToRGB24Row_AVX2(const uint8* src_y,
   1178                          const uint8* src_u,
   1179                          const uint8* src_v,
   1180                          uint8* dst_rgb24,
   1181                          int width);
   1182 void I422ToRAWRow_SSSE3(const uint8* src_y,
   1183                         const uint8* src_u,
   1184                         const uint8* src_v,
   1185                         uint8* dst_raw,
   1186                         int width);
   1187 void I422ToRAWRow_AVX2(const uint8* src_y,
   1188                        const uint8* src_u,
   1189                        const uint8* src_v,
   1190                        uint8* dst_raw,
   1191                        int width);
   // _Any_AVX2 form of I422ToARGBRow; same parameter list as the _AVX2 form.
   1192 void I422ToARGBRow_Any_AVX2(const uint8* src_y,
   1193                             const uint8* src_u,
   1194                             const uint8* src_v,
   1195                             uint8* dst_argb,
   1196                             int width);
   // _Any_AVX2 prototypes for I422 to BGRA / RGBA / ABGR: src_y, src_u and
   // src_v rows in, one output row out. Output parameters are named after the
   // target format (dst_bgra, dst_rgba, dst_abgr), matching the _C and _SSSE3
   // prototypes of the same conversions.
   1197 void I422ToBGRARow_Any_AVX2(const uint8* src_y,
   1198                             const uint8* src_u,
   1199                             const uint8* src_v,
   1200                             uint8* dst_bgra,
   1201                             int width);
   1202 void I422ToRGBARow_Any_AVX2(const uint8* src_y,
   1203                             const uint8* src_u,
   1204                             const uint8* src_v,
   1205                             uint8* dst_rgba,
   1206                             int width);
   1207 void I422ToABGRRow_Any_AVX2(const uint8* src_y,
   1208                             const uint8* src_u,
   1209                             const uint8* src_v,
   1210                             uint8* dst_abgr,
   1211                             int width);
   // _Any_SSSE3 / _Any_AVX2 forms of the I444, I422 and I411 to ARGB
   // prototypes: src_y, src_u and src_v rows in, dst_argb out.
   1212 void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
   1213                              const uint8* src_u,
   1214                              const uint8* src_v,
   1215                              uint8* dst_argb,
   1216                              int width);
   1217 void I444ToARGBRow_Any_AVX2(const uint8* src_y,
   1218                             const uint8* src_u,
   1219                             const uint8* src_v,
   1220                             uint8* dst_argb,
   1221                             int width);
   1222 void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
   1223                              const uint8* src_u,
   1224                              const uint8* src_v,
   1225                              uint8* dst_argb,
   1226                              int width);
   1227 void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
   1228                              const uint8* src_u,
   1229                              const uint8* src_v,
   1230                              uint8* dst_argb,
   1231                              int width);
   1232 void I411ToARGBRow_Any_AVX2(const uint8* src_y,
   1233                             const uint8* src_u,
   1234                             const uint8* src_v,
   1235                             uint8* dst_argb,
   1236                             int width);
   // NV12/NV21 row converter prototypes ("_Any_" SSSE3 and AVX2 variants).
   // Each takes src_y plus one second source pointer (named src_uv for NV12
   // and src_vu for NV21), a destination pointer and a width.
   // NOTE(review): the RGB565 variants also call their destination dst_argb,
   // which does not match the output format in the function name.
   1237 void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
   1238                              const uint8* src_uv,
   1239                              uint8* dst_argb,
   1240                              int width);
   1241 void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
   1242                              const uint8* src_vu,
   1243                              uint8* dst_argb,
   1244                              int width);
   1245 void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
   1246                             const uint8* src_uv,
   1247                             uint8* dst_argb,
   1248                             int width);
   1249 void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
   1250                             const uint8* src_vu,
   1251                             uint8* dst_argb,
   1252                             int width);
   1253 void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1254                                const uint8* src_uv,
   1255                                uint8* dst_argb,
   1256                                int width);
   1257 void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1258                                const uint8* src_vu,
   1259                                uint8* dst_argb,
   1260                                int width);
   1261 void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
   1262                               const uint8* src_uv,
   1263                               uint8* dst_argb,
   1264                               int width);
   1265 void NV21ToRGB565Row_Any_AVX2(const uint8* src_y,
   1266                               const uint8* src_vu,
   1267                               uint8* dst_argb,
   1268                               int width);
   // YUY2/UYVY to ARGB row converter prototypes ("_Any_" SSSE3 and AVX2
   // variants). Each takes a single source pointer (src_yuy2 or src_uyvy),
   // a dst_argb destination pointer and a width.
   1269 void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
   1270                              uint8* dst_argb,
   1271                              int width);
   1272 void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
   1273                              uint8* dst_argb,
   1274                              int width);
   1275 void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
   1276                             uint8* dst_argb,
   1277                             int width);
   1278 void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
   1279                             uint8* dst_argb,
   1280                             int width);
   // J422 to ARGB row converter prototypes ("_Any_" SSSE3 and AVX2 variants),
   // with the same five-parameter signature as the I422 to ARGB prototypes.
   1281 void J422ToARGBRow_Any_SSSE3(const uint8* src_y,
   1282                              const uint8* src_u,
   1283                              const uint8* src_v,
   1284                              uint8* dst_argb,
   1285                              int width);
   1286 void J422ToARGBRow_Any_AVX2(const uint8* src_y,
   1287                             const uint8* src_u,
   1288                             const uint8* src_v,
   1289                             uint8* dst_argb,
   1290                             int width);
   // I422 to BGRA/ABGR/RGBA row converter prototypes ("_Any_" SSSE3 variants).
   // Here the destination parameter is named after the output format
   // (dst_bgra, dst_abgr, dst_rgba).
   1291 void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
   1292                              const uint8* src_u,
   1293                              const uint8* src_v,
   1294                              uint8* dst_bgra,
   1295                              int width);
   1296 void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
   1297                              const uint8* src_u,
   1298                              const uint8* src_v,
   1299                              uint8* dst_abgr,
   1300                              int width);
   1301 void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
   1302                              const uint8* src_u,
   1303                              const uint8* src_v,
   1304                              uint8* dst_rgba,
   1305                              int width);
   // I422 to ARGB4444/ARGB1555/RGB565/RGB24/RAW row converter prototypes
   // ("_Any_" SSSE3 and AVX2 variants). Each takes src_y, src_u, src_v, one
   // destination pointer and a width.
   // NOTE(review): the destination is named dst_rgba or dst_argb throughout,
   // regardless of the output format in the function name.
   1306 void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
   1307                                  const uint8* src_u,
   1308                                  const uint8* src_v,
   1309                                  uint8* dst_rgba,
   1310                                  int width);
   1311 void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
   1312                                 const uint8* src_u,
   1313                                 const uint8* src_v,
   1314                                 uint8* dst_rgba,
   1315                                 int width);
   1316 void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
   1317                                  const uint8* src_u,
   1318                                  const uint8* src_v,
   1319                                  uint8* dst_rgba,
   1320                                  int width);
   1321 void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
   1322                                 const uint8* src_u,
   1323                                 const uint8* src_v,
   1324                                 uint8* dst_rgba,
   1325                                 int width);
   1326 void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1327                                const uint8* src_u,
   1328                                const uint8* src_v,
   1329                                uint8* dst_rgba,
   1330                                int width);
   1331 void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
   1332                               const uint8* src_u,
   1333                               const uint8* src_v,
   1334                               uint8* dst_rgba,
   1335                               int width);
   1336 void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
   1337                               const uint8* src_u,
   1338                               const uint8* src_v,
   1339                               uint8* dst_argb,
   1340                               int width);
   1341 void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
   1342                              const uint8* src_u,
   1343                              const uint8* src_v,
   1344                              uint8* dst_argb,
   1345                              int width);
   1346 void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
   1347                             const uint8* src_u,
   1348                             const uint8* src_v,
   1349                             uint8* dst_argb,
   1350                             int width);
   1351 void I422ToRAWRow_Any_AVX2(const uint8* src_y,
   1352                            const uint8* src_u,
   1353                            const uint8* src_v,
   1354                            uint8* dst_argb,
   1355                            int width);
   1356 
   // I400 to ARGB row converter prototypes: a single src_y source pointer,
   // a dst_argb destination pointer and a width. Declared for C, SSE2, AVX2
   // and NEON, plus "_Any_" variants of the three SIMD versions.
   1357 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
   1358 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1359 void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1360 void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1361 void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1362 void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1363 void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1364 
   1365 // ARGB preattenuated alpha blend.
   // Two source rows (src_argb, src_argb1), one destination row (dst_argb)
   // and a width. Declared for SSSE3, SSE2, NEON and C; no "_Any_" variants
   // are declared in this chunk.
   1366 void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
   1367                         uint8* dst_argb, int width);
   1368 void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1369                        uint8* dst_argb, int width);
   1370 void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1371                        uint8* dst_argb, int width);
   1372 void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
   1373                     uint8* dst_argb, int width);
   1374 
   1375 // ARGB multiply images. Same API as Blend, but these require
   1376 // pointer and width alignment for SSE2.
   // Parameters: two source rows (src_argb, src_argb1), one destination row
   // (dst_argb) and a width. Declared for C, SSE2, AVX2 and NEON, each SIMD
   // version with an "_Any_" counterpart.
   1377 void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
   1378                        uint8* dst_argb, int width);
   1379 void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1380                           uint8* dst_argb, int width);
   1381 void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1382                               uint8* dst_argb, int width);
   1383 void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1384                           uint8* dst_argb, int width);
   1385 void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1386                               uint8* dst_argb, int width);
   1387 void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1388                           uint8* dst_argb, int width);
   1389 void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1390                               uint8* dst_argb, int width);
   1391 
   1392 // ARGB add images.
   // Same four-parameter signature as the multiply prototypes: src_argb,
   // src_argb1, dst_argb, width. Declared for C, SSE2, AVX2 and NEON, each
   // SIMD version with an "_Any_" counterpart.
   1393 void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
   1394                   uint8* dst_argb, int width);
   1395 void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1396                      uint8* dst_argb, int width);
   1397 void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1398                          uint8* dst_argb, int width);
   1399 void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1400                      uint8* dst_argb, int width);
   1401 void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1402                          uint8* dst_argb, int width);
   1403 void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1404                      uint8* dst_argb, int width);
   1405 void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1406                          uint8* dst_argb, int width);
   1407 
   1408 // ARGB subtract images. Same API as Blend, but these require
   1409 // pointer and width alignment for SSE2.
   // Parameters: src_argb, src_argb1, dst_argb, width.
   // NOTE(review): which operand is subtracted from which is not visible in
   // these prototypes - check the implementation before relying on an order.
   1410 void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
   1411                        uint8* dst_argb, int width);
   1412 void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1413                           uint8* dst_argb, int width);
   1414 void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1415                               uint8* dst_argb, int width);
   1416 void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1417                           uint8* dst_argb, int width);
   1418 void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1419                               uint8* dst_argb, int width);
   1420 void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1421                           uint8* dst_argb, int width);
   1422 void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1423                               uint8* dst_argb, int width);
   1424 
   // ARGB to RGB24/RAW/RGB565/ARGB1555/ARGB4444 row converter prototypes
   // ("_Any_" SSSE3 and SSE2 variants): src_argb source, dst_rgb destination
   // and a count named pix (presumably a pixel count - confirm).
   1425 void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
   1426 void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
   1427 void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1428 void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1429 void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1430 
   // ARGB to RGB565 with dithering ("_Any_" SSE2 and AVX2 variants). Takes an
   // extra 32-bit dither4 argument between the destination and the count.
   // NOTE(review): the layout of dither4 is not visible in this header.
   1431 void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1432                                     const uint32 dither4, int pix);
   1433 void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1434                                     const uint32 dither4, int pix);
   1435 
   // AVX2 "_Any_" counterparts of the ARGB to RGB565/ARGB1555/ARGB4444
   // prototypes, with the same (src_argb, dst_rgb, pix) signature.
   1436 void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1437 void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1438 void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1439 
   // NEON "_Any_" counterparts of the ARGB to RGB-format prototypes,
   // including the dithered RGB565 variant.
   // NOTE(review): the dither prototype names its count "width" while the
   // SSE2/AVX2 dither prototypes name it "pix".
   1440 void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1441 void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1442 void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1443 void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1444 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1445 void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
   1446                                     const uint32 dither4, int width);
   1447 
   // NEON "_Any_" prototypes for the three-source (src_y, src_u, src_v)
   // converters: I444/I422/I411 to ARGB and I422 to BGRA, ABGR, RGBA, RGB24,
   // RAW, ARGB4444, ARGB1555 and RGB565.
   // NOTE(review): every destination here is named dst_argb, whatever output
   // format the function name states.
   1448 void I444ToARGBRow_Any_NEON(const uint8* src_y,
   1449                             const uint8* src_u,
   1450                             const uint8* src_v,
   1451                             uint8* dst_argb,
   1452                             int width);
   1453 void I422ToARGBRow_Any_NEON(const uint8* src_y,
   1454                             const uint8* src_u,
   1455                             const uint8* src_v,
   1456                             uint8* dst_argb,
   1457                             int width);
   1458 void I411ToARGBRow_Any_NEON(const uint8* src_y,
   1459                             const uint8* src_u,
   1460                             const uint8* src_v,
   1461                             uint8* dst_argb,
   1462                             int width);
   1463 void I422ToBGRARow_Any_NEON(const uint8* src_y,
   1464                             const uint8* src_u,
   1465                             const uint8* src_v,
   1466                             uint8* dst_argb,
   1467                             int width);
   1468 void I422ToABGRRow_Any_NEON(const uint8* src_y,
   1469                             const uint8* src_u,
   1470                             const uint8* src_v,
   1471                             uint8* dst_argb,
   1472                             int width);
   1473 void I422ToRGBARow_Any_NEON(const uint8* src_y,
   1474                             const uint8* src_u,
   1475                             const uint8* src_v,
   1476                             uint8* dst_argb,
   1477                             int width);
   1478 void I422ToRGB24Row_Any_NEON(const uint8* src_y,
   1479                              const uint8* src_u,
   1480                              const uint8* src_v,
   1481                              uint8* dst_argb,
   1482                              int width);
   1483 void I422ToRAWRow_Any_NEON(const uint8* src_y,
   1484                            const uint8* src_u,
   1485                            const uint8* src_v,
   1486                            uint8* dst_argb,
   1487                            int width);
   1488 void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
   1489                                 const uint8* src_u,
   1490                                 const uint8* src_v,
   1491                                 uint8* dst_argb,
   1492                                 int width);
   1493 void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
   1494                                 const uint8* src_u,
   1495                                 const uint8* src_v,
   1496                                 uint8* dst_argb,
   1497                                 int width);
   1498 void I422ToRGB565Row_Any_NEON(const uint8* src_y,
   1499                               const uint8* src_u,
   1500                               const uint8* src_v,
   1501                               uint8* dst_argb,
   1502                               int width);
   // NEON "_Any_" prototypes for the NV12/NV21 and YUY2/UYVY converters.
   // NV12/NV21 take src_y plus a second source pointer; YUY2/UYVY take a
   // single source pointer. All take a destination pointer and a width.
   // The second source of the NV21 prototypes is named src_vu, matching the
   // NV21 SSSE3/AVX2 prototypes in this header (parameter names in a
   // prototype do not affect callers).
   1503 void NV12ToARGBRow_Any_NEON(const uint8* src_y,
   1504                             const uint8* src_uv,
   1505                             uint8* dst_argb,
   1506                             int width);
   1507 void NV21ToARGBRow_Any_NEON(const uint8* src_y,
   1508                             const uint8* src_vu,
   1509                             uint8* dst_argb,
   1510                             int width);
   1511 void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
   1512                               const uint8* src_uv,
   1513                               uint8* dst_argb,
   1514                               int width);
   1515 void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
   1516                               const uint8* src_vu,
   1517                               uint8* dst_argb,
   1518                               int width);
   1519 void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
   1520                             uint8* dst_argb,
   1521                             int width);
   1522 void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
   1523                             uint8* dst_argb,
   1524                             int width);
   // MIPS DSPR2 prototypes for I422 to ARGB/BGRA/ABGR: src_y, src_u, src_v
   // source pointers, a destination pointer and a width. Each function is
   // declared exactly once; an identical repeated set of these three
   // prototypes has been dropped as redundant.
   // NOTE(review): the BGRA and ABGR destinations are named dst_argb.
   1525 void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
   1526                               const uint8* src_u,
   1527                               const uint8* src_v,
   1528                               uint8* dst_argb,
   1529                               int width);
   1530 void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
   1531                               const uint8* src_u,
   1532                               const uint8* src_v,
   1533                               uint8* dst_argb,
   1534                               int width);
   1535 void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
   1536                               const uint8* src_u,
   1537                               const uint8* src_v,
   1538                               uint8* dst_argb,
   1539                               int width);
   1555 
   // YUY2 row function prototypes, in three shapes per target:
   //   YUY2ToYRow     - src_yuy2 to a single dst_y output.
   //   YUY2ToUVRow    - src_yuy2 plus stride_yuy2 to dst_u and dst_v.
   //   YUY2ToUV422Row - src_yuy2 (no stride) to dst_u and dst_v.
   // Declared for AVX2, SSE2, NEON and C, then "_Any_" variants of the SIMD
   // versions. The count parameter is named pix.
   // NOTE(review): stride_yuy2 suggests the UVRow form reads more than one
   // source row - confirm against the implementation.
   1556 void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
   1557 void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1558                       uint8* dst_u, uint8* dst_v, int pix);
   1559 void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
   1560                          uint8* dst_u, uint8* dst_v, int pix);
   1561 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
   1562 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1563                       uint8* dst_u, uint8* dst_v, int pix);
   1564 void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
   1565                          uint8* dst_u, uint8* dst_v, int pix);
   1566 void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
   1567 void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
   1568                       uint8* dst_u, uint8* dst_v, int pix);
   1569 void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
   1570                          uint8* dst_u, uint8* dst_v, int pix);
   1571 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
   1572 void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
   1573                    uint8* dst_u, uint8* dst_v, int pix);
   1574 void YUY2ToUV422Row_C(const uint8* src_yuy2,
   1575                       uint8* dst_u, uint8* dst_v, int pix);
   1576 void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
   1577 void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1578                           uint8* dst_u, uint8* dst_v, int pix);
   1579 void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
   1580                              uint8* dst_u, uint8* dst_v, int pix);
   1581 void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
   1582 void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1583                           uint8* dst_u, uint8* dst_v, int pix);
   1584 void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
   1585                              uint8* dst_u, uint8* dst_v, int pix);
   1586 void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
   1587 void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
   1588                           uint8* dst_u, uint8* dst_v, int pix);
   1589 void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
   1590                              uint8* dst_u, uint8* dst_v, int pix);
   // UYVY row function prototypes for the SIMD targets (AVX2, SSE2, NEON),
   // in the same three shapes as the YUY2 prototypes:
   //   UYVYToYRow     - src_uyvy to a single dst_y output.
   //   UYVYToUVRow    - src_uyvy plus stride_uyvy to dst_u and dst_v.
   //   UYVYToUV422Row - src_uyvy (no stride) to dst_u and dst_v.
   // Each function is declared exactly once; an identical repeated set of
   // the three AVX2 prototypes has been dropped as redundant.
   1591 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
   1592 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1593                       uint8* dst_u, uint8* dst_v, int pix);
   1594 void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
   1595                          uint8* dst_u, uint8* dst_v, int pix);
   1596 void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
   1597 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1598                       uint8* dst_u, uint8* dst_v, int pix);
   1599 void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
   1600                          uint8* dst_u, uint8* dst_v, int pix);
   1606 void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
   1607 void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
   1608                       uint8* dst_u, uint8* dst_v, int pix);
   1609 void UYVYToUV422Row_NEON(const uint8* src_uyvy,
   1610                          uint8* dst_u, uint8* dst_v, int pix);
   1611 
   // UYVY row function prototypes for the C version and the "_Any_" AVX2,
   // SSE2 and NEON variants. Signatures mirror the SIMD UYVY prototypes:
   // YRow writes dst_y; UVRow takes stride_uyvy and writes dst_u/dst_v;
   // UV422Row writes dst_u/dst_v without a stride.
   1612 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
   1613 void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
   1614                    uint8* dst_u, uint8* dst_v, int pix);
   1615 void UYVYToUV422Row_C(const uint8* src_uyvy,
   1616                       uint8* dst_u, uint8* dst_v, int pix);
   1617 void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
   1618 void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1619                           uint8* dst_u, uint8* dst_v, int pix);
   1620 void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
   1621                              uint8* dst_u, uint8* dst_v, int pix);
   1622 void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
   1623 void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1624                           uint8* dst_u, uint8* dst_v, int pix);
   1625 void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
   1626                              uint8* dst_u, uint8* dst_v, int pix);
   1627 void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
   1628 void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
   1629                           uint8* dst_u, uint8* dst_v, int pix);
   1630 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
   1631                              uint8* dst_u, uint8* dst_v, int pix);
   1632 
   // I422 to YUY2/UYVY row converter prototypes: src_y, src_u, src_v source
   // pointers, one destination pointer (dst_yuy2 or dst_uyvy) and a width.
   // Declared for C, SSE2 and NEON, with "_Any_" variants for SSE2 and NEON.
   1633 void I422ToYUY2Row_C(const uint8* src_y,
   1634                      const uint8* src_u,
   1635                      const uint8* src_v,
   1636                      uint8* dst_yuy2, int width);
   1637 void I422ToUYVYRow_C(const uint8* src_y,
   1638                      const uint8* src_u,
   1639                      const uint8* src_v,
   1640                      uint8* dst_uyvy, int width);
   1641 void I422ToYUY2Row_SSE2(const uint8* src_y,
   1642                         const uint8* src_u,
   1643                         const uint8* src_v,
   1644                         uint8* dst_yuy2, int width);
   1645 void I422ToUYVYRow_SSE2(const uint8* src_y,
   1646                         const uint8* src_u,
   1647                         const uint8* src_v,
   1648                         uint8* dst_uyvy, int width);
   1649 void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
   1650                             const uint8* src_u,
   1651                             const uint8* src_v,
   1652                             uint8* dst_yuy2, int width);
   1653 void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
   1654                             const uint8* src_u,
   1655                             const uint8* src_v,
   1656                             uint8* dst_uyvy, int width);
   1657 void I422ToYUY2Row_NEON(const uint8* src_y,
   1658                         const uint8* src_u,
   1659                         const uint8* src_v,
   1660                         uint8* dst_yuy2, int width);
   1661 void I422ToUYVYRow_NEON(const uint8* src_y,
   1662                         const uint8* src_u,
   1663                         const uint8* src_v,
   1664                         uint8* dst_uyvy, int width);
   1665 void I422ToYUY2Row_Any_NEON(const uint8* src_y,
   1666                             const uint8* src_u,
   1667                             const uint8* src_v,
   1668                             uint8* dst_yuy2, int width);
   1669 void I422ToUYVYRow_Any_NEON(const uint8* src_y,
   1670                             const uint8* src_u,
   1671                             const uint8* src_v,
   1672                             uint8* dst_uyvy, int width);
   1673 
   1674 // Effects related row functions.
   // ARGBAttenuateRow: src_argb source, dst_argb destination, width.
   // Declared for C, SSE2, SSSE3, AVX2 and NEON, each SIMD version with an
   // "_Any_" counterpart.
   1675 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1676 void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
   1677 void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1678 void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1679 void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1680 void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1681                                int width);
   1682 void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1683                                 int width);
   1684 void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1685                                int width);
   1686 void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
   1687                                int width);
   1688 
   1689 // Inverse table for unattenuate, shared by C and SSE2.
   // fixed_invtbl8 is a 256-entry uint32 table defined outside this header
   // (only the extern declaration appears here). The unattenuate prototypes
   // take src_argb, dst_argb and a width; no NEON variant is declared in
   // this chunk.
   1690 extern const uint32 fixed_invtbl8[256];
   1691 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1692 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
   1693 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1694 void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1695                                  int width);
   1696 void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1697                                  int width);
   1698 
   // ARGBGrayRow: src_argb source, dst_argb destination, width.
   // Declared for C, SSSE3 and NEON.
   1699 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1700 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1701 void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1702 
   // ARGBSepiaRow: takes only a mutable dst_argb pointer and a width.
   // NOTE(review): with no separate source pointer this presumably modifies
   // dst_argb in place - confirm against the implementation.
   1703 void ARGBSepiaRow_C(uint8* dst_argb, int width);
   1704 void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
   1705 void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
   1706 
   // ARGBColorMatrixRow: src_argb source, dst_argb destination, a signed
   // 8-bit coefficient array matrix_argb, and a width.
   // NOTE(review): the number and layout of matrix_argb entries are not
   // visible in this header.
   1707 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
   1708                           const int8* matrix_argb, int width);
   1709 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1710                               const int8* matrix_argb, int width);
   1711 void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
   1712                              const int8* matrix_argb, int width);
   1713 
   // ARGBColorTableRow: a mutable dst_argb pointer, a read-only table_argb
   // lookup table, and a width. Declared for C and X86.
   // NOTE(review): no source pointer is taken, so this presumably remaps
   // dst_argb in place; the table size is not visible here.
   1714 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1715 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1716 
   // RGBColorTableRow: same signature as ARGBColorTableRow (dst_argb,
   // table_argb, width). Declared for C and X86.
   // NOTE(review): how this differs from the ARGB variant (e.g. treatment
   // of the alpha channel) is not visible in this header.
   1717 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1718 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1719 
   // ARGBQuantizeRow: a mutable dst_argb pointer, three integer parameters
   // (scale, interval_size, interval_offset) and a width. Declared for C,
   // SSE2 and NEON.
   // NOTE(review): the exact formula combining the three parameters is not
   // visible in this header.
   1720 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
   1721                        int interval_offset, int width);
   1722 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
   1723                           int interval_offset, int width);
   1724 void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
   1725                           int interval_offset, int width);
   1726 
   // ARGBShadeRow: src_argb source, dst_argb destination, a width, and a
   // 32-bit value. Note that width precedes value in the parameter list.
   // NOTE(review): the encoding of value is not visible in this header.
   1727 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1728                     uint32 value);
   1729 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
   1730                        uint32 value);
   1731 void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
   1732                        uint32 value);
   1733 
   1734 // Used for blur.
   // SSE2 prototypes of the two cumulative-sum helpers:
   //   CumulativeSumToAverageRow - reads two int32 rows (topleft, botleft)
   //     with a width, an area and a count, and writes uint8 output to dst.
   //   ComputeCumulativeSumRow - reads a uint8 row and a previous_cumsum
   //     int32 row, and writes an int32 cumsum row.
   // NOTE(review): units of width/area/count are not visible in this header.
   1735 void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
   1736                                     int width, int area, uint8* dst, int count);
   1737 void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
   1738                                   const int32* previous_cumsum, int width);
   1739 
   // C prototypes of the cumulative-sum helpers, with the same parameter
   // lists as the SSE2 prototypes.
   1740 void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
   1741                                  int width, int area, uint8* dst, int count);
   1742 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
   1743                                const int32* previous_cumsum, int width);
   1744 
   // ARGBAffineRow: src_argb source with its src_argb_stride, dst_argb
   // destination, a float array uv_dudv, and a width. These are the only
   // prototypes in this chunk marked LIBYUV_API (the macro is defined
   // outside this chunk; presumably an export/visibility annotation).
   // NOTE(review): the element count and meaning of uv_dudv are not visible
   // in this header.
   1745 LIBYUV_API
   1746 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
   1747                      uint8* dst_argb, const float* uv_dudv, int width);
   1748 LIBYUV_API
   1749 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
   1750                         uint8* dst_argb, const float* uv_dudv, int width);
   1751 
   1752 // Used for I420Scale, ARGBScale, and ARGBInterpolate.
   // InterpolateRow: dst_ptr destination first, then src_ptr source, a
   // ptrdiff_t src_stride_ptr, a width and an integer source_y_fraction.
   // Declared for C, SSE2, SSSE3, AVX2, NEON and MIPS DSPR2, each non-C
   // version with an "_Any_" counterpart.
   // NOTE(review): the range of source_y_fraction and which rows are blended
   // via src_stride_ptr are not visible in this header.
   1753 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
   1754                       ptrdiff_t src_stride_ptr,
   1755                       int width, int source_y_fraction);
   1756 void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
   1757                          ptrdiff_t src_stride_ptr, int width,
   1758                          int source_y_fraction);
   1759 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1760                           ptrdiff_t src_stride_ptr, int width,
   1761                           int source_y_fraction);
   1762 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1763                          ptrdiff_t src_stride_ptr, int width,
   1764                          int source_y_fraction);
   1765 void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1766                          ptrdiff_t src_stride_ptr, int width,
   1767                          int source_y_fraction);
   1768 void InterpolateRow_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
   1769                                ptrdiff_t src_stride_ptr, int width,
   1770                                int source_y_fraction);
   1771 void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1772                              ptrdiff_t src_stride_ptr, int width,
   1773                              int source_y_fraction);
   1774 void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
   1775                              ptrdiff_t src_stride_ptr, int width,
   1776                              int source_y_fraction);
   1777 void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1778                               ptrdiff_t src_stride_ptr, int width,
   1779                               int source_y_fraction);
   1780 void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1781                              ptrdiff_t src_stride_ptr, int width,
   1782                              int source_y_fraction);
   1783 void InterpolateRow_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
   1784                                    ptrdiff_t src_stride_ptr, int width,
   1785                                    int source_y_fraction);
   1786 
        // 16-bit sample version of InterpolateRow_C: same parameter list, but
        // `dst_ptr` and `src_ptr` point to uint16 instead of uint8.  Only a C
        // variant is declared in this part of the header.
        // NOTE(review): whether `src_stride_ptr` is counted in bytes or in uint16
        // elements is not visible here -- confirm against the implementation.
   1787 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
   1788                          ptrdiff_t src_stride_ptr,
   1789                          int width, int source_y_fraction);
   1790 
   1791 // Sobel images.
        // Row functions for Sobel processing.  Each kernel is declared as a C
        // variant plus SSE2 and NEON variants with the same signature.
        //
        // SobelXRow_*: reads three source rows (`src_y0`, `src_y1`, `src_y2`) and
        // writes `dst_sobelx`.
   1792 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
   1793                  uint8* dst_sobelx, int width);
   1794 void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1795                     const uint8* src_y2, uint8* dst_sobelx, int width);
   1796 void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1797                     const uint8* src_y2, uint8* dst_sobelx, int width);
        // SobelYRow_*: reads only two source rows (`src_y0`, `src_y1`) and writes
        // `dst_sobely`.
   1798 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
   1799                  uint8* dst_sobely, int width);
   1800 void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1801                     uint8* dst_sobely, int width);
   1802 void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1803                     uint8* dst_sobely, int width);
        // SobelRow_*: combines a `src_sobelx` row and a `src_sobely` row into
        // `dst_argb`.
        // NOTE(review): how the two inputs map onto the ARGB channels is not
        // visible in this header -- confirm against the implementation.
   1804 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1805                 uint8* dst_argb, int width);
   1806 void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1807                    uint8* dst_argb, int width);
   1808 void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1809                    uint8* dst_argb, int width);
        // SobelToPlaneRow_*: same inputs as SobelRow_*, but the output is `dst_y`
        // (presumably a single 8-bit plane rather than ARGB -- confirm).
   1810 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1811                        uint8* dst_y, int width);
   1812 void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1813                           uint8* dst_y, int width);
   1814 void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1815                           uint8* dst_y, int width);
        // SobelXYRow_*: same signature as SobelRow_*.
        // NOTE(review): presumably keeps the X and Y responses in separate output
        // channels instead of merging them -- confirm against the implementation.
   1816 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1817                   uint8* dst_argb, int width);
   1818 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1819                      uint8* dst_argb, int width);
   1820 void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1821                      uint8* dst_argb, int width);
        // `_Any_` counterparts of the SSE2/NEON variants of SobelRow,
        // SobelToPlaneRow and SobelXYRow, with unchanged signatures.  No `_Any_`
        // variants of SobelXRow or SobelYRow are declared in this part of the
        // header.
        // NOTE(review): presumably these accept widths that are not a multiple of
        // the SIMD step -- confirm against the implementation.
   1822 void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1823                        uint8* dst_argb, int width);
   1824 void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1825                        uint8* dst_argb, int width);
   1826 void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1827                               uint8* dst_y, int width);
   1828 void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1829                               uint8* dst_y, int width);
   1830 void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1831                          uint8* dst_argb, int width);
   1832 void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1833                          uint8* dst_argb, int width);
   1834 
        // ARGBPolynomialRow_*: reads `src_argb`, writes `dst_argb`, and takes a
        // float coefficient array `poly` plus a `width`.  C, SSE2 and AVX2 variants
        // are declared with identical signatures.
        // NOTE(review): the number and ordering of coefficients in `poly`
        // (presumably a per-channel polynomial) are not visible in this header --
        // confirm against the implementation.
   1835 void ARGBPolynomialRow_C(const uint8* src_argb,
   1836                          uint8* dst_argb, const float* poly,
   1837                          int width);
   1838 void ARGBPolynomialRow_SSE2(const uint8* src_argb,
   1839                             uint8* dst_argb, const float* poly,
   1840                             int width);
   1841 void ARGBPolynomialRow_AVX2(const uint8* src_argb,
   1842                             uint8* dst_argb, const float* poly,
   1843                             int width);
   1844 
        // ARGBLumaColorTableRow_*: reads `src_argb`, writes `dst_argb`, and takes a
        // `width`, a byte table `luma` and a 32-bit `lumacoeff`.  A C variant and an
        // SSSE3 variant are declared with the same signature.
        // NOTE(review): the layout of the `luma` table and the packing of
        // `lumacoeff` (presumably per-channel luma weights) are not visible in this
        // header -- confirm against the implementation.
   1845 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1846                              const uint8* luma, uint32 lumacoeff);
   1847 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1848                                  int width,
   1849                                  const uint8* luma, uint32 lumacoeff);
   1850 
   1851 #ifdef __cplusplus
   1852 }  // extern "C"
   1853 }  // namespace libyuv
   1854 #endif
   1855 
   1856 #endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
   1857